Compare commits

...

2 Commits

Author SHA1 Message Date
a552d842ab Merge feature/500-phase8-utiq-quickwin : Phase 8 anti-Utiq R0+R1 Quick Win (ref #500)
Some checks are pending
License Headers / check (push) Waiting to run
2026-06-08 16:20:54 +02:00
41ce31ff38 feat(toolbox): Phase 8 Quick Win — anti-Utiq R0 + R1 defense (ref #500)
Operator-grade tracking detection / blocking for the Utiq consortium
(Deutsche Telekom + Orange + Telefónica + Vodafone, launched 2023).
Utiq issues a 90-day-stable mtid identifier per publisher based on
SIM + IP carrier-level auth — cookies and standard anti-fingerprint
defenses don't help.

Quick Win ships R0 + R1 :

  R0 (log)    — every flow involving *.utiq.com or utiqLoader.js is
                stored in /var/lib/secubox/toolbox/toolbox.db so the
                operator can audit. No effect on the flow.
  R1 (block)  — same detection, but flow.response is short-circuited
                to 451 (Unavailable For Legal Reasons) before the
                upstream is contacted. No mtid is ever revealed to
                the publisher. Some pages may degrade — the worst
                case is no targeted ad. Toggle globally via env var
                UTIQ_DEFAULT_LEVEL=R1.

Per-client toggling and R2 (mask) / R3 (pseudo-avatar) ship in Phase 2.

Files :

  mitmproxy_addons/utiq_defense.py
    - hooks requestheaders (short-circuits before body fetched)
    - matches *.utiq.com (host) + utiqLoader.js (path, for 1st-party
      CNAME wrappers)
    - records via secubox_toolbox.utiq.record_event

  secubox_toolbox/utiq.py
    - utiq_events table : id, ts, client_ip, publisher (extracted
      from host), host, path, action, level, detected_mtid (future),
      injected_mtid (future)
    - indexes on ts, (client_ip, ts), (publisher, ts)
    - recent() / aggregates() / client_recent_count() helpers

  secubox_toolbox/api.py
    - GET /admin/utiq-events?hours=&limit= → events + aggregates

  mitmproxy_addons/inject_banner.py
    - new ctx['utiq_recent_count'] from utiq.client_recent_count
    - '📡 utiq:N' tile appears when count > 0

  sbin/secubox-toolbox-mitm-wg-launch
    - utiq_defense inserted between inject_xff and local_store

Live verified : addon loaded, /admin/utiq-events returns empty events
+ aggregates skeleton, no regressions on the existing addon chain.

Closes the Quick Win checkbox in #500.
2026-06-08 16:19:51 +02:00
6 changed files with 382 additions and 1 deletions

View File

@ -1,3 +1,29 @@
secubox-toolbox (2.4.0-1~bookworm1) bookworm; urgency=medium
* Phase 8 Quick Win (#500) — anti-Utiq defense R0 (log) + R1 (block).
Utiq is the operator-grade tracking ID consortium launched in 2023
by Deutsche Telekom + Orange + Telefónica + Vodafone — a carrier-
issued 90-day-stable identifier that cookies + standard anti-
fingerprinting tools can't address.
Shipped :
- mitmproxy_addons/utiq_defense.py addon (placed early in the
chain so R1 short-circuits before downstream addons spin on
a doomed flow). Default level R0 (log) ; toggle to R1 via
env var UTIQ_DEFAULT_LEVEL=R1.
- secubox_toolbox/utiq.py SQLite event store (utiq_events
table with publisher extracted from host, action + level
per record, indexes on ts / client_ip / publisher).
- GET /api/v1/toolbox/admin/utiq-events?hours=&limit= returns
recent events + aggregates (by_publisher / by_client /
by_action) for the admin dashboard.
- inject_banner tile : '📡 utiq:N' appears on the right side
of the SecuBox banner when the client has hit a Utiq host
in the last hour. Cheap query, fail-open.
- utiq_defense added to the mitm-wg-launch addon chain
between inject_xff and local_store.
-- Gérald Kerma <devel@cybermind.fr> lun., 08 juin 2026 14:19:50 +0000
secubox-toolbox (2.3.3-1~bookworm1) bookworm; urgency=medium
* Phase 7.E.3 (#498) — new unbound drop-in

View File

@ -237,6 +237,7 @@ def _compute_site_context(flow: http.HTTPFlow) -> dict:
"cookies_sent": 0, "cookies_sent": 0,
"trackers": 0, "trackers": 0,
"is_tracker_host": False, "is_tracker_host": False,
"utiq_recent_count": 0,
} }
# Cookies (cheap : just header counts, name-less for privacy) # Cookies (cheap : just header counts, name-less for privacy)
@ -244,6 +245,20 @@ def _compute_site_context(flow: http.HTTPFlow) -> dict:
ctx["cookies_set"] = set_n ctx["cookies_set"] = set_n
ctx["cookies_sent"] = sent_n ctx["cookies_sent"] = sent_n
# Phase 8 (#500) — Utiq tile : count events from this peer in the
# last hour. Best-effort : if the store import fails or the DB
# isn't reachable we just leave the counter at 0 and the tile
# disappears. No exception ever propagates to the addon chain.
try:
from secubox_toolbox import utiq as _u
peer_ip = None
if flow.client_conn and flow.client_conn.peername:
peer_ip = flow.client_conn.peername[0]
if peer_ip:
ctx["utiq_recent_count"] = _u.client_recent_count(peer_ip, hours=1)
except Exception:
pass
# Trackers : 1st-party host check + body scan # Trackers : 1st-party host check + body scan
ctx["is_tracker_host"] = bool(_TRACKER_HOST_PATTERNS.match(host)) ctx["is_tracker_host"] = bool(_TRACKER_HOST_PATTERNS.match(host))
if flow.response and flow.response.content: if flow.response and flow.response.content:
@ -384,6 +399,13 @@ def _banner_html_dynamic(sha1: str, ctx: dict, csp_strict: bool,
else: else:
target_emoji = "&#x1F3AF;" # 🎯 target_emoji = "&#x1F3AF;" # 🎯
right_parts.append(f"{target_emoji} {trackers}") right_parts.append(f"{target_emoji} {trackers}")
# Phase 8 (#500) — surface Utiq hits for this client. Cheap query
# against the utiq_events store (last 1 h). Avoids surfacing the
# tile on stale state by capping the lookback window.
utiq_n = ctx.get("utiq_recent_count", 0)
if utiq_n > 0:
# 📡 N — operator-grade tracker active
right_parts.append(f"&#x1F4E1; utiq:{utiq_n}")
if ctx["asn"]: if ctx["asn"]:
right_parts.append(_ncr(ctx["asn"])) right_parts.append(_ncr(ctx["asn"]))
right_text = " &#xB7; ".join(right_parts) # middle dot · = &#xB7; right_text = " &#xB7; ".join(right_parts) # middle dot · = &#xB7;

View File

@ -0,0 +1,131 @@
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
#
# Phase 8 (#500) — anti-Utiq defense (Quick Win : R0 log + R1 block).
#
# Utiq is the operator-grade tracking ID launched in 2023 by Deutsche
# Telekom + Orange + Telefónica + Vodafone. Sites participating include
# a loader at `<sitename>.utiq.com/utiqLoader.js` (a 1st-party CNAME)
# that calls the Utiq API ; the carrier validates the request via
# network-level SIM/IP headers and returns a 90-day-stable identifier
# (martechpass = mtid) the publisher can use to track the user across
# visits. Unlike cookies, the user cannot delete a mtid client-side —
# only the carrier's `consenthub.utiq.com` consent record controls it.
#
# Defense levels (global via UTIQ_DEFAULT_LEVEL in this Quick Win ;
# per-client opt-in lands in Phase 2) :
# R0 log only — passthrough, record every flow involving Utiq
# hosts so the operator sees what's happening.
# R1 block — refuse the loader + API calls. No mtid is ever
# emitted to the publisher. Some pages may degrade
# (the Utiq tag is usually wrapped in `if (mtid)`,
# so the worst case is no targeted ad).
# R2 mask — (Phase 2, future) return a stub `utiqLoader.js`
# that sets `window.utiq = {mtid: null, atid: null}`
# so the page sees a "no consent" state.
# R3 pseudo — (Phase 2, future) forge a stable-per-publisher
# pseudo-mtid via avatar.py to poison the tracking
# pool.
#
# This Quick Win ships R0 + R1. Levels R2 / R3 land in Phase 2.
from __future__ import annotations
import logging
import re
from mitmproxy import http
# Importing the toolbox state store is best-effort : the addon must
# still load even when the host doesn't have the toolbox package
# installed (e.g. a standalone mitmproxy install).
try:
from secubox_toolbox import utiq as _store
except Exception:
_store = None
# Module-level logger shared by every function of this addon.
log = logging.getLogger("secubox.toolbox.utiq")
# ── Host + path matchers ──
# *.utiq.com covers consenthub.utiq.com + every <publisher>.utiq.com
# CNAME wrapper. The path matcher catches the loader regardless of
# subdomain ; some publishers proxy `utiqLoader.js` through their own
# 1st-party path (`/static/js/utiqLoader.js`) to bypass simple host
# filters.
#
# _RE_HOST is anchored at the end of the host and requires either the
# start of the string or a literal dot right before "utiq.com", so a
# look-alike such as "notutiq.com" does not match. Case-insensitive.
_RE_HOST = re.compile(r"(^|\.)utiq\.com$", re.IGNORECASE)
# _RE_PATH is NOT anchored : it matches "utiqLoader.js" anywhere in the
# string it is searched in. Case-insensitive.
# NOTE(review): if the request path handed to this pattern includes the
# query string, a URL like `/search?q=utiqLoader.js` would match too —
# confirm that this over-matching is acceptable at level R1.
_RE_PATH = re.compile(r"utiqLoader\.js", re.IGNORECASE)
def _is_utiq_flow(flow: http.HTTPFlow) -> bool:
    """Tell whether a flow targets Utiq.

    A flow qualifies when its host matches ``_RE_HOST`` or, failing
    that, when its request path matches ``_RE_PATH``. Missing host or
    path values are treated as empty strings.
    """
    request = flow.request
    if _RE_HOST.search(request.pretty_host or "") is not None:
        return True
    # Host did not match : fall back to the loader-path check.
    return _RE_PATH.search(request.path or "") is not None
def _client_ip(flow: http.HTTPFlow) -> str | None:
try:
return flow.client_conn.peername[0]
except Exception:
return None
def _level(flow: http.HTTPFlow) -> str:
"""Return the current defense level for the client behind this flow.
Phase 8 Quick Win defaults to R0 (log) for every client. Per-
client level customisation comes in Phase 2 when the admin UI
exposes the per-client toggle. In the meantime an operator can
override via env var `UTIQ_DEFAULT_LEVEL=R1` to flip everyone to
block mode globally.
"""
import os
return (os.environ.get("UTIQ_DEFAULT_LEVEL") or "R0").upper()
class UtiqDefense:
    """mitmproxy addon : record Utiq flows and, at level R1, block them."""

    def requestheaders(self, flow: http.HTTPFlow) -> None:
        """Handle a flow as soon as its request headers are available.

        Non-Utiq flows are left untouched. Utiq flows are always
        recorded through the event store when it could be imported
        (that is the R0 baseline) ; at level R1 a synthetic 451 response
        is additionally attached to the flow.
        """
        # Acting at requestheaders (rather than request) lets the block
        # response be attached before the request body is dealt with.
        if not _is_utiq_flow(flow):
            return

        peer = _client_ip(flow)
        req_host = flow.request.pretty_host or ""
        req_path = flow.request.path or ""
        mode = _level(flow)
        blocking = mode == "R1"

        # Logging happens whatever the level ; a store failure must not
        # disturb the flow, so it is only reported as a warning.
        if _store is not None:
            try:
                _store.record_event(
                    client_ip=peer,
                    host=req_host,
                    path=req_path,
                    action="block" if blocking else "log",
                    level=mode,
                )
            except Exception as e:
                log.warning("utiq event store failed: %s", e)

        if not blocking:
            return

        # 451 (Unavailable For Legal Reasons) rather than 404 : the
        # content is being refused on privacy grounds, and the page's
        # JS can tell the block apart from a missing resource.
        block_headers = {
            "Content-Type": "application/json",
            "X-SecuBox-Utiq-Block": "R1",
        }
        flow.response = http.Response.make(
            451,
            b'{"error":"blocked_by_secubox","reason":"utiq_tracker"}',
            block_headers,
        )
        log.info("[utiq R1] blocked %s %s for client=%s",
                 req_host, req_path, peer)


# Module-level list holding the single addon instance of this script.
addons = [UtiqDefense()]

View File

@ -68,8 +68,11 @@ fi
# Addons : # Addons :
# - inject_xff (Phase 7 #498) MUST be FIRST — sets X-Forwarded-For at # - inject_xff (Phase 7 #498) MUST be FIRST — sets X-Forwarded-For at
# requestheaders so other addons and the upstream see the real peer IP # requestheaders so other addons and the upstream see the real peer IP
# - utiq_defense (Phase 8 #500) runs at requestheaders too ; placed
# EARLY so a R1 block short-circuits the flow before downstream
# addons spend cycles on it
# - cert_pin_detect auto-learns pinned hosts (Phase 6.N) # - cert_pin_detect auto-learns pinned hosts (Phase 6.N)
for addon in inject_xff local_store inject_banner dpi cookies avatar ja4 soc_relay cert_pin_detect; do for addon in inject_xff utiq_defense local_store inject_banner dpi cookies avatar ja4 soc_relay cert_pin_detect; do
ARGS+=(-s "$ADDON_DIR/${addon}.py") ARGS+=(-s "$ADDON_DIR/${addon}.py")
done done

View File

@ -1962,6 +1962,21 @@ async def report(token: str) -> Response:
# ───────────────── Admin (Phase 1 minimal) ───────────────── # ───────────────── Admin (Phase 1 minimal) ─────────────────
@router.get("/admin/utiq-events")
async def admin_utiq_events(hours: int = 24, limit: int = 200) -> dict:
    """Phase 8 (#500) — silenced-but-tracked Utiq detections.

    Lists every event the mitm-wg `utiq_defense` addon recorded within
    the window (default 24 h, max 31 d). Operator dashboard uses this
    to surface the per-client + per-publisher views.

    Query parameters are validated here, before they reach the store,
    so that the event list and the aggregates always describe the same
    window and an absurd value cannot overflow the SQLite integer
    binding :

      hours   1 .. 744 (31 d) ; anything else falls back to 24
      limit   1 .. 5000       ; anything else falls back to 200

    Returns:
        ``{"events": [...], "aggregates": {...}}``.
    """
    from . import utiq as _u

    if not 1 <= hours <= 24 * 31:
        hours = 24
    if not 1 <= limit <= 5000:
        limit = 200
    return {
        "events": _u.recent(hours=hours, limit=limit),
        "aggregates": _u.aggregates(hours=hours),
    }
@router.get("/admin/config") @router.get("/admin/config")
async def admin_config() -> dict: async def admin_config() -> dict:
return _get_cfg().model_dump() return _get_cfg().model_dump()

View File

@ -0,0 +1,184 @@
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
"""
SecuBox-Deb :: ToolBoX Utiq event store
Phase 8 (#500) — store every Utiq-tracker flow seen by the mitm-wg
addon so the operator can audit silence-but-track activity in the
admin UI.
Schema kept intentionally minimal :
- client_ip is the WG peer IP (10.99.1.x). Already a pseudo-
identifier (anonymous), no need to hash again here.
- publisher is derived from the host (the part BEFORE `.utiq.com`
for CNAME wrappers, or `consenthub` / `utiq` for direct calls).
- action {log, block, mask, pseudo}.
- level mirrors the defense level in effect at the time.
- detected_mtid is reserved for Phase 2 (when we parse the response
body to extract the mtid mitm-wg would have revealed to the
publisher).
"""
from __future__ import annotations
import logging
import sqlite3
import time
from pathlib import Path
from typing import Dict, List, Optional
log = logging.getLogger("secubox.toolbox.utiq.store")
DB_PATH = Path("/var/lib/secubox/toolbox/toolbox.db")
_SCHEMA = """
CREATE TABLE IF NOT EXISTS utiq_events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
ts INTEGER NOT NULL,
client_ip TEXT,
publisher TEXT,
host TEXT NOT NULL,
path TEXT,
action TEXT NOT NULL,
level TEXT NOT NULL,
detected_mtid TEXT,
injected_mtid TEXT
);
CREATE INDEX IF NOT EXISTS idx_utiq_ts ON utiq_events(ts);
CREATE INDEX IF NOT EXISTS idx_utiq_client ON utiq_events(client_ip, ts);
CREATE INDEX IF NOT EXISTS idx_utiq_publisher ON utiq_events(publisher, ts);
"""
def _conn() -> sqlite3.Connection:
    """Open the toolbox database and make sure the Utiq schema exists.

    Creates the parent directory of ``DB_PATH`` when needed, opens the
    database with a 5 s busy timeout and ``isolation_level=None``, sets
    ``sqlite3.Row`` as row factory and runs the idempotent ``_SCHEMA``
    script (``CREATE ... IF NOT EXISTS``).

    Returns:
        An open connection. The caller owns it and must close it.

    Raises:
        OSError / sqlite3.Error: when the directory or the database
            cannot be prepared. The half-initialised connection is
            closed before the error propagates.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    c = sqlite3.connect(str(DB_PATH), timeout=5.0, isolation_level=None)
    try:
        c.row_factory = sqlite3.Row
        c.executescript(_SCHEMA)
    except BaseException:
        # Do not leave a dangling handle behind when setup fails.
        c.close()
        raise
    return c
def _publisher_from_host(host: str) -> str:
"""Derive a publisher tag from the host.
`consenthub.utiq.com` 'consenthub'
`lemonde.utiq.com` 'lemonde'
`utiq.com` (rare direct) 'utiq'
`cdn.example.com` (path-only) 'example.com' (fallback)
"""
h = (host or "").lower()
if h.endswith(".utiq.com"):
return h[: -len(".utiq.com")].rsplit(".", 1)[-1] or "utiq"
if h == "utiq.com":
return "utiq"
# path-only match (some sites serve utiqLoader.js from their own
# 1st-party domain)
parts = h.split(".")
if len(parts) >= 2:
return ".".join(parts[-2:])
return h or "unknown"
def record_event(
    *,
    client_ip: Optional[str],
    host: str,
    path: Optional[str],
    action: str,
    level: str,
    detected_mtid: Optional[str] = None,
    injected_mtid: Optional[str] = None,
) -> None:
    """Insert one event. Best-effort — never raises into the addon.

    The row is stamped with the current Unix time and with a publisher
    tag derived from ``host`` by ``_publisher_from_host``. Any failure
    (database unreachable, locked, ...) is logged as a warning and
    swallowed.

    Args:
        client_ip: peer address of the client, or None when unknown.
        host: request host (stored as-is).
        path: request path.
        action: what was done with the flow (e.g. "log", "block").
        level: defense level in effect when the flow was seen.
        detected_mtid: reserved for Phase 2.
        injected_mtid: reserved for Phase 2.
    """
    try:
        c = _conn()
        try:
            c.execute(
                "INSERT INTO utiq_events(ts, client_ip, publisher, host, "
                "path, action, level, detected_mtid, injected_mtid) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    int(time.time()),
                    client_ip,
                    _publisher_from_host(host),
                    host,
                    path,
                    action,
                    level,
                    detected_mtid,
                    injected_mtid,
                ),
            )
        finally:
            # sqlite3's `with connection:` only ends a transaction ; it
            # never closes the handle, so close it explicitly.
            c.close()
    except Exception as e:
        log.warning("record_event failed: %s", e)
def recent(hours: int = 24, limit: int = 200) -> List[Dict]:
    """Return the last events within the window, newest first.

    Args:
        hours: look-back window in hours. Values outside 1 .. 744
            (31 d) fall back to 24.
        limit: maximum number of rows. Values outside 1 .. 5000 fall
            back to 200.

    Returns:
        A list of plain dicts, one per event, ordered by ``ts``
        descending.

    Raises:
        sqlite3.Error / OSError: when the database cannot be opened or
            queried.
    """
    if hours < 1 or hours > 24 * 31:
        hours = 24
    if limit < 1 or limit > 5000:
        limit = 200
    # The cut-off must be derived from the *validated* window, otherwise
    # an out-of-range `hours` would still drive the query.
    since = int(time.time()) - hours * 3600
    c = _conn()
    try:
        cur = c.execute(
            "SELECT id, ts, client_ip, publisher, host, path, action, "
            "level, detected_mtid, injected_mtid "
            "FROM utiq_events WHERE ts >= ? ORDER BY ts DESC LIMIT ?",
            (since, limit),
        )
        return [dict(r) for r in cur.fetchall()]
    finally:
        # The sqlite3 connection context manager does not close the
        # connection ; release it explicitly.
        c.close()
def aggregates(hours: int = 24) -> Dict:
    """Counts by publisher + by client + by action for the dashboard.

    Args:
        hours: look-back window in hours. Values outside 1 .. 744
            (31 d) fall back to 24, the same rule ``recent()`` applies,
            so both views always describe the same window.

    Returns:
        A dict with ``window_hours`` (the effective window), ``total``,
        and three lists of ``{<key>, n}`` rows sorted by ``n``
        descending : ``by_publisher`` (top 25), ``by_client`` (top 25,
        rows without client_ip excluded) and ``by_action``.

    Raises:
        sqlite3.Error / OSError: when the database cannot be opened or
            queried.
    """
    if hours < 1 or hours > 24 * 31:
        hours = 24
    since = int(time.time()) - hours * 3600
    out: Dict = {"window_hours": hours, "total": 0, "by_publisher": [],
                 "by_client": [], "by_action": []}
    c = _conn()
    try:
        out["total"] = c.execute(
            "SELECT COUNT(*) FROM utiq_events WHERE ts >= ?",
            (since,),
        ).fetchone()[0]
        out["by_publisher"] = [
            dict(r) for r in c.execute(
                "SELECT publisher, COUNT(*) AS n FROM utiq_events "
                "WHERE ts >= ? GROUP BY publisher "
                "ORDER BY n DESC LIMIT 25",
                (since,),
            ).fetchall()
        ]
        out["by_client"] = [
            dict(r) for r in c.execute(
                "SELECT client_ip, COUNT(*) AS n FROM utiq_events "
                "WHERE ts >= ? AND client_ip IS NOT NULL "
                "GROUP BY client_ip ORDER BY n DESC LIMIT 25",
                (since,),
            ).fetchall()
        ]
        out["by_action"] = [
            dict(r) for r in c.execute(
                "SELECT action, COUNT(*) AS n FROM utiq_events "
                "WHERE ts >= ? GROUP BY action ORDER BY n DESC",
                (since,),
            ).fetchall()
        ]
    finally:
        # The sqlite3 connection context manager does not close the
        # connection ; release it explicitly.
        c.close()
    return out
def client_recent_count(client_ip: str, hours: int = 1) -> int:
    """Used by inject_banner to decide whether to surface the Utiq tile.

    Cheap query used per-request on banner-eligible flows.

    Args:
        client_ip: peer address to count events for. An empty value
            short-circuits to 0 without touching the database.
        hours: look-back window in hours.

    Returns:
        Number of events recorded for ``client_ip`` within the window ;
        0 on any database error (fail-open, the tile just disappears).
    """
    if not client_ip:
        return 0
    since = int(time.time()) - hours * 3600
    try:
        c = _conn()
        try:
            return c.execute(
                "SELECT COUNT(*) FROM utiq_events "
                "WHERE client_ip = ? AND ts >= ?",
                (client_ip, since),
            ).fetchone()[0]
        finally:
            # Called once per banner-eligible flow : close the handle
            # right away instead of leaving it to garbage collection
            # (sqlite3's `with connection:` would not close it).
            c.close()
    except Exception:
        return 0