Compare commits

...

2 Commits

Author SHA1 Message Date
CyberMind
1de8b29865
Merge pull request #585 from CyberMind-FR/feature/577-perf-video-photo-cdn-proxy-cache-shared
Some checks are pending
License Headers / check (push) Waiting to run
Shared media proxy-cache (default off) (#577)
2026-06-14 15:14:35 +02:00
3b8daf964e feat(toolbox): shared media proxy-cache, default off (closes #577)
mitmproxy_addons/media_cache.py — one upstream fetch serves all R2/R3
clients: cacheable GET media/static (image/video-segment/audio/font/css/js)
on disk (/var/cache/secubox/toolbox/media), keyed by URL. Safety: 16MB/obj
cap gated on Content-Length (large video passthrough, no RAM hold), 2GB LRU,
skips Range/auth/Set-Cookie/no-store, fail-open. Opt-in filter media_cache.
api /admin/cache stats + WebUI toggle + launcher + postinst dir.
secubox-toolbox 2.6.32. Unit-tested (HIT across clients, cap, range, segment).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-14 15:13:59 +02:00
6 changed files with 301 additions and 2 deletions

View File

@ -1,3 +1,19 @@
secubox-toolbox (2.6.32-1~bookworm1) bookworm; urgency=medium
* Shared media proxy-cache (#577) — DEFAULT OFF (opt-in filter `media_cache`).
- mitmproxy_addons/media_cache.py : cacheable GET media/static (image /
video segment / audio / font / css / js) stored on disk under
/var/cache/secubox/toolbox/media, keyed by URL, served from cache to
EVERY R2/R3 client (one upstream fetch → all clients). Safety rails
for the RAM-light board : 16 MB/object cap gated on Content-Length
(large/progressive video passed through, never RAM-held by us), 2 GB
on-disk LRU, never caches Range/auth/Set-Cookie/no-store, fail-open.
- filters: media_cache toggle (WebUI panel). api: GET /admin/cache
(hits/misses/hit-rate/MB served/size). postinst: cache dir.
Wired into the mitm-wg launcher (request hook serves, response stores).
-- Gerald KERMA <devel@cybermind.fr> Sun, 14 Jun 2026 14:45:00 +0200
secubox-toolbox (2.6.31-1~bookworm1) bookworm; urgency=medium
* ad_ghost: remove ad placeholders entirely (#584, reverses #576). Ghosted

View File

@ -61,6 +61,9 @@ SBXFILTERS
# 4. Storage dir (SQLite + future PDF reports)
install -d -m 0750 -o secubox-toolbox -g secubox-toolbox /var/lib/secubox/toolbox
# #577 : shared media proxy-cache dir (opt-in via filters ; 2 GB LRU).
install -d -m 0750 -o secubox-toolbox -g secubox-toolbox /var/cache/secubox/toolbox/media 2>/dev/null || \
mkdir -p /var/cache/secubox/toolbox/media
# #536 : Android APK serve dir + best-effort fetch of the latest
# release asset (so GET /wg/toolbox.apk serves it locally/offline).
# Non-blocking : if there's no release yet / no network, the endpoint

View File

@ -0,0 +1,250 @@
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
#
# #577 — shared media proxy-cache. One fetch serves every R2/R3 client:
# cacheable GET media/static (image / video segment / audio / font / css /
# js) is stored on disk, keyed by URL, and served from cache on subsequent
# requests from ANY client — saving upstream bandwidth + latency.
#
# Safety rails for the RAM-light cabine:
# - DEFAULT OFF (filter `media_cache` toggle) ; instantly killable.
# - per-object cap (16 MB) gated on Content-Length BEFORE buffering, so a
# large/progressive video is passed through, never cached/RAM-held by us.
# - 2 GB on-disk LRU budget, evicted oldest-first.
# - never caches Range/partial, authenticated, Set-Cookie, no-store/private.
# - fail-open everywhere : a cache error must never break the flow.
from __future__ import annotations
import hashlib
import json
import os
import re
import sys
import time
from mitmproxy import http
# The addon is loaded by mitmproxy as a standalone script, so the toolbox
# library directory is put on sys.path before importing the filter config.
# If anything goes wrong, a stub that reports the cache as disabled is used.
try:
    if sys.path.count("/usr/lib/secubox/toolbox") == 0:
        sys.path.insert(0, "/usr/lib/secubox/toolbox")
    from secubox_toolbox.filters import get_filters
except Exception:
    def get_filters(force: bool = False):
        """Fallback filter reader: the media cache is always off."""
        return dict(media_cache=False)
# Root directory for cached bodies and their ".m" metadata sidecars
# (see _paths() for the per-key layout).
CACHE_DIR = "/var/cache/secubox/toolbox/media"
# JSON counters file rewritten by _flush_stats().
STATS = "/run/secubox/media_cache.json"
MAX_OBJ = 16 * 1024 * 1024 # 16 MB / object
MAX_TOTAL = 2 * 1024 * 1024 * 1024 # 2 GB on disk
DEFAULT_TTL = 3600 # 1 h when upstream gives no max-age
# Content-Type fragments accepted by _cacheable_ct(); each one is matched as
# a substring of the lower-cased media type with its parameters stripped.
_CACHEABLE = ("image/", "video/", "audio/", "font/", "text/css",
"javascript", "ecmascript", "application/font",
"application/vnd.ms-fontobject")
# Captures the max-age value (seconds) from a Cache-Control header value.
_MAXAGE = re.compile(r"max-age\s*=\s*(\d+)", re.IGNORECASE)
# in-memory index (mitmproxy hooks run single-threaded in the event loop)
_index: dict = {} # key -> {"size": int, "exp": float, "atime": float, "ct": str}
# Running byte total of the "size" of every entry held in _index.
_total = 0
# Counters exported through STATS; "since" is the module load time (epoch s).
_stats = {"hits": 0, "misses": 0, "stored": 0, "evicted": 0,
"bytes_served": 0, "since": int(time.time())}
# time.time() of the last stats write attempt; _flush_stats() throttles on it.
_last_flush = 0.0
def _key(url: str) -> str:
    """Return the cache key for ``url``: the hex SHA-256 of its UTF-8 bytes.

    Characters that cannot be encoded as UTF-8 are dropped before hashing.
    """
    digest = hashlib.sha256()
    digest.update(url.encode("utf-8", "ignore"))
    return digest.hexdigest()
def _paths(key: str):
    """Return ``(body_path, meta_path)`` for a cache key.

    Objects are sharded into sub-directories of CACHE_DIR named after the
    first two characters of the key; the metadata sidecar is the body path
    with ".m" appended.
    """
    body_path = os.path.join(CACHE_DIR, key[:2], key)
    return body_path, body_path + ".m"
def _enabled() -> bool:
    """Tell whether the ``media_cache`` filter toggle is currently on.

    Any error while reading the filter configuration counts as "off".
    """
    try:
        active = bool(get_filters().get("media_cache"))
    except Exception:
        active = False
    return active
def _cacheable_ct(ct: str) -> bool:
    """Return True when a Content-Type value names a cacheable media type.

    Parameters after the first ";" are ignored and the comparison is
    case-insensitive; an empty or missing value is never cacheable.
    """
    media_type = (ct or "").partition(";")[0].strip().lower()
    if not media_type:
        return False
    for fragment in _CACHEABLE:
        if fragment in media_type:
            return True
    return False
def _flush_stats(force: bool = False) -> None:
    """Persist the cache counters to ``STATS`` as JSON (best-effort).

    The payload is ``_stats`` plus ``objects`` (index length),
    ``bytes_cached`` (``_total``) and ``updated`` (epoch seconds).

    Writes are throttled to one per 5 seconds unless ``force`` is true.
    The document is written to a sibling temp file and moved into place with
    ``os.replace`` so that a concurrent reader never observes a truncated
    file. Any error is swallowed: statistics must never break the proxy flow.
    """
    global _last_flush
    now = time.time()
    if not force and (now - _last_flush) < 5:
        return
    # Stamp before writing so a failing filesystem is retried at most once
    # per throttle window rather than on every request.
    _last_flush = now
    payload = {**_stats, "objects": len(_index),
               "bytes_cached": _total, "updated": int(now)}
    tmp = STATS + ".tmp"
    try:
        os.makedirs(os.path.dirname(STATS), exist_ok=True)
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(payload, f)
        os.replace(tmp, STATS)
    except Exception:
        # Do not leave a half-written temp file behind.
        try:
            os.remove(tmp)
        except OSError:
            pass
def _read_entry(body_path: str, meta_path: str):
    """Build the index record for one cached object, or None when unusable.

    An object is usable only when its body can be stat'ed and its ``.m``
    sidecar parses to a JSON object carrying a positive numeric ``exp``.
    """
    try:
        st = os.stat(body_path)
        with open(meta_path, encoding="utf-8") as mf:
            meta = json.load(mf)
        exp = meta.get("exp")
        ct = meta.get("ct", "")
    except Exception:
        return None
    # `not exp > 0` also rejects NaN, which JSON decoding can produce.
    if isinstance(exp, bool) or not isinstance(exp, (int, float)) or not exp > 0:
        return None
    return {"size": st.st_size, "exp": exp, "atime": st.st_atime,
            "ct": ct if isinstance(ct, str) else ""}


def _load_index() -> None:
    """Rebuild the in-memory index from disk on startup (best-effort).

    Walks every shard directory under CACHE_DIR and registers each body file
    that has a valid metadata sidecar, adding its size to ``_total``.

    Files that cannot be served safely are deleted instead of indexed:
    leftover ``*.tmp`` files from an interrupted store, bodies whose sidecar
    is missing/unreadable/without a usable expiry (they would otherwise have
    no expiry and no content type), and sidecars whose body is gone.
    A missing or unreadable cache directory leaves the index untouched.
    """
    global _total
    try:
        shards = os.listdir(CACHE_DIR)
    except OSError:
        return
    for sub in shards:
        d = os.path.join(CACHE_DIR, sub)
        if not os.path.isdir(d):
            continue
        try:
            names = set(os.listdir(d))
        except OSError:
            continue
        for name in names:
            fp = os.path.join(d, name)
            if name.endswith(".m"):
                # Sidecar: keep it only while its body is present.
                doomed = () if name[:-2] in names else (fp,)
            elif name.endswith(".tmp"):
                doomed = (fp,)
            else:
                entry = _read_entry(fp, fp + ".m")
                if entry is not None:
                    _index[name] = entry
                    _total += entry["size"]
                    continue
                doomed = (fp, fp + ".m")
            for path in doomed:
                try:
                    os.remove(path)
                except OSError:
                    pass
def _evict_if_needed() -> None:
    """Delete least-recently-used objects until the cache fits MAX_TOTAL.

    Entries are visited in ascending ``atime`` order. For each victim the
    body and its sidecar are removed (missing files are ignored), its size
    is subtracted from ``_total``, it is dropped from ``_index`` and the
    ``evicted`` counter is incremented.
    """
    global _total
    if _total <= MAX_TOTAL:
        return
    # Snapshot of the keys, least recently used first.
    by_age = sorted(_index, key=lambda k: _index[k]["atime"])
    for victim in by_age:
        if _total <= MAX_TOTAL:
            break
        record = _index[victim]
        for path in _paths(victim):
            try:
                os.remove(path)
            except OSError:
                pass
        _total -= record["size"]
        _index.pop(victim, None)
        _stats["evicted"] += 1
class MediaCache:
    """mitmproxy addon implementing the shared media cache.

    ``request`` answers cacheable GETs straight from disk when a fresh copy
    is indexed; ``response`` stores eligible upstream answers. Both hooks are
    no-ops while the ``media_cache`` filter is off.
    """

    def __init__(self):
        try:
            os.makedirs(CACHE_DIR, exist_ok=True)
            _load_index()
        except Exception:
            # fail-open: an unusable cache directory only means nothing is cached
            pass

    @staticmethod
    def _forget(key: str) -> None:
        """Drop ``key`` from the index and from disk, keeping ``_total`` exact."""
        global _total
        entry = _index.pop(key, None)
        if entry:
            _total -= entry.get("size", 0)
        body_path, meta_path = _paths(key)
        for path in (body_path + ".tmp", body_path, meta_path):
            try:
                os.remove(path)
            except OSError:
                pass

    # ── serve from cache (request hook) ──
    def request(self, flow: http.HTTPFlow) -> None:
        """Short-circuit the flow with the cached body when a fresh copy exists.

        Only plain GETs are considered: requests carrying ``Range`` or
        ``Authorization`` always go upstream. Every lookup that ends up going
        upstream (unknown key, stale entry, unreadable body) counts as a miss.
        """
        if not _enabled():
            return
        r = flow.request
        if r.method != "GET":
            return
        if "range" in r.headers or "authorization" in r.headers:
            return
        key = _key(r.pretty_url or "")
        e = _index.get(key)
        now = time.time()
        # An entry without a usable expiry is treated as stale rather than
        # eternal, so it is refetched and re-stored with proper metadata.
        if not e or (e.get("exp") or 0) < now:
            _stats["misses"] += 1
            return
        body_path, _m = _paths(key)
        try:
            with open(body_path, "rb") as f:
                body = f.read()
        except OSError:
            # The body vanished or is unreadable: forget it completely so the
            # size accounting does not drift, then let the request go upstream.
            self._forget(key)
            _stats["misses"] += 1
            return
        e["atime"] = now
        try:
            os.utime(body_path, None)
        except OSError:
            pass
        _stats["hits"] += 1
        _stats["bytes_served"] += len(body)
        _flush_stats()
        # NOTE(review): a HIT carries only these three headers; upstream CORS
        # headers (e.g. Access-Control-Allow-Origin) are not replayed — confirm
        # that cross-origin fonts/scripts still load from cache.
        flow.response = http.Response.make(
            200, body,
            {"Content-Type": e.get("ct") or "application/octet-stream",
             "X-SecuBox-Cache": "HIT",
             "Cache-Control": "public, max-age=300"},
        )

    # ── store to cache (response hook) ──
    def response(self, flow: http.HTTPFlow) -> None:
        """Store an eligible upstream response on disk and index it.

        Eligible means: GET answered with 200, no ``Range``/``Authorization``
        on the request, not one of our own HITs, no ``no-store``/``private``/
        ``no-cache`` directive, no ``Set-Cookie``, no ``Vary`` on anything
        other than ``Accept-Encoding`` (the key is the URL alone), a cacheable
        Content-Type, a Content-Length within MAX_OBJ and a positive lifetime.
        Any I/O failure rolls the entry back and leaves the flow untouched.
        """
        global _total
        if not _enabled() or not flow.response:
            return
        r = flow.request
        resp = flow.response
        if r.method != "GET" or resp.status_code != 200:
            return
        if "range" in r.headers or "authorization" in r.headers:
            return
        if resp.headers.get("x-secubox-cache") == "HIT":
            return
        cc = (resp.headers.get("cache-control", "") or "").lower()
        if "no-store" in cc or "private" in cc or "no-cache" in cc:
            return
        if "set-cookie" in resp.headers:
            return
        # Entries are keyed by URL only, so a response that varies on any
        # request header other than Accept-Encoding cannot be shared safely.
        vary = {v.strip().lower()
                for v in (resp.headers.get("vary", "") or "").split(",")
                if v.strip()}
        if vary - {"accept-encoding"}:
            return
        if not _cacheable_ct(resp.headers.get("content-type", "")):
            return
        # size gate on the HEADER — never cache (nor force-buffer) > MAX_OBJ
        try:
            clen = int(resp.headers.get("content-length", "0") or "0")
        except (TypeError, ValueError):
            clen = 0
        if clen <= 0 or clen > MAX_OBJ:
            return
        try:
            body = resp.content or b""
        except Exception:
            return
        if not body or len(body) > MAX_OBJ:
            return
        # freshness window
        m = _MAXAGE.search(cc)
        ttl = int(m.group(1)) if m else DEFAULT_TTL
        if ttl <= 0:
            return
        # One timestamp so the sidecar and the index agree on the expiry.
        now = time.time()
        exp = now + ttl
        ct = (resp.headers.get("content-type", "") or "").split(";")[0].strip()
        key = _key(r.pretty_url or "")
        body_path, meta_path = _paths(key)
        tmp = body_path + ".tmp"
        try:
            os.makedirs(os.path.dirname(body_path), exist_ok=True)
            with open(tmp, "wb") as f:
                f.write(body)
            os.replace(tmp, body_path)
            with open(meta_path, "w", encoding="utf-8") as f:
                json.dump({"ct": ct,
                           "exp": exp,
                           "url": (r.pretty_url or "")[:300]}, f)
        except Exception:
            # Roll back: no temp file, half-stored body or stale index entry
            # may survive a failed store.
            self._forget(key)
            return
        old = _index.get(key, {}).get("size", 0)
        _total += len(body) - old
        _index[key] = {"size": len(body), "exp": exp, "atime": now, "ct": ct}
        _stats["stored"] += 1
        _evict_if_needed()
        _flush_stats()
# Addon instance(s) exposed to mitmproxy; the launcher loads this file as a
# script (-s), which by mitmproxy's script convention reads `addons`.
addons = [MediaCache()]

View File

@ -110,7 +110,7 @@ fi
# ad_ghost (#566) runs right after protective_mode: for R3+/R4 it 204s known
# ad/tracker hosts (bandwidth save) at request time and injects ad-hiding CSS
# on HTML responses. Gated by the modular filter config (toolbox WebUI).
for addon in inject_xff utiq_defense protective_mode ad_ghost local_store social_graph inject_banner dpi cookies avatar ja4 soc_relay cert_pin_detect media_stats; do
for addon in inject_xff utiq_defense protective_mode ad_ghost media_cache local_store social_graph inject_banner dpi cookies avatar ja4 soc_relay cert_pin_detect media_stats; do
ARGS+=(-s "$ADDON_DIR/${addon}.py")
done

View File

@ -2455,6 +2455,32 @@ _MEDIA_EMOJI = {
}
@router.get("/admin/cache")
async def admin_cache() -> dict:
    """#577 — report the shared media cache counters.

    Starts from zeroed counters, overlays whatever the addon last wrote to
    /run/secubox/media_cache.json (if readable), then adds the derived
    ``hit_rate`` (percent), ``mb_served`` / ``mb_cached`` (MiB, one decimal)
    and the current ``enabled`` state of the ``media_cache`` filter.
    """
    import json as _json
    from pathlib import Path as _P

    report: dict = dict.fromkeys(
        ("hits", "misses", "stored", "evicted",
         "bytes_served", "objects", "bytes_cached"), 0)
    report["since"] = None
    report["updated"] = None

    # Overlay the counters published by the addon; keep the zeros on any error.
    try:
        stats_file = _P("/run/secubox/media_cache.json")
        if stats_file.exists():
            report.update(_json.loads(stats_file.read_text()))
    except Exception:
        pass

    lookups = report["hits"] + report["misses"]
    if not lookups:
        lookups = 1  # avoid dividing by zero before any traffic
    report["hit_rate"] = round(100 * report["hits"] / lookups, 1)
    for target, source in (("mb_served", "bytes_served"),
                           ("mb_cached", "bytes_cached")):
        report[target] = round(report[source] / 1048576, 1)

    try:
        from .filters import get_filters as _gf
        switched_on = bool(_gf().get("media_cache"))
    except Exception:
        switched_on = False
    report["enabled"] = switched_on
    return report
@router.get("/admin/media")
async def admin_media() -> dict:
"""#570 — DPI media/content-type statistics for the donut UI."""
@ -2570,6 +2596,9 @@ async def admin_filters_ui() -> HTMLResponse:
<h2>Ghosting pub (R3+/R4)</h2>
<label><input type=checkbox data-k=ad_ghost> Masquer pubs/bannières/widgets (cosmétique)</label>
<label><input type=checkbox data-k=ad_ghost_block> Bloquer les hôtes pub/traceurs (économise la bande passante)</label>
<h2>Cache média partagé (#577)</h2>
<label><input type=checkbox data-k=media_cache> Cache média/photo/vidéo partagé (2 Go, 1 fetch tous les clients)</label>
<h2>Catégories ghosting</h2>
<label><input type=checkbox data-c=ads> · catégorie : publicités</label>
<label><input type=checkbox data-c=consent_nag> · catégorie : bandeaux cookies/consentement</label>
<label><input type=checkbox data-c=newsletter> · catégorie : pop-ups newsletter</label>

View File

@ -21,6 +21,7 @@ DEFAULTS: Dict = {
"protective": "spoof", # off | alert | spoof (tracker spoofer)
"ad_ghost": True, # R3+/R4 silent ad/banner/widget ghosting
"ad_ghost_block": True, # 204 known ad/tracker hosts (save bandwidth)
"media_cache": False, # #577 shared media proxy-cache (opt-in)
"ad_ghost_categories": { # cosmetic ghost groups
"ads": True,
"consent_nag": True,
@ -72,7 +73,7 @@ def set_filters(patch: Dict) -> Dict:
if ck in DEFAULTS["ad_ghost_categories"]})
elif k == "protective" and v in _VALID_PROTECTIVE:
cur["protective"] = v
elif k in ("banner", "ad_ghost", "ad_ghost_block"):
elif k in ("banner", "ad_ghost", "ad_ghost_block", "media_cache"):
cur[k] = bool(v)
try:
os.makedirs(os.path.dirname(FILTERS_PATH), exist_ok=True)