Compare commits

..

3 Commits

Author SHA1 Message Date
6dba5a08d6 docs: WAF open-proxy fix + behind-WAF media cache (#605, #607)
Some checks are pending
License Headers / check (push) Waiting to run
2026-06-15 18:10:49 +02:00
CyberMind
211cff09b5
Merge pull request #608 from CyberMind-FR/feature/607-waf-behind-waf-media-cache-image-video-s
feat(waf): behind-WAF media cache (image/video/static) for hosted vhosts
2026-06-15 18:09:44 +02:00
3290f3b7c0 feat(waf): behind-WAF media cache for hosted vhosts (closes #607)
New media_cache.py addon (both synced copies) caches cacheable GET
media/static (image/video/audio/font/css/js) from our vhosts on disk
(URL key, 16MB/obj, 2GB LRU, TTL from max-age) and serves repeats from
cache. NOT a bypass: requests still pass secubox_waf inspection; only the
response body is served from a WAF-populated cache. Loaded in the LXC
mitmproxy.service; wafctl creates the cache dir. Toggle via
/data/mitmproxy/media-cache.json (default on). Verified live: HIT.
2026-06-15 18:09:25 +02:00
7 changed files with 516 additions and 2 deletions

View File

@ -3,6 +3,33 @@
---
## 2026-06-15 — WAF hardening + perf: close open-proxy, behind-WAF media cache
Follow-up to the WAF restoration. Three findings investigated; two fixed.
- **Open forward-proxy / loops (#605/PR #606, mitmproxy 1.0.6 + waf 1.2.4).**
`--mode regular` + HAProxy `default_backend mitmproxy_inspector` made the WAF
an open proxy: internet scanners (114.66.25.146, 211.154.17.165,
hashtagbrock.nl) drove a **72% backend-error rate** + 11 self-loop 508s/hr.
The `requestheaders` hook now serves ONLY our vhosts (routes / our domains
via routes-derived `local_suffixes` → nginx :9080 / `SELF_HOSTS`) and returns
**421 with no upstream connect** otherwise. Live: 0 external server-connects,
0 loop-508s, apt/admin/kbin 200, scanners 421.
- **Behind-WAF media cache (#607/PR #608, mitmproxy 1.0.7 + waf 1.2.5).** New
`media_cache.py` addon caches cacheable GET media/static (image/video/audio/
font/css/js) from our vhosts on disk (URL key, 16 MB/obj, 2 GB LRU, TTL from
`max-age`) and serves repeats from cache — backend-load + latency win for
hosted media. **Not a bypass**: requests still pass `secubox_waf` inspection;
only the response body is served from a WAF-populated cache. Toggle
`/data/mitmproxy/media-cache.json` (default on). Live: `X-SecuBox-Cache: HIT`.
Gate fix vs the toolbox copy: the size gate checks the actual body length, not only `Content-Length` (our nginx responds chunked, i.e. without that header).
- **WG R3 tunnel** (`wg-toolbox`, 4 peers, 4 `mitm-wg-worker@{1..4}`) is
healthy — not the bottleneck; the WAF open-proxy churn was. All fixes ported
to source (both synced `secubox_waf.py` copies) + rebuilt into apt.secubox.in.
**Still optional:** relax the forced `Connection: close` (FD-leak fix #496) to
bounded keep-alive now that scanner churn is gone — lower per-request latency.
## 2026-06-15 — APT repo: all packages published + signed (apt.secubox.in)
Made the apt repo at `https://admin.gk2.secubox.in/repo/` (served from

View File

@ -0,0 +1,231 @@
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
#
# #607 — behind-WAF media proxy-cache for the mitmproxy inspection LXC.
# Cacheable GET media/static (image / video / audio / font / css / js) served
# by our own vhosts is stored on disk keyed by URL and served from cache on
# repeat requests — cutting backend load + latency for hosted media
# (peertube / photoprism / nextcloud …). NOT a WAF bypass: the request still
# passes secubox_waf inspection (request hook runs first); only the response
# BODY is served from a cache the WAF itself populated from inspected
# responses. Fail-open everywhere — a cache error never breaks the flow.
from __future__ import annotations
import hashlib
import json
import os
import re
import time
from mitmproxy import http
# --- On-disk locations -------------------------------------------------------
CACHE_DIR = "/data/mitmproxy/cache/media"
STATS = "/data/mitmproxy/logs/media_cache.json"
CONFIG = "/data/mitmproxy/media-cache.json"  # {"enabled": true} — default on

# --- Limits ------------------------------------------------------------------
MAX_OBJ = 16 << 20      # 16 MB / object
MAX_TOTAL = 2 << 30     # 2 GB on disk
DEFAULT_TTL = 60 * 60   # 1 h when upstream gives no max-age

# Fragments looked for inside the (lower-cased) response media type.
_CACHEABLE = (
    "image/",
    "video/",
    "audio/",
    "font/",
    "text/css",
    "javascript",
    "ecmascript",
    "application/font",
    "application/vnd.ms-fontobject",
)
_MAXAGE = re.compile(r"max-age\s*=\s*(\d+)", re.IGNORECASE)

# --- Process-local state -----------------------------------------------------
# key -> {"size", "exp", "atime", "ct"} for every object known to be on disk.
_index: dict = {}
# Sum of the "size" fields in _index, in bytes.
_total = 0
# Counters written to STATS by _flush_stats().
_stats = {
    "hits": 0,
    "misses": 0,
    "stored": 0,
    "evicted": 0,
    "bytes_served": 0,
    "since": int(time.time()),
}
# time.time() of the last stats write (used to throttle writes).
_last_flush = 0.0
# Last successfully loaded CONFIG content and the mtime it was read at.
_cfg = {"enabled": True}
_cfg_mtime = 0.0
def _key(url: str) -> str:
    """Return the cache key for *url*: the hex SHA-256 digest of its UTF-8 bytes.

    Characters that cannot be encoded are dropped (``errors="ignore"``).
    """
    digest = hashlib.sha256()
    digest.update(url.encode("utf-8", "ignore"))
    return digest.hexdigest()
def _paths(key: str):
    """Return ``(body_path, meta_path)`` for the cache entry *key*.

    Entries are sharded into sub-directories of ``CACHE_DIR`` named after the
    first two characters of the key; the metadata file is the body path with
    ``.m`` appended.
    """
    body_path = os.path.join(CACHE_DIR, key[:2], key)
    return body_path, body_path + ".m"
def _enabled() -> bool:
    """Return whether the cache is switched on, re-reading CONFIG when it changes.

    The file is only re-parsed when its mtime differs from the one seen last.
    A missing file keeps the current setting (initially enabled). Any other
    problem — unreadable file, malformed JSON, or JSON that is not an object —
    fails open and reports the cache as enabled for this call, leaving the
    previously loaded setting in place.
    """
    global _cfg, _cfg_mtime
    try:
        st = os.stat(CONFIG)
        if st.st_mtime != _cfg_mtime:
            _cfg_mtime = st.st_mtime
            with open(CONFIG, encoding="utf-8") as f:
                loaded = json.load(f)
            # Valid JSON that is not an object (e.g. `[]` or `true`) must not
            # replace _cfg: the `.get` below would raise on every later call.
            if not isinstance(loaded, dict):
                raise ValueError("media-cache config must be a JSON object")
            _cfg = loaded
    except FileNotFoundError:
        pass
    except Exception:
        return True
    return bool(_cfg.get("enabled", True))
def _cacheable_ct(ct: str) -> bool:
    """Return True when the Content-Type value *ct* names a cacheable media type.

    Parameters after ``;`` are discarded and the comparison is
    case-insensitive; the type qualifies when it contains any of the
    ``_CACHEABLE`` fragments. An empty or missing value is never cacheable.
    """
    media_type = (ct or "").partition(";")[0].strip().lower()
    if not media_type:
        return False
    for fragment in _CACHEABLE:
        if fragment in media_type:
            return True
    return False
def _flush_stats(force: bool = False) -> None:
    """Write the counters to STATS as JSON, at most once every 5 seconds.

    Args:
        force: write even if the last write was less than 5 seconds ago.

    The snapshot is written to a temporary sibling file and moved into place
    with ``os.replace`` so a reader never sees a truncated or half-written
    document. Any failure is swallowed: statistics are best-effort and must
    not disturb the proxy flow.
    """
    global _last_flush
    now = time.time()
    if not force and (now - _last_flush) < 5:
        return
    _last_flush = now
    tmp = STATS + ".tmp"
    try:
        os.makedirs(os.path.dirname(STATS), exist_ok=True)
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump({**_stats, "objects": len(_index),
                       "bytes_cached": _total, "updated": int(now)}, f)
        os.replace(tmp, STATS)
    except Exception:
        pass
def _load_index() -> None:
    """Rebuild the in-memory index from the files already under CACHE_DIR.

    Every regular entry inside a shard directory that is not a ``.m``
    metadata file is treated as a cached body; its expiry and content type
    come from the sibling ``<name>.m`` JSON file when that exists (otherwise
    ``exp`` is 0 and ``ct`` is empty). Entries that cannot be read are
    skipped. A missing CACHE_DIR is not an error.
    """
    global _total
    try:
        shards = os.listdir(CACHE_DIR)
    except FileNotFoundError:
        return
    for sub in shards:
        d = os.path.join(CACHE_DIR, sub)
        if not os.path.isdir(d):
            continue
        try:
            names = os.listdir(d)
        except OSError:
            # One unreadable shard must not discard the rest of the cache.
            continue
        for name in names:
            if name.endswith(".m"):
                continue
            fp = os.path.join(d, name)
            if name.endswith(".tmp"):
                # Leftover of an interrupted store (bodies are written to
                # "<key>.tmp" first). It is not a cache object: indexing it
                # would create a bogus key, so remove it instead.
                # NOTE(review): assumes no other process is writing into this
                # cache directory while the index is loaded — confirm.
                try:
                    os.remove(fp)
                except OSError:
                    pass
                continue
            try:
                st = os.stat(fp)
                meta = {}
                mp = fp + ".m"
                if os.path.exists(mp):
                    with open(mp, encoding="utf-8") as mf:
                        meta = json.load(mf)
                _index[name] = {"size": st.st_size, "exp": meta.get("exp", 0),
                                "atime": st.st_atime, "ct": meta.get("ct", "")}
                _total += st.st_size
            except Exception:
                pass
def _evict_if_needed() -> None:
    """Delete least-recently-used entries until the cache fits in MAX_TOTAL.

    Entries are visited in ascending ``atime`` order. For each one the body
    and metadata files are removed (errors ignored), its size is subtracted
    from the running total, it is dropped from the index and the ``evicted``
    counter is incremented.
    """
    global _total
    if _total <= MAX_TOTAL:
        return
    oldest_first = sorted(_index, key=lambda k: _index[k]["atime"])
    for victim in oldest_first:
        if _total <= MAX_TOTAL:
            break
        entry = _index[victim]
        for path in _paths(victim):
            try:
                os.remove(path)
            except OSError:
                pass
        _total -= entry["size"]
        _index.pop(victim, None)
        _stats["evicted"] += 1
class MediaCache:
    """mitmproxy addon: answer repeat media requests from disk, fill on response.

    ``request`` short-circuits a GET with a cached body when a fresh entry
    exists; ``response`` stores eligible upstream responses. Both hooks are
    no-ops when the cache is disabled and never raise on cache errors.
    """

    def __init__(self):
        # Fail-open: an unusable cache directory must not stop the proxy.
        try:
            os.makedirs(CACHE_DIR, exist_ok=True)
            _load_index()
        except Exception:
            pass

    def request(self, flow: http.HTTPFlow) -> None:
        """Serve *flow* from cache when a non-expired entry exists for its URL.

        Only plain GETs are considered; requests carrying ``Range`` or
        ``Authorization`` always go upstream. On a hit ``flow.response`` is
        set to a 200 built from the stored body and content type.
        """
        global _total
        if not _enabled():
            return
        r = flow.request
        if r.method != "GET" or "range" in r.headers or "authorization" in r.headers:
            return
        # NOTE(review): requests carrying a Cookie are still served from (and
        # stored into) the URL-keyed cache. Media gated only by a session
        # cookie and sent without `Cache-Control: private` would be shared
        # between clients — confirm this is acceptable for every hosted vhost.
        key = _key(r.pretty_url or "")
        e = _index.get(key)
        if not e:
            _stats["misses"] += 1
            return
        now = time.time()
        if e["exp"] and e["exp"] < now:
            return
        body_path, _m = _paths(key)
        try:
            with open(body_path, "rb") as f:
                body = f.read()
        except OSError:
            # The body vanished from disk: forget the entry AND give its
            # bytes back, otherwise _total drifts upward and triggers
            # eviction of objects that would still fit.
            if _index.pop(key, None) is not None:
                _total -= e["size"]
            return
        e["atime"] = now
        try:
            # Mirror the access time on disk so a reloaded index keeps LRU order.
            os.utime(body_path, None)
        except OSError:
            pass
        _stats["hits"] += 1
        _stats["bytes_served"] += len(body)
        _flush_stats()
        # Never advertise freshness beyond the remaining lifetime of the entry.
        max_age = 300
        if e["exp"]:
            max_age = min(max_age, int(e["exp"] - now))
        flow.response = http.Response.make(
            200, body,
            {"Content-Type": e.get("ct") or "application/octet-stream",
             "X-SecuBox-Cache": "HIT",
             "Cache-Control": f"public, max-age={max_age}"},
        )

    def response(self, flow: http.HTTPFlow) -> None:
        """Store the upstream response of *flow* when it is safe to reuse.

        Eligible: 200 answers to GETs without ``Range``/``Authorization``,
        with a cacheable media type, a non-empty body of at most MAX_OBJ
        bytes, no ``Set-Cookie``, no ``no-store``/``no-cache``/``private``
        directive, no ``Vary`` other than ``Accept-Encoding`` and a positive
        TTL (``max-age`` or DEFAULT_TTL).
        """
        global _total
        if not _enabled() or not flow.response:
            return
        r = flow.request
        resp = flow.response
        if r.method != "GET" or resp.status_code != 200:
            return
        if "range" in r.headers or "authorization" in r.headers:
            return
        # Responses produced by request() above pass through here as well.
        if resp.headers.get("x-secubox-cache") == "HIT":
            return
        cc = (resp.headers.get("cache-control", "") or "").lower()
        # `no-cache` demands revalidation before reuse, which this cache
        # cannot perform, so such responses are not stored at all.
        if ("no-store" in cc or "no-cache" in cc or "private" in cc
                or "set-cookie" in resp.headers):
            return
        # Entries are keyed by URL only: a response that varies on any
        # request header could be replayed to a client it does not fit.
        # Accept-Encoding is exempt — presumably `resp.content` below is the
        # decoded body and hits are served without Content-Encoding; confirm
        # against the mitmproxy version in use.
        vary = (resp.headers.get("vary", "") or "").lower()
        if any(tok.strip() not in ("", "accept-encoding") for tok in vary.split(",")):
            return
        if not _cacheable_ct(resp.headers.get("content-type", "")):
            return
        try:
            clen = int(resp.headers.get("content-length", "0") or "0")
        except (TypeError, ValueError):
            clen = 0
        if clen > MAX_OBJ:  # header short-circuit; body-size gate below covers chunked
            return
        try:
            body = resp.content or b""
        except Exception:
            return
        if not body or len(body) > MAX_OBJ:
            return
        m = _MAXAGE.search(cc)
        ttl = int(m.group(1)) if m else DEFAULT_TTL
        if ttl <= 0:
            return
        key = _key(r.pretty_url or "")
        body_path, meta_path = _paths(key)
        ct = (resp.headers.get("content-type", "") or "").split(";")[0]
        now = time.time()
        exp = now + ttl  # one timestamp shared by the metadata file and the index
        tmp = body_path + ".tmp"
        replaced = False
        try:
            os.makedirs(os.path.dirname(body_path), exist_ok=True)
            with open(tmp, "wb") as f:
                f.write(body)
            os.replace(tmp, body_path)
            replaced = True
            with open(meta_path, "w", encoding="utf-8") as f:
                json.dump({"ct": ct, "exp": exp,
                           "url": (r.pretty_url or "")[:300]}, f)
        except Exception:
            # Do not leave a half-stored entry behind. The temp file is
            # always removed; if the body was already swapped in, the
            # metadata no longer describes it, so drop the whole entry.
            doomed = (tmp, body_path, meta_path) if replaced else (tmp,)
            for p in doomed:
                try:
                    os.remove(p)
                except OSError:
                    pass
            if replaced:
                stale = _index.pop(key, None)
                if stale:
                    _total -= stale["size"]
            return
        old = _index.get(key, {}).get("size", 0)
        _total += len(body) - old
        _index[key] = {"size": len(body), "exp": exp,
                       "atime": now, "ct": ct}
        _stats["stored"] += 1
        _evict_if_needed()
        _flush_stats()
# Module-level addon list picked up by mitmproxy when the script is loaded.
# The instance is created at import time, which also creates CACHE_DIR and
# builds the in-memory index (see MediaCache.__init__).
addons = [MediaCache()]

View File

@ -1,3 +1,16 @@
secubox-mitmproxy (1.0.7-1~bookworm1) bookworm; urgency=medium
* feat(waf): behind-WAF media cache (#607). New media_cache.py addon caches
cacheable GET media/static (image/video/audio/font/css/js) from our vhosts
on disk (URL key, 16 MB/obj, 2 GB LRU, TTL from max-age) and serves repeat
requests from cache — cutting backend load + latency for hosted media. Not
a bypass: requests still pass secubox_waf inspection; only the response
body is served from a WAF-populated cache. Toggle via
/data/mitmproxy/media-cache.json {"enabled": true} (default on). Verified
live: X-SecuBox-Cache: HIT.
-- Gerald KERMA <devel@cybermind.fr> Mon, 15 Jun 2026 17:00:00 +0200
secubox-mitmproxy (1.0.6-1~bookworm1) bookworm; urgency=medium
* fix(waf): refuse unmapped hosts — close the open forward-proxy (#605). In

View File

@ -1,3 +1,13 @@
secubox-waf (1.2.5-1~bookworm1) bookworm; urgency=medium
* feat(waf): behind-WAF media cache (#607) — ship media_cache.py addon copy,
load it in the LXC mitmproxy.service ExecStart, and create
/data/mitmproxy/cache/media + logs in wafctl provisioning. Caches hosted
media (image/video/static) for repeat requests; not a bypass (requests
still inspected). Synced with secubox-mitmproxy.
-- Gerald KERMA <devel@cybermind.fr> Mon, 15 Jun 2026 17:00:00 +0200
secubox-waf (1.2.4-1~bookworm1) bookworm; urgency=medium
* fix(waf): refuse unmapped hosts in the addon copy — close the open

View File

@ -0,0 +1,231 @@
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
#
# #607 — behind-WAF media proxy-cache for the mitmproxy inspection LXC.
# Cacheable GET media/static (image / video / audio / font / css / js) served
# by our own vhosts is stored on disk keyed by URL and served from cache on
# repeat requests — cutting backend load + latency for hosted media
# (peertube / photoprism / nextcloud …). NOT a WAF bypass: the request still
# passes secubox_waf inspection (request hook runs first); only the response
# BODY is served from a cache the WAF itself populated from inspected
# responses. Fail-open everywhere — a cache error never breaks the flow.
from __future__ import annotations
import hashlib
import json
import os
import re
import time
from mitmproxy import http
# --- On-disk locations -------------------------------------------------------
CACHE_DIR = "/data/mitmproxy/cache/media"
STATS = "/data/mitmproxy/logs/media_cache.json"
CONFIG = "/data/mitmproxy/media-cache.json"  # {"enabled": true} — default on

# --- Limits ------------------------------------------------------------------
MAX_OBJ = 16 << 20      # 16 MB / object
MAX_TOTAL = 2 << 30     # 2 GB on disk
DEFAULT_TTL = 60 * 60   # 1 h when upstream gives no max-age

# Fragments looked for inside the (lower-cased) response media type.
_CACHEABLE = (
    "image/",
    "video/",
    "audio/",
    "font/",
    "text/css",
    "javascript",
    "ecmascript",
    "application/font",
    "application/vnd.ms-fontobject",
)
_MAXAGE = re.compile(r"max-age\s*=\s*(\d+)", re.IGNORECASE)

# --- Process-local state -----------------------------------------------------
# key -> {"size", "exp", "atime", "ct"} for every object known to be on disk.
_index: dict = {}
# Sum of the "size" fields in _index, in bytes.
_total = 0
# Counters written to STATS by _flush_stats().
_stats = {
    "hits": 0,
    "misses": 0,
    "stored": 0,
    "evicted": 0,
    "bytes_served": 0,
    "since": int(time.time()),
}
# time.time() of the last stats write (used to throttle writes).
_last_flush = 0.0
# Last successfully loaded CONFIG content and the mtime it was read at.
_cfg = {"enabled": True}
_cfg_mtime = 0.0
def _key(url: str) -> str:
    """Return the cache key for *url*: the hex SHA-256 digest of its UTF-8 bytes.

    Characters that cannot be encoded are dropped (``errors="ignore"``).
    """
    digest = hashlib.sha256()
    digest.update(url.encode("utf-8", "ignore"))
    return digest.hexdigest()
def _paths(key: str):
    """Return ``(body_path, meta_path)`` for the cache entry *key*.

    Entries are sharded into sub-directories of ``CACHE_DIR`` named after the
    first two characters of the key; the metadata file is the body path with
    ``.m`` appended.
    """
    body_path = os.path.join(CACHE_DIR, key[:2], key)
    return body_path, body_path + ".m"
def _enabled() -> bool:
    """Return whether the cache is switched on, re-reading CONFIG when it changes.

    The file is only re-parsed when its mtime differs from the one seen last.
    A missing file keeps the current setting (initially enabled). Any other
    problem — unreadable file, malformed JSON, or JSON that is not an object —
    fails open and reports the cache as enabled for this call, leaving the
    previously loaded setting in place.
    """
    global _cfg, _cfg_mtime
    try:
        st = os.stat(CONFIG)
        if st.st_mtime != _cfg_mtime:
            _cfg_mtime = st.st_mtime
            with open(CONFIG, encoding="utf-8") as f:
                loaded = json.load(f)
            # Valid JSON that is not an object (e.g. `[]` or `true`) must not
            # replace _cfg: the `.get` below would raise on every later call.
            if not isinstance(loaded, dict):
                raise ValueError("media-cache config must be a JSON object")
            _cfg = loaded
    except FileNotFoundError:
        pass
    except Exception:
        return True
    return bool(_cfg.get("enabled", True))
def _cacheable_ct(ct: str) -> bool:
    """Return True when the Content-Type value *ct* names a cacheable media type.

    Parameters after ``;`` are discarded and the comparison is
    case-insensitive; the type qualifies when it contains any of the
    ``_CACHEABLE`` fragments. An empty or missing value is never cacheable.
    """
    media_type = (ct or "").partition(";")[0].strip().lower()
    if not media_type:
        return False
    for fragment in _CACHEABLE:
        if fragment in media_type:
            return True
    return False
def _flush_stats(force: bool = False) -> None:
    """Write the counters to STATS as JSON, at most once every 5 seconds.

    Args:
        force: write even if the last write was less than 5 seconds ago.

    The snapshot is written to a temporary sibling file and moved into place
    with ``os.replace`` so a reader never sees a truncated or half-written
    document. Any failure is swallowed: statistics are best-effort and must
    not disturb the proxy flow.
    """
    global _last_flush
    now = time.time()
    if not force and (now - _last_flush) < 5:
        return
    _last_flush = now
    tmp = STATS + ".tmp"
    try:
        os.makedirs(os.path.dirname(STATS), exist_ok=True)
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump({**_stats, "objects": len(_index),
                       "bytes_cached": _total, "updated": int(now)}, f)
        os.replace(tmp, STATS)
    except Exception:
        pass
def _load_index() -> None:
    """Rebuild the in-memory index from the files already under CACHE_DIR.

    Every regular entry inside a shard directory that is not a ``.m``
    metadata file is treated as a cached body; its expiry and content type
    come from the sibling ``<name>.m`` JSON file when that exists (otherwise
    ``exp`` is 0 and ``ct`` is empty). Entries that cannot be read are
    skipped. A missing CACHE_DIR is not an error.
    """
    global _total
    try:
        shards = os.listdir(CACHE_DIR)
    except FileNotFoundError:
        return
    for sub in shards:
        d = os.path.join(CACHE_DIR, sub)
        if not os.path.isdir(d):
            continue
        try:
            names = os.listdir(d)
        except OSError:
            # One unreadable shard must not discard the rest of the cache.
            continue
        for name in names:
            if name.endswith(".m"):
                continue
            fp = os.path.join(d, name)
            if name.endswith(".tmp"):
                # Leftover of an interrupted store (bodies are written to
                # "<key>.tmp" first). It is not a cache object: indexing it
                # would create a bogus key, so remove it instead.
                # NOTE(review): assumes no other process is writing into this
                # cache directory while the index is loaded — confirm.
                try:
                    os.remove(fp)
                except OSError:
                    pass
                continue
            try:
                st = os.stat(fp)
                meta = {}
                mp = fp + ".m"
                if os.path.exists(mp):
                    with open(mp, encoding="utf-8") as mf:
                        meta = json.load(mf)
                _index[name] = {"size": st.st_size, "exp": meta.get("exp", 0),
                                "atime": st.st_atime, "ct": meta.get("ct", "")}
                _total += st.st_size
            except Exception:
                pass
def _evict_if_needed() -> None:
    """Delete least-recently-used entries until the cache fits in MAX_TOTAL.

    Entries are visited in ascending ``atime`` order. For each one the body
    and metadata files are removed (errors ignored), its size is subtracted
    from the running total, it is dropped from the index and the ``evicted``
    counter is incremented.
    """
    global _total
    if _total <= MAX_TOTAL:
        return
    oldest_first = sorted(_index, key=lambda k: _index[k]["atime"])
    for victim in oldest_first:
        if _total <= MAX_TOTAL:
            break
        entry = _index[victim]
        for path in _paths(victim):
            try:
                os.remove(path)
            except OSError:
                pass
        _total -= entry["size"]
        _index.pop(victim, None)
        _stats["evicted"] += 1
class MediaCache:
    """mitmproxy addon: answer repeat media requests from disk, fill on response.

    ``request`` short-circuits a GET with a cached body when a fresh entry
    exists; ``response`` stores eligible upstream responses. Both hooks are
    no-ops when the cache is disabled and never raise on cache errors.
    """

    def __init__(self):
        # Fail-open: an unusable cache directory must not stop the proxy.
        try:
            os.makedirs(CACHE_DIR, exist_ok=True)
            _load_index()
        except Exception:
            pass

    def request(self, flow: http.HTTPFlow) -> None:
        """Serve *flow* from cache when a non-expired entry exists for its URL.

        Only plain GETs are considered; requests carrying ``Range`` or
        ``Authorization`` always go upstream. On a hit ``flow.response`` is
        set to a 200 built from the stored body and content type.
        """
        global _total
        if not _enabled():
            return
        r = flow.request
        if r.method != "GET" or "range" in r.headers or "authorization" in r.headers:
            return
        # NOTE(review): requests carrying a Cookie are still served from (and
        # stored into) the URL-keyed cache. Media gated only by a session
        # cookie and sent without `Cache-Control: private` would be shared
        # between clients — confirm this is acceptable for every hosted vhost.
        key = _key(r.pretty_url or "")
        e = _index.get(key)
        if not e:
            _stats["misses"] += 1
            return
        now = time.time()
        if e["exp"] and e["exp"] < now:
            return
        body_path, _m = _paths(key)
        try:
            with open(body_path, "rb") as f:
                body = f.read()
        except OSError:
            # The body vanished from disk: forget the entry AND give its
            # bytes back, otherwise _total drifts upward and triggers
            # eviction of objects that would still fit.
            if _index.pop(key, None) is not None:
                _total -= e["size"]
            return
        e["atime"] = now
        try:
            # Mirror the access time on disk so a reloaded index keeps LRU order.
            os.utime(body_path, None)
        except OSError:
            pass
        _stats["hits"] += 1
        _stats["bytes_served"] += len(body)
        _flush_stats()
        # Never advertise freshness beyond the remaining lifetime of the entry.
        max_age = 300
        if e["exp"]:
            max_age = min(max_age, int(e["exp"] - now))
        flow.response = http.Response.make(
            200, body,
            {"Content-Type": e.get("ct") or "application/octet-stream",
             "X-SecuBox-Cache": "HIT",
             "Cache-Control": f"public, max-age={max_age}"},
        )

    def response(self, flow: http.HTTPFlow) -> None:
        """Store the upstream response of *flow* when it is safe to reuse.

        Eligible: 200 answers to GETs without ``Range``/``Authorization``,
        with a cacheable media type, a non-empty body of at most MAX_OBJ
        bytes, no ``Set-Cookie``, no ``no-store``/``no-cache``/``private``
        directive, no ``Vary`` other than ``Accept-Encoding`` and a positive
        TTL (``max-age`` or DEFAULT_TTL).
        """
        global _total
        if not _enabled() or not flow.response:
            return
        r = flow.request
        resp = flow.response
        if r.method != "GET" or resp.status_code != 200:
            return
        if "range" in r.headers or "authorization" in r.headers:
            return
        # Responses produced by request() above pass through here as well.
        if resp.headers.get("x-secubox-cache") == "HIT":
            return
        cc = (resp.headers.get("cache-control", "") or "").lower()
        # `no-cache` demands revalidation before reuse, which this cache
        # cannot perform, so such responses are not stored at all.
        if ("no-store" in cc or "no-cache" in cc or "private" in cc
                or "set-cookie" in resp.headers):
            return
        # Entries are keyed by URL only: a response that varies on any
        # request header could be replayed to a client it does not fit.
        # Accept-Encoding is exempt — presumably `resp.content` below is the
        # decoded body and hits are served without Content-Encoding; confirm
        # against the mitmproxy version in use.
        vary = (resp.headers.get("vary", "") or "").lower()
        if any(tok.strip() not in ("", "accept-encoding") for tok in vary.split(",")):
            return
        if not _cacheable_ct(resp.headers.get("content-type", "")):
            return
        try:
            clen = int(resp.headers.get("content-length", "0") or "0")
        except (TypeError, ValueError):
            clen = 0
        if clen > MAX_OBJ:  # header short-circuit; body-size gate below covers chunked
            return
        try:
            body = resp.content or b""
        except Exception:
            return
        if not body or len(body) > MAX_OBJ:
            return
        m = _MAXAGE.search(cc)
        ttl = int(m.group(1)) if m else DEFAULT_TTL
        if ttl <= 0:
            return
        key = _key(r.pretty_url or "")
        body_path, meta_path = _paths(key)
        ct = (resp.headers.get("content-type", "") or "").split(";")[0]
        now = time.time()
        exp = now + ttl  # one timestamp shared by the metadata file and the index
        tmp = body_path + ".tmp"
        replaced = False
        try:
            os.makedirs(os.path.dirname(body_path), exist_ok=True)
            with open(tmp, "wb") as f:
                f.write(body)
            os.replace(tmp, body_path)
            replaced = True
            with open(meta_path, "w", encoding="utf-8") as f:
                json.dump({"ct": ct, "exp": exp,
                           "url": (r.pretty_url or "")[:300]}, f)
        except Exception:
            # Do not leave a half-stored entry behind. The temp file is
            # always removed; if the body was already swapped in, the
            # metadata no longer describes it, so drop the whole entry.
            doomed = (tmp, body_path, meta_path) if replaced else (tmp,)
            for p in doomed:
                try:
                    os.remove(p)
                except OSError:
                    pass
            if replaced:
                stale = _index.pop(key, None)
                if stale:
                    _total -= stale["size"]
            return
        old = _index.get(key, {}).get("size", 0)
        _total += len(body) - old
        _index[key] = {"size": len(body), "exp": exp,
                       "atime": now, "ct": ct}
        _stats["stored"] += 1
        _evict_if_needed()
        _flush_stats()
# Module-level addon list picked up by mitmproxy when the script is loaded.
# The instance is created at import time, which also creates CACHE_DIR and
# builds the in-memory index (see MediaCache.__init__).
addons = [MediaCache()]

View File

@ -119,7 +119,8 @@ CONF
lxc-attach -n "$LXC_NAME" -- apt-get update
lxc-attach -n "$LXC_NAME" -- apt-get install -y python3-pip python3-venv curl jq
lxc-attach -n "$LXC_NAME" -- mkdir -p /opt/mitmproxy /data/mitmproxy /var/log/mitmproxy
lxc-attach -n "$LXC_NAME" -- mkdir -p /opt/mitmproxy /data/mitmproxy /var/log/mitmproxy \
/data/mitmproxy/cache/media /data/mitmproxy/logs # #607 media cache + stats
lxc-attach -n "$LXC_NAME" -- python3 -m venv /opt/mitmproxy
lxc-attach -n "$LXC_NAME" -- /opt/mitmproxy/bin/pip install mitmproxy

View File

@ -21,7 +21,8 @@ ExecStart=/opt/mitmproxy/bin/mitmdump \
--listen-host 0.0.0.0 \
--listen-port 8080 \
--set confdir=/data/mitmproxy \
--scripts /data/mitmproxy/secubox_waf.py
-s /data/mitmproxy/secubox_waf.py \
-s /data/mitmproxy/media_cache.py
Restart=on-failure
RestartSec=5