mirror of
https://github.com/CyberMind-FR/secubox-deb.git
synced 2026-06-29 21:38:35 +00:00
Compare commits
3 Commits
2f9b16f05a
...
a44d9c51d8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a44d9c51d8 | ||
| 013385a6c4 | |||
| 6ee7fe3cbc |
|
|
@ -1,3 +1,16 @@
|
|||
secubox-toolbox (2.6.53-1~bookworm1) bookworm; urgency=medium
|
||||
|
||||
* perf(#646): adaptive Accept-Encoding strip in inject_banner. Keep gzip/br by
|
||||
default; only force identity (for stream-injecting the loader) on hosts
|
||||
proven streaming-eligible (top-level html, 2xx-3xx, r2/r3, banner on, NOT
|
||||
CSP-strict) on a prior response. CSP-strict / heavy sites stay compressed
|
||||
instead of being pulled uncompressed (3-5x bytes) through the GIL-bound R3
|
||||
worker for zero benefit — cuts per-document CPU + transfer on the slow path.
|
||||
No feature loss: banner still injects via the buffer path; streaming TTFB
|
||||
win preserved on eligible hosts after the first visit.
|
||||
|
||||
-- Gerald KERMA <devel@cybermind.fr> Thu, 18 Jun 2026 11:00:00 +0200
|
||||
|
||||
secubox-toolbox (2.6.52-1~bookworm1) bookworm; urgency=medium
|
||||
|
||||
* perf(#644): /admin/clients/rich enriches only the ENRICH_LIMIT (12)
|
||||
|
|
|
|||
|
|
@ -671,6 +671,26 @@ _MAX_INJECT_BYTES = 2 * 1024 * 1024 # Phase 10 perf cap : skip injection on hug
|
|||
# critical path. Gated, fail-open: any miss falls back to passthrough (no
|
||||
# banner on that page) or to the legacy buffer path when the body is compressed.
|
||||
|
||||
# ── #646 : adaptive Accept-Encoding strip ───────────────────────────────────
|
||||
# Forcing identity on EVERY document pulled CSP-strict / heavy pages
|
||||
# uncompressed (3-5x bytes) through the GIL-bound worker for ZERO benefit —
|
||||
# streaming is disqualified on those pages anyway. So we keep gzip/br by
|
||||
# default and only strip Accept-Encoding for hosts we've PROVEN
|
||||
# streaming-eligible (top-level html + 2xx-3xx + r2/r3 + banner on + NOT
|
||||
# CSP-strict) on a prior response. CSP-strict / non-doc hosts stay compressed
|
||||
# forever (banner still injects via the buffer path, which decompresses fine).
|
||||
# Per-process, in-memory, size-capped, self-healing — verdicts re-learn cheaply.
|
||||
# Per-process verdict cache consulted by the request hook: a truthy verdict for
# a host makes the next document request to it ask upstream for identity
# encoding; a missing or False verdict leaves Accept-Encoding untouched.
_STREAM_VERDICT: dict = {}     # host -> bool (True = request identity & stream)
_STREAM_VERDICT_MAX = 8192     # entry cap; a new host arriving at the cap resets the cache


def _record_stream_verdict(host: str, eligible: bool) -> None:
    """Remember whether ``host`` is streaming-eligible.

    Args:
        host: Cache key as supplied by the caller. Empty values are ignored.
        eligible: Verdict to store for ``host``.

    The cache is capped at ``_STREAM_VERDICT_MAX`` entries. When a host that is
    not cached yet arrives while the cache is full, the whole cache is cleared
    before the new verdict is stored. Re-recording an already cached host
    overwrites its verdict in place and never triggers the reset, because such
    an update cannot grow the cache.
    """
    if not host:
        return
    # Only a *new* key can push the cache past its cap; an overwrite must not
    # throw away every verdict learned so far.
    if host not in _STREAM_VERDICT and len(_STREAM_VERDICT) >= _STREAM_VERDICT_MAX:
        _STREAM_VERDICT.clear()  # crude self-heal; cheap to re-learn
    _STREAM_VERDICT[host] = eligible
|
||||
|
||||
def _stream_enabled() -> bool:
|
||||
try:
|
||||
import sys as _sys
|
||||
|
|
@ -760,15 +780,18 @@ class InjectBanner:
|
|||
return
|
||||
except Exception as e:
|
||||
log.warning("toolbox asset serve failed for %s: %s", flow.request.path, e)
|
||||
# #620 : for top-level HTML navigations, ask upstream for identity
|
||||
# encoding so we can stream-inject the loader without decompressing.
|
||||
# #620/#646 : for top-level HTML navigations to hosts PROVEN
|
||||
# streaming-eligible, ask upstream for identity encoding so we can
|
||||
# stream-inject the loader without decompressing. Unknown / CSP-strict
|
||||
# hosts keep their gzip/br compression (learned in responseheaders).
|
||||
if not _stream_enabled():
|
||||
return
|
||||
try:
|
||||
req = flow.request
|
||||
accept = (req.headers.get("accept", "") or "").lower()
|
||||
dest = (req.headers.get("sec-fetch-dest", "") or "").lower()
|
||||
if dest == "document" or "text/html" in accept:
|
||||
is_doc = dest == "document" or "text/html" in accept
|
||||
if is_doc and _STREAM_VERDICT.get(flow.request.pretty_host or ""):
|
||||
if "accept-encoding" in req.headers:
|
||||
req.headers["accept-encoding"] = "identity"
|
||||
except Exception:
|
||||
|
|
@ -787,10 +810,6 @@ class InjectBanner:
|
|||
return
|
||||
if resp.status_code < 200 or resp.status_code >= 400:
|
||||
return
|
||||
# Compressed (upstream ignored our identity request) → let the buffer
|
||||
# path handle it (mitmproxy auto-decodes there). Don't stream.
|
||||
if resp.headers.get("content-encoding"):
|
||||
return
|
||||
if _client_level(flow) not in ("r2", "r3"):
|
||||
return
|
||||
try:
|
||||
|
|
@ -802,15 +821,20 @@ class InjectBanner:
|
|||
return
|
||||
except Exception:
|
||||
pass
|
||||
# #636 — strict CSP would block the injected loader <script> and its
|
||||
# /__toolbox/bundle fetch → no banner. Don't stream; fall through to the
|
||||
# legacy buffer path, which injects an inline-CSS banner (no script/fetch)
|
||||
# that survives strict CSP.
|
||||
if _detect_csp_strict(flow):
|
||||
# #646 — learn per-host streaming eligibility from THIS response, so the
|
||||
# next visit's request() knows whether to strip Accept-Encoding. This is
|
||||
# independent of the current encoding: a host eligible today is worth
|
||||
# asking identity from tomorrow. #636 — strict CSP blocks the injected
|
||||
# loader <script>; #639 — only top-level navigations get the banner.
|
||||
# Both disqualify streaming → ineligible → keep compression.
|
||||
eligible = _is_top_level_document(flow) and not _detect_csp_strict(flow)
|
||||
_record_stream_verdict(flow.request.pretty_host or "", eligible)
|
||||
if not eligible:
|
||||
return
|
||||
# #639 — only inject into top-level navigations; iframes/sub-documents
|
||||
# each get their own responseheaders call → multiple banners per visit.
|
||||
if not _is_top_level_document(flow):
|
||||
# Compressed (upstream ignored our identity request, or this is the first
|
||||
# visit before we'd learned to ask identity) → let the buffer path handle
|
||||
# it (mitmproxy auto-decodes there). Can only stream an identity body.
|
||||
if resp.headers.get("content-encoding"):
|
||||
return
|
||||
try:
|
||||
resp.stream = _LoaderInjector(_loader_script(flow))
|
||||
|
|
|
|||
113
packages/secubox-toolbox/tests/test_banner_adaptive_encoding.py
Normal file
113
packages/secubox-toolbox/tests/test_banner_adaptive_encoding.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||
"""#646 — adaptive Accept-Encoding strip.
|
||||
|
||||
The loader stream-inject needs an identity-encoded body, but forcing identity on
|
||||
EVERY document pulls CSP-strict/heavy pages uncompressed for no benefit. We only
|
||||
strip Accept-Encoding for hosts proven streaming-eligible on a prior visit;
|
||||
unknown/ineligible hosts keep gzip/br.
|
||||
"""
|
||||
import sys
|
||||
import pathlib
|
||||
import importlib
|
||||
import json
|
||||
|
||||
ADDON_DIR = pathlib.Path(__file__).resolve().parents[1] / "mitmproxy_addons"
|
||||
sys.path.insert(0, str(ADDON_DIR))
|
||||
|
||||
from mitmproxy.test import tflow, tutils # noqa: E402
|
||||
from secubox_toolbox import filters # noqa: E402
|
||||
|
||||
|
||||
def _addon(monkeypatch, tmp_path):
    """Return a freshly reloaded ``inject_banner`` module prepared for a test.

    Writes a filters file enabling ``banner`` and ``stream_inject`` under
    ``tmp_path``, points ``filters.FILTERS_PATH`` at it, and calls
    ``filters.get_filters(force=True)`` (presumably to refresh any cached
    filters — confirm against ``secubox_toolbox.filters``). The addon module is
    then reloaded, its ``_client_level`` is patched to always answer ``"r3"``,
    and its verdict cache is emptied.
    """
    settings = {"banner": True, "stream_inject": True}
    filters_file = tmp_path / "filters.json"
    filters_file.write_text(json.dumps(settings))
    monkeypatch.setattr(filters, "FILTERS_PATH", str(filters_file))
    filters.get_filters(force=True)

    # Imported here, after the filters are in place, then reloaded so the
    # module-level state starts from scratch for every test.
    import inject_banner

    importlib.reload(inject_banner)

    def _always_r3(flow):
        return "r3"

    monkeypatch.setattr(inject_banner, "_client_level", _always_r3)
    inject_banner._STREAM_VERDICT.clear()
    return inject_banner
|
||||
|
||||
|
||||
def _doc_request(host="example.com"):
    """Build a request-only test flow shaped like a top-level HTML navigation.

    The request targets ``host``, accepts HTML, advertises ``gzip, br`` and
    carries ``sec-fetch-dest: document``.
    """
    flow = tflow.tflow()
    flow.request.host = host
    navigation_headers = (
        ("accept", "text/html,application/xhtml+xml"),
        ("accept-encoding", "gzip, br"),
        ("sec-fetch-dest", "document"),
    )
    for name, value in navigation_headers:
        flow.request.headers[name] = value
    return flow
|
||||
|
||||
|
||||
def _html_response(host="example.com"):
    """Build a test flow carrying a 200 ``text/html`` response from ``host``.

    The request side is marked ``sec-fetch-dest: document``.
    """
    flow = tflow.tflow(resp=tutils.tresp())
    request, response = flow.request, flow.response
    request.host = host
    response.headers["content-type"] = "text/html; charset=utf-8"
    response.status_code = 200
    request.headers["sec-fetch-dest"] = "document"
    return flow
|
||||
|
||||
|
||||
def test_unknown_host_keeps_compression(monkeypatch, tmp_path):
    """A host with no recorded verdict keeps its original Accept-Encoding."""
    module = _addon(monkeypatch, tmp_path)
    flow = _doc_request()
    module.InjectBanner().request(flow)
    # NOT stripped: nothing has been learned about this host yet.
    assert flow.request.headers["accept-encoding"] == "gzip, br"
|
||||
|
||||
|
||||
def test_eligible_host_strips_identity(monkeypatch, tmp_path):
    """A host recorded as eligible gets Accept-Encoding rewritten to identity."""
    module = _addon(monkeypatch, tmp_path)
    module._STREAM_VERDICT["example.com"] = True
    flow = _doc_request()
    module.InjectBanner().request(flow)
    assert flow.request.headers["accept-encoding"] == "identity"
|
||||
|
||||
|
||||
def test_ineligible_host_keeps_compression(monkeypatch, tmp_path):
    """A host recorded as ineligible keeps its original Accept-Encoding."""
    module = _addon(monkeypatch, tmp_path)
    module._STREAM_VERDICT["example.com"] = False
    flow = _doc_request()
    module.InjectBanner().request(flow)
    assert flow.request.headers["accept-encoding"] == "gzip, br"
|
||||
|
||||
|
||||
def test_responseheaders_records_eligible(monkeypatch, tmp_path):
    """Observing a plain HTML document response records the host as eligible."""
    module = _addon(monkeypatch, tmp_path)
    flow = _html_response()
    module.InjectBanner().responseheaders(flow)
    verdict = module._STREAM_VERDICT.get("example.com")
    assert verdict is True
|
||||
|
||||
|
||||
def test_responseheaders_records_ineligible_for_csp_strict(monkeypatch, tmp_path):
    """A response detected as CSP-strict records the host as ineligible."""
    module = _addon(monkeypatch, tmp_path)

    def _always_strict(flow):
        return True

    monkeypatch.setattr(module, "_detect_csp_strict", _always_strict)
    flow = _html_response(host="strict.example.com")
    module.InjectBanner().responseheaders(flow)
    assert module._STREAM_VERDICT.get("strict.example.com") is False
    # CSP-strict → buffer path, not stream
    assert not flow.metadata.get("sbx_streamed")
|
||||
|
||||
|
||||
def test_learn_then_strip_end_to_end(monkeypatch, tmp_path):
    """Walk the full cycle: unknown host, learned verdict, stripped request."""
    module = _addon(monkeypatch, tmp_path)

    # First visit: the host is unknown, so the encoding is left alone.
    first_visit = _doc_request()
    module.InjectBanner().request(first_visit)
    assert first_visit.request.headers["accept-encoding"] == "gzip, br"

    # The response is observed and the host is learned as eligible.
    module.InjectBanner().responseheaders(_html_response())
    assert module._STREAM_VERDICT.get("example.com") is True

    # Second visit: the request is now rewritten so streaming can engage.
    second_visit = _doc_request()
    module.InjectBanner().request(second_visit)
    assert second_visit.request.headers["accept-encoding"] == "identity"
|
||||
|
||||
|
||||
def test_verdict_cache_self_heals_on_overflow(monkeypatch, tmp_path):
    """Recording a new host at the cap clears the cache, then stores that host."""
    module = _addon(monkeypatch, tmp_path)
    cap = 4
    monkeypatch.setattr(module, "_STREAM_VERDICT_MAX", cap)

    for index in range(cap):
        module._record_stream_verdict(f"h{index}", True)
    assert len(module._STREAM_VERDICT) == 4

    # One more distinct host overflows the cap: clear, then add.
    module._record_stream_verdict("h4", True)
    assert len(module._STREAM_VERDICT) == 1
    assert module._STREAM_VERDICT.get("h4") is True
|
||||
Loading…
Reference in New Issue
Block a user