mirror of
https://github.com/CyberMind-FR/secubox-deb.git
synced 2026-06-30 08:00:54 +00:00
Compare commits
No commits in common. "2523333fc8f6b07c9bcad0d749a0e579610394c4" and "630cb81e03d8696d2dc0907c146948e1ae174c15" have entirely different histories.
2523333fc8
...
630cb81e03
|
|
@ -1,27 +1,3 @@
|
|||
secubox-toolbox (2.6.36-1~bookworm1) bookworm; urgency=medium
|
||||
|
||||
* fix(autolearn): exclude anti-bot vendors from the auto-block list (#589
|
||||
follow-up). Anti-bot WAFs (Datadome/PerimeterX) often sit in the visited
|
||||
site's own path, so auto-blocking them would break the page. The learner
|
||||
now feeds only OPERATOR-GRADE/data-broker classified trackers (+ threat-
|
||||
intel domains); cross-site threshold lowered 4→2.
|
||||
|
||||
-- Gerald KERMA <devel@cybermind.fr> Sun, 14 Jun 2026 16:50:00 +0200
|
||||
|
||||
secubox-toolbox (2.6.35-1~bookworm1) bookworm; urgency=medium
|
||||
|
||||
* Autolearn bad trackers/actors (#589) — feeds ad_ghost's block set.
|
||||
- sbin/secubox-toolbox-autolearn (+ hourly timer) builds a HIGH-
|
||||
confidence list /var/lib/secubox/toolbox/learned-trackers.txt from
|
||||
(1) threat-intel domain IOCs (threatfox malicious) and (2) cross-site
|
||||
domains CLASSIFIED anti-bot/operator-grade seen on >=4 sites.
|
||||
Conservative — plain cross-site CDNs are NOT learned.
|
||||
- ad_ghost.py loads it (mtime-cached) and 204s learned hosts too
|
||||
(X-SecuBox-Ghost: learned), gated by the new `autolearn` filter
|
||||
(default on). postinst enables the timer + runs once.
|
||||
|
||||
-- Gerald KERMA <devel@cybermind.fr> Sun, 14 Jun 2026 16:30:00 +0200
|
||||
|
||||
secubox-toolbox (2.6.34-1~bookworm1) bookworm; urgency=medium
|
||||
|
||||
* Cartographie: domain-nugget cloud view (#587). New "🏷️ Domaines" toggle
|
||||
|
|
|
|||
|
|
@ -211,11 +211,6 @@ fi
|
|||
# until the operator opts in via a SECUBOX_ESCALATE_* drop-in.
|
||||
systemctl enable secubox-escalate.timer 2>/dev/null || true
|
||||
systemctl start secubox-escalate.timer 2>/dev/null || true
|
||||
# #589 : autolearn bad-tracker timer (hourly) + a first run now so
|
||||
# the learned list exists immediately for ad_ghost.
|
||||
systemctl enable secubox-toolbox-autolearn.timer 2>/dev/null || true
|
||||
systemctl start secubox-toolbox-autolearn.timer 2>/dev/null || true
|
||||
/usr/sbin/secubox-toolbox-autolearn 2>&1 | head -1 || true
|
||||
fi
|
||||
fi
|
||||
|
||||
|
|
|
|||
|
|
@ -35,10 +35,6 @@ override_dh_installsystemd:
|
|||
# Install the secondary unit manually (dh_installsystemd expects 1 unit/pkg).
|
||||
install -d debian/secubox-toolbox/lib/systemd/system
|
||||
install -m 0644 systemd/secubox-toolbox-mitm.service debian/secubox-toolbox/lib/systemd/system/
|
||||
# #589 : autolearn bad-tracker learner + hourly timer
|
||||
install -m 0755 sbin/secubox-toolbox-autolearn debian/secubox-toolbox/usr/sbin/
|
||||
install -m 0644 systemd/secubox-toolbox-autolearn.service debian/secubox-toolbox/lib/systemd/system/
|
||||
install -m 0644 systemd/secubox-toolbox-autolearn.timer debian/secubox-toolbox/lib/systemd/system/
|
||||
# Phase 6.P (#496) : systemd drop-ins for RuntimeMaxSec=6h on mitm + mitm-wg
|
||||
install -d debian/secubox-toolbox/lib/systemd/system/secubox-toolbox-mitm.service.d
|
||||
install -m 0644 systemd/secubox-toolbox-mitm.service.d/10-runtime-max.conf \
|
||||
|
|
|
|||
|
|
@ -46,43 +46,6 @@ _AD_HOST = re.compile(
|
|||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# #589 — auto-learned bad hosts (threat-intel + classified cross-site
|
||||
# trackers), rebuilt hourly by secubox-toolbox-autolearn. Loaded with a
|
||||
# mtime check so a fresh learn takes effect within ~60 s, no restart.
|
||||
_LEARNED_PATH = "/var/lib/secubox/toolbox/learned-trackers.txt"
|
||||
_learned: set = set()
|
||||
_learned_mtime = 0.0
|
||||
_learned_check = 0.0
|
||||
_2L_TLD = ("co.uk", "com.au", "co.jp", "co.nz", "com.br", "co.za", "gouv.fr")
|
||||
|
||||
|
||||
def _registrable(host: str):
    """Return the registrable domain of *host*, or None if it has none.

    The result is an approximation of "eTLD+1": the last two labels, or the
    last three when the last two form one of the two-label suffixes listed
    in ``_2L_TLD`` (e.g. ``co.uk``).  A trailing ``:port`` is ignored and the
    result is lower-cased.

    Returns None for empty input and for IP literals (dotted IPv4, bare or
    bracketed IPv6), which have no registrable domain.
    """
    raw = (host or "").lower()
    # IPv6 literals must be rejected BEFORE the port is split off: once the
    # string is cut at the first ":" an address such as "fe80::1" or
    # "[::1]:443" degrades to "fe80" / "[" and would be returned as a name.
    if raw.startswith("[") or raw.count(":") > 1:
        return None
    name = raw.split(":")[0].strip(".")
    # Digits-and-dots only means a dotted IPv4 address.
    if not name or name.replace(".", "").isdigit():
        return None
    labels = name.split(".")
    if len(labels) <= 2:
        return name
    last2 = ".".join(labels[-2:])
    if last2 in _2L_TLD:
        return ".".join(labels[-3:])
    return last2
|
||||
|
||||
|
||||
def _learned_set() -> set:
    """Return the cached set of auto-learned hosts.

    The file at ``_LEARNED_PATH`` is stat'ed at most once every 60 seconds
    and re-read only when its mtime differs from the one last loaded.  Any
    failure (missing file, unreadable content, ...) is ignored and the
    previously cached set is returned unchanged.
    """
    global _learned, _learned_mtime, _learned_check
    stamp = time.time()
    if stamp - _learned_check >= 60:
        _learned_check = stamp
        try:
            mtime = os.path.getmtime(_LEARNED_PATH)
            if mtime != _learned_mtime:
                entries = set()
                with open(_LEARNED_PATH, encoding="utf-8") as fh:
                    for raw in fh:
                        entry = raw.strip()
                        if entry:
                            entries.add(entry.lower())
                # Publish only after a complete read, so a failure part-way
                # through keeps the old set and the old mtime.
                _learned = entries
                _learned_mtime = mtime
        except Exception:
            # Best-effort: keep serving the previous set.
            pass
    return _learned
|
||||
|
||||
# Cosmetic hide selectors, grouped so the WebUI can toggle each category.
|
||||
_COSMETIC = {
|
||||
"ads": (
|
||||
|
|
@ -161,18 +124,10 @@ class AdGhost:
|
|||
if not _is_r3plus(flow):
|
||||
return
|
||||
host = flow.request.pretty_host or ""
|
||||
blocked = bool(_AD_HOST.search(host))
|
||||
learned = False
|
||||
if not blocked and f.get("autolearn", True):
|
||||
reg = _registrable(host)
|
||||
if reg and (reg in _learned_set() or host.lower() in _learned_set()):
|
||||
blocked = learned = True
|
||||
if blocked:
|
||||
if _AD_HOST.search(host):
|
||||
flow.response = http.Response.make(
|
||||
204, b"", {"X-SecuBox-Ghost": "learned" if learned else "blocked"})
|
||||
204, b"", {"X-SecuBox-Ghost": "blocked"})
|
||||
_counts["blocked_requests"] += 1
|
||||
if learned:
|
||||
_counts["learned_blocks"] = _counts.get("learned_blocks", 0) + 1
|
||||
_counts["bytes_saved_est"] += _EST_BYTES_PER_REQ
|
||||
_flush()
|
||||
|
||||
|
|
|
|||
|
|
@ -1,106 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: LicenseRef-CMSD-1.0
|
||||
# Copyright (c) 2026 CyberMind — Gérald Kerma <devel@cybermind.fr>
|
||||
#
|
||||
# #589 — autolearn bad trackers/actors. Builds a HIGH-CONFIDENCE block list
|
||||
# that ad_ghost consults (in addition to its static ad-host regex), from:
|
||||
# 1. threat-intel domain IOCs (threatfox malicious C2/malware domains) ;
|
||||
# 2. cross-site OPERATOR-GRADE / data-broker tracker domains
|
||||
# (social_host_meta.opgrade_vendor) seen on >= MIN_SITES sites.
|
||||
# Deliberately conservative — a plain cross-site CDN (fonts, shared assets)
|
||||
# is NOT learned, and ANTI-BOT vendors are NOT learned either : a site's own
|
||||
# WAF (Datadome/PerimeterX) sits in the 1st-party path, so blocking it would
|
||||
# break the site. So live R3 users don't get legit sites broken. Run hourly
|
||||
# by secubox-toolbox-autolearn.timer ; output read by ad_ghost (cached).
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
import sys
|
||||
import time
|
||||
|
||||
DB = "/var/lib/secubox/toolbox/toolbox.db"
|
||||
OUT = "/var/lib/secubox/toolbox/learned-trackers.txt"
|
||||
MIN_SITES = 2 # cross-site threshold for operator-grade trackers
|
||||
MAX_ENTRIES = 8000
|
||||
_2L = ("co.uk", "com.au", "co.jp", "co.nz", "com.br", "co.za", "gouv.fr")
|
||||
|
||||
|
||||
def registrable(host: str):
    """Return the registrable domain of *host*, or None if it has none.

    The result is an approximation of "eTLD+1": the last two labels, or the
    last three when the last two form one of the two-label suffixes listed
    in ``_2L`` (e.g. ``co.uk``).  A trailing ``:port`` is ignored and the
    result is lower-cased.

    Returns None for empty input and for IP literals (dotted IPv4, bare or
    bracketed IPv6), which have no registrable domain.
    """
    raw = (host or "").lower()
    # IPv6 literals must be rejected BEFORE the port is split off: once the
    # string is cut at the first ":" an address such as "fe80::1" or
    # "[::1]:443" degrades to "fe80" / "[" and would be learned as a host.
    if raw.startswith("[") or raw.count(":") > 1:
        return None
    name = raw.split(":")[0].strip(".")
    # Digits-and-dots only means a dotted IPv4 address.
    if not name or name.replace(".", "").isdigit():
        return None
    labels = name.split(".")
    if len(labels) <= 2:
        return name
    last2 = ".".join(labels[-2:])
    if last2 in _2L:
        return ".".join(labels[-3:])
    return last2
|
||||
|
||||
|
||||
def _log(msg: str) -> None:
    """Write one diagnostic line to stderr, tolerating a closed pipe."""
    try:
        sys.stderr.write(msg + "\n")
    except OSError:
        # stderr may be a pipe whose reader has already exited; a lost log
        # line must never abort the run before the list is written.
        pass


def main() -> int:
    """Rebuild the learned-tracker list at ``OUT`` from the toolbox database.

    Two sources feed the list:

    1. every ``threat_intel`` row of type ``domain``;
    2. tracker domains that have a non-NULL ``opgrade_vendor`` in
       ``social_host_meta`` and are recorded on at least ``MIN_SITES``
       distinct sites in ``social_nodes``.

    All hosts are reduced to their registrable domain, sorted, capped at
    ``MAX_ENTRIES`` and written atomically (temp file + rename).

    The run is best-effort: every failure is reported on stderr and the
    function always returns 0.
    """
    import os  # only needed here; the module does not import it globally

    try:
        conn = sqlite3.connect(DB, timeout=10)
        conn.row_factory = sqlite3.Row
    except Exception as e:
        _log(f"autolearn: cannot open {DB}: {e}")
        return 0

    threat: set[str] = set()
    trackers: set[str] = set()
    try:
        # 1) threat-intel malicious domains (high confidence).
        try:
            for row in conn.execute(
                    "SELECT DISTINCT ioc FROM threat_intel WHERE type='domain'"):
                dom = registrable(row["ioc"])
                if dom:
                    threat.add(dom)
        except Exception as e:
            # Keep whatever was collected so far, but say why it is short.
            _log(f"autolearn: threat-intel query failed: {e}")

        # 2) cross-site OPERATOR-GRADE / data-broker trackers ONLY. Anti-bot
        #    vendors are deliberately excluded — they're frequently the
        #    visited site's own WAF (in-path), so blocking them breaks the
        #    page.
        try:
            classified = set()
            for row in conn.execute(
                    "SELECT tracker_domain FROM social_host_meta "
                    "WHERE opgrade_vendor IS NOT NULL"):
                dom = registrable(row["tracker_domain"])
                if dom:
                    classified.add(dom)
            # distinct 1st-party sites per registrable tracker domain
            sites: dict[str, set] = {}
            for row in conn.execute(
                    "SELECT tracker_domain, sites_jsonl FROM social_nodes"):
                dom = registrable(row["tracker_domain"])
                if not dom or dom not in classified:
                    continue
                try:
                    for site in json.loads(row["sites_jsonl"] or "[]"):
                        sites.setdefault(dom, set()).add(site)
                except (ValueError, TypeError):
                    # One malformed row must not discard the others.
                    continue
            trackers.update(
                dom for dom, seen in sites.items() if len(seen) >= MIN_SITES)
        except Exception as e:
            _log(f"autolearn: cross-site tracker query failed: {e}")
    finally:
        conn.close()

    out = sorted(threat | trackers)[:MAX_ENTRIES]
    tmp = OUT + ".tmp"
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            f.write("\n".join(out) + ("\n" if out else ""))
        os.replace(tmp, OUT)
    except Exception as e:
        # Do not leave a stale temp file behind.
        try:
            os.unlink(tmp)
        except OSError:
            pass
        _log(f"autolearn: write failed: {e}")
        return 0

    # Count from what was actually written, so truncation by MAX_ENTRIES
    # can never yield a negative "classified" figure.
    ti = sum(1 for dom in out if dom in threat)
    _log(
        f"autolearn: {len(out)} hosts learned ({ti} threat-intel + "
        f"{len(out) - ti} classified cross-site) @ {int(time.time())}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
|
|
@ -22,7 +22,6 @@ DEFAULTS: Dict = {
|
|||
"ad_ghost": True, # R3+/R4 silent ad/banner/widget ghosting
|
||||
"ad_ghost_block": True, # 204 known ad/tracker hosts (save bandwidth)
|
||||
"media_cache": False, # #577 shared media proxy-cache (opt-in)
|
||||
"autolearn": True, # #589 also block auto-learned bad hosts
|
||||
"ad_ghost_categories": { # cosmetic ghost groups
|
||||
"ads": True,
|
||||
"consent_nag": True,
|
||||
|
|
@ -74,7 +73,7 @@ def set_filters(patch: Dict) -> Dict:
|
|||
if ck in DEFAULTS["ad_ghost_categories"]})
|
||||
elif k == "protective" and v in _VALID_PROTECTIVE:
|
||||
cur["protective"] = v
|
||||
elif k in ("banner", "ad_ghost", "ad_ghost_block", "media_cache", "autolearn"):
|
||||
elif k in ("banner", "ad_ghost", "ad_ghost_block", "media_cache"):
|
||||
cur[k] = bool(v)
|
||||
try:
|
||||
os.makedirs(os.path.dirname(FILTERS_PATH), exist_ok=True)
|
||||
|
|
|
|||
|
|
@ -1,12 +0,0 @@
|
|||
[Unit]
|
||||
Description=SecuBox ToolBoX — autolearn bad trackers/actors (#589)
|
||||
Documentation=https://github.com/CyberMind-FR/secubox-deb/issues/589
|
||||
After=secubox-toolbox.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/sbin/secubox-toolbox-autolearn
|
||||
Nice=10
|
||||
IOSchedulingClass=idle
|
||||
# best-effort, never block boot
|
||||
TimeoutStartSec=120
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
[Unit]
|
||||
Description=SecuBox ToolBoX — hourly autolearn of bad trackers (#589)
|
||||
|
||||
[Timer]
|
||||
OnBootSec=10min
|
||||
OnUnitActiveSec=1h
|
||||
Persistent=true
|
||||
RandomizedDelaySec=5min
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
Loading…
Reference in New Issue
Block a user