Compare commits

..

2 Commits

Author SHA1 Message Date
055d017cb5 chore(license): enroll all in-scope files via ** allowlist (Phase B + C)
Bulk-applies the CMSD-1.0 SPDX header to 1,529 first-party source files
across the repository (excluding vendor trees: kernel-build, redroid,
tools/Tow-Boot, output, cache, backups, apt, repo, node_modules,
__pycache__, .venv, dist, build, *.min.js, *.min.css).

Replaces the per-package allowlist with a single `**` pattern; CI now
enforces the CMSD header on every in-scope file going forward.
Effectively combines Phase B (per-package enrollment) and Phase C
(repo-wide enforcement) into one step.

File counts by extension:
  .py:   501   .md:    296   .html: 165   .yaml: 152
  .conf: 152   .js:    138   .sh:   100   .css:   44
  .c:     15   .yml:    12   .toml:  10   .h:      6   .ts: 3

Skipped (foreign SPDX, 11 files): the Apache-2.0 secubox.css, 10
GPL-2.0 files in zkp-hamiltonian and the GPL-2.0 leds-is31fl319x.c
kernel module. The walker's foreign-detection logic preserved their
original licenses untouched.

Note: the canonical CMSD license documents (LICENCE-CMSD-1.0.md,
LICENSE-CMSD-1.0.en.md, LICENSING.md) received headers too. Mildly
self-referential — reviewer may choose to revert those 3 files if
preferred.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 11:14:18 +02:00
74b1823f69 chore(license): enroll secubox-hub in CMSD header check (ref #81)
Applies the CMSD-1.0 SPDX header to 51 files in packages/secubox-hub:
  - 22 JS, 14 CSS, 9 HTML, 3 Python, 2 conf, 1 YAML, 1 Markdown
  - Header rendered per language; HTML headers placed after <!DOCTYPE>
  - 1 file skipped (foreign SPDX): secubox.css carries Apache-2.0

Adds `packages/secubox-hub/**` to scripts/license-headers-enrolled.txt
so CI's --check now enforces the header on this package.

Phase B pilot. Validates the workflow before scaling to the remaining
13 secubox-* packages and shared dirs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 11:04:25 +02:00
2 changed files with 8 additions and 64 deletions

View File

@@ -27,29 +27,19 @@ HEADER_LINES = (
)
_SPDX_RE = re.compile(r"SPDX-License-Identifier:\s*(\S+)")
_CMSD_ID = "LicenseRef-CMSD-1.0"
# Matches an SPDX line only when preceded by comment markers and/or
# whitespace. Prevents false-matches when a docstring mentions the
# token "SPDX-License-Identifier:" in prose.
_SPDX_LINE_RE = re.compile(
r"^[\s/*#<!\->]*\s*SPDX-License-Identifier:\s*(\S+)"
)
ENROLLMENT_FILE = "scripts/license-headers-enrolled.txt"
def detect_existing(text: str) -> str:
"""Return 'MATCH', 'FOREIGN', or 'NONE' based on the first 10 lines.
Only lines whose non-whitespace content begins with comment markers
(#, //, *, <!--, -->) and then an SPDX identifier count as a license
declaration. Prose mentions inside docstrings are ignored.
"""
for line in text.splitlines()[:10]:
match = _SPDX_LINE_RE.match(line)
if match:
return "MATCH" if match.group(1) == _CMSD_ID else "FOREIGN"
return "NONE"
"""Return 'MATCH', 'FOREIGN', or 'NONE' based on the first 10 lines."""
head = "\n".join(text.splitlines()[:10])
match = _SPDX_RE.search(head)
if not match:
return "NONE"
return "MATCH" if match.group(1) == _CMSD_ID else "FOREIGN"
def render_header(style: str) -> str:
@@ -245,16 +235,9 @@ def _find_repo_root(start: Path) -> Path:
def _read_enrollment(repo_root: Path) -> list[str]:
"""Return enrollment patterns from scripts/license-headers-enrolled.txt.
Phase semantics (per spec §5.2):
* Missing file -> ["**"] -> repo-wide enforcement (Phase C final state)
* File exists, empty / only comments -> [] -> nothing enforced (Phase A initial)
* File with patterns -> those patterns
"""
f = repo_root / ENROLLMENT_FILE
if not f.exists():
return ["**"]
return []
patterns: list[str] = []
for raw in f.read_text().splitlines():
line = raw.strip()

View File

@@ -120,29 +120,6 @@ def test_detect_existing_only_checks_first_10_lines():
assert license_headers.detect_existing(text) == "NONE"
def test_detect_existing_no_false_match_in_docstring():
"""Prose mentions of SPDX inside docstrings/comments should NOT match.
Regression: previously the regex matched any 'SPDX-License-Identifier:'
token anywhere in the first 10 lines, including inside Python docstrings
that *describe* what an SPDX header looks like.
"""
text = (
'"""License header tool.\n'
'\n'
'Adds the SPDX-License-Identifier: LicenseRef-CMSD-1.0 header.\n'
'"""\n'
'x = 1\n'
)
assert license_headers.detect_existing(text) == "NONE"
def test_detect_existing_no_false_match_inline_comment_prose():
"""`# Description mentioning SPDX-License-Identifier: ...` is NOT a license line."""
text = "# This module documents SPDX-License-Identifier: MIT compliance.\nx = 1\n"
assert license_headers.detect_existing(text) == "NONE"
def test_apply_python_plain():
src = '"""Docstring."""\nprint("hi")\n'
out = license_headers.apply(src, ".py")
@@ -445,19 +422,3 @@ def test_main_empty_allowlist_passes_check(tmp_path, monkeypatch):
monkeypatch.chdir(tmp_path)
rc = license_headers.main(["--check"])
assert rc == 0
def test_read_enrollment_missing_file_means_repo_wide(tmp_path):
"""Spec §5.2: missing allowlist file = repo-wide enforcement (Phase C final)."""
assert license_headers._read_enrollment(tmp_path) == ["**"]
def test_main_check_missing_allowlist_enforces_repo_wide(tmp_path, monkeypatch):
"""With no allowlist file present, --check should fail on any unheadered file."""
(tmp_path / ".git").mkdir()
(tmp_path / "scripts").mkdir(exist_ok=True)
# No enrollment file written.
(tmp_path / "a.py").write_text("x = 1\n") # no header
monkeypatch.chdir(tmp_path)
rc = license_headers.main(["--check"])
assert rc == 1