From bde9c4156389a0edf787d0cd4eb437400190c7e6 Mon Sep 17 00:00:00 2001 From: CyberMind-FR Date: Wed, 11 Mar 2026 16:42:42 +0100 Subject: [PATCH] feat(metacatalog): Add Virtual Books content aggregator New secubox-app-metacatalog package: - CLI tool (metacatalogctl) with sync/scan/index/books/search commands - Scanners for MetaBlogizer sites and Streamlit apps - Auto-assignment engine with keyword + domain pattern matching - 6 default virtual books (Divination, Visualization, Analytics, etc.) - Tao prism fluoro theme landing page - JSON APIs for catalog and books data - Hourly cron sync - BusyBox-compatible (sed-based extraction) Initial test: 120 entries indexed (118 MetaBlogs, 2 Streamlits) Co-Authored-By: Claude Opus 4.5 --- .claude/HISTORY.md | 35 + .claude/WIP.md | 23 +- .../secubox/secubox-app-metacatalog/Makefile | 68 ++ .../secubox/secubox-app-metacatalog/README.md | 163 +++++ .../files/etc/config/metacatalog | 104 +++ .../files/usr/sbin/metacatalogctl | 678 ++++++++++++++++++ 6 files changed, 1070 insertions(+), 1 deletion(-) create mode 100644 package/secubox/secubox-app-metacatalog/Makefile create mode 100644 package/secubox/secubox-app-metacatalog/README.md create mode 100644 package/secubox/secubox-app-metacatalog/files/etc/config/metacatalog create mode 100644 package/secubox/secubox-app-metacatalog/files/usr/sbin/metacatalogctl diff --git a/.claude/HISTORY.md b/.claude/HISTORY.md index 2315e168..dda834aa 100644 --- a/.claude/HISTORY.md +++ b/.claude/HISTORY.md @@ -4678,3 +4678,38 @@ git checkout HEAD -- index.html - Fix: Added type check in `overview.js` render() and pollData() functions - `var s = (data && typeof data === 'object' && !Array.isArray(data)) ? data : {}` - Deployed to router, cleared LuCI caches + +91. 
**Meta Cataloger - Virtual Books (2026-03-11)** + - New `secubox-app-metacatalog` package for content aggregation + - Virtual Library concept: organizes MetaBlogizer sites, Streamlit apps into themed collections + - CLI tool `/usr/sbin/metacatalogctl` with commands: + - `sync` - Full scan + index + assign books + generate landing + - `scan [source]` - Scan content sources (metablogizer, streamlit) + - `index list|show|refresh` - Index management + - `books list|show` - Virtual book management + - `search <query>` - Full-text search + - `status` - Catalog statistics + - `landing` - Regenerate landing page + - Content scanners: + - MetaBlogizer: extracts title, description, languages, colors, canvas/audio detection + - Streamlit: extracts from app.py and UCI config + - Auto-assignment engine: matches entries to books via keywords and domain patterns + - Default virtual books (6): + - Divination (oracle, iching, hexagram) + - Visualization (canvas, animation, 3d) + - Analytics (dashboard, data, metrics) + - Publications (blog, article, press) + - Security (waf, firewall, crowdsec) + - Media (video, audio, streaming) + - Landing page: Tao prism fluoro theme with book shelf visualization + - API endpoints: `/metacatalog/api/index.json`, `/metacatalog/api/books.json` + - Initial sync: 120 entries indexed (118 MetaBlogs, 2 Streamlits) + - BusyBox-compatible: uses sed instead of grep -P for regex extraction + - Cron integration: hourly auto-sync via `/etc/cron.d/metacatalog` + +92. 
**HAProxy Auto-Sync Mitmproxy Routes (2026-03-11)** + - Fixed: New vhosts were missing mitmproxy route entries + - `haproxyctl vhost add` now auto-runs `mitmproxyctl sync-routes` in background + - `haproxyctl vhost remove` also triggers route sync + - Prevents 404 WAF errors when adding new domains + - Commit: 7cbd6406 "feat(haproxy): Auto-sync mitmproxy routes on vhost add/remove" diff --git a/.claude/WIP.md b/.claude/WIP.md index e237f373..20e1b9ee 100644 --- a/.claude/WIP.md +++ b/.claude/WIP.md @@ -1,6 +1,6 @@ # Work In Progress (Claude) -_Last updated: 2026-03-11 (CrowdSec Dashboard Performance Optimization)_ +_Last updated: 2026-03-11 (Meta Cataloger - Virtual Books)_ > **Architecture Reference**: SecuBox Fanzine v3 — Les 4 Couches @@ -10,6 +10,25 @@ _Last updated: 2026-03-11 (CrowdSec Dashboard Performance Optimization)_ ### 2026-03-11 +- **Meta Cataloger - Virtual Books (Phase 1 Complete)** + - New `secubox-app-metacatalog` package for unified content aggregation + - Organizes MetaBlogizer sites, Streamlit apps into themed Virtual Books + - CLI: `/usr/sbin/metacatalogctl` with sync/scan/index/books/search/status/landing + - Scanners: MetaBlogizer (title, description, languages, colors, canvas/audio) + - Scanners: Streamlit (from app.py and UCI config) + - Auto-assignment: keyword + domain pattern matching to books + - 6 default books: Divination, Visualization, Analytics, Publications, Security, Media + - Landing page: Tao prism fluoro theme at `/www/metacatalog/index.html` + - APIs: `/metacatalog/api/index.json`, `/metacatalog/api/books.json` + - Initial sync: 120 entries (118 MetaBlogs, 2 Streamlits) + - BusyBox-compatible: sed-based regex (no grep -P) + - Cron: hourly auto-sync via `/etc/cron.d/metacatalog` + +- **HAProxy Auto-Sync Mitmproxy Routes** + - Fixed: New vhosts missing mitmproxy route entries causing 404 WAF errors + - `haproxyctl vhost add/remove` now triggers `mitmproxyctl sync-routes` + - Commit: 7cbd6406 + - **CrowdSec Dashboard 
Performance Optimization** - **Problem**: `get_overview` RPC call was timing out (30s+), causing "TypeError: can't assign to property 'countries' on 5" - **Root cause**: Function made 12+ sequential `cscli` calls, each taking 2-5s with CAPI data @@ -404,6 +423,8 @@ _Last updated: 2026-03-11 (CrowdSec Dashboard Performance Optimization)_ ## In Progress +- **Meta Cataloger Phase 2** - RPCD backend, LuCI dashboard, HAProxy source scanner + - **Streamlit Forge Phase 2** - Preview generation, Gitea push/pull - **RTTY Remote Control Module (Phase 4 - Session Replay)** diff --git a/package/secubox/secubox-app-metacatalog/Makefile b/package/secubox/secubox-app-metacatalog/Makefile new file mode 100644 index 00000000..44fb64de --- /dev/null +++ b/package/secubox/secubox-app-metacatalog/Makefile @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (C) 2026 CyberMind.fr - Gandalf +# +# SecuBox Meta Cataloger - Virtual Books for content organization + +include $(TOPDIR)/rules.mk + +PKG_NAME:=secubox-app-metacatalog +PKG_VERSION:=1.0.0 +PKG_RELEASE:=1 +PKG_ARCH:=all + +PKG_LICENSE:=Apache-2.0 +PKG_MAINTAINER:=CyberMind + +include $(INCLUDE_DIR)/package.mk + +define Package/secubox-app-metacatalog + SECTION:=secubox + CATEGORY:=SecuBox + TITLE:=Meta Cataloger - Virtual Books + DEPENDS:=+jsonfilter +coreutils-stat + PKGARCH:=all +endef + +define Package/secubox-app-metacatalog/description + Aggregates MetaBlogizer sites, Streamlit apps, and services + into a unified catalog organized as Virtual Books by theme. 
+endef + +define Package/secubox-app-metacatalog/conffiles +/etc/config/metacatalog +endef + +define Build/Compile +endef + +define Package/secubox-app-metacatalog/install + # UCI config + $(INSTALL_DIR) $(1)/etc/config + $(INSTALL_CONF) ./files/etc/config/metacatalog $(1)/etc/config/ + + # CLI tool + $(INSTALL_DIR) $(1)/usr/sbin + $(INSTALL_BIN) ./files/usr/sbin/metacatalogctl $(1)/usr/sbin/ + + # Data directories + $(INSTALL_DIR) $(1)/srv/metacatalog/entries + $(INSTALL_DIR) $(1)/srv/metacatalog/cache + $(INSTALL_DIR) $(1)/www/metacatalog/api + + # Cron job + $(INSTALL_DIR) $(1)/etc/cron.d + echo "0 * * * * root /usr/sbin/metacatalogctl sync --quiet >/dev/null 2>&1" > $(1)/etc/cron.d/metacatalog +endef + +define Package/secubox-app-metacatalog/postinst +#!/bin/sh +[ -n "$${IPKG_INSTROOT}" ] || { + # Initial sync + /usr/sbin/metacatalogctl sync >/dev/null 2>&1 & + echo "Meta Cataloger installed. Run 'metacatalogctl sync' to index content." +} +exit 0 +endef + +$(eval $(call BuildPackage,secubox-app-metacatalog)) diff --git a/package/secubox/secubox-app-metacatalog/README.md b/package/secubox/secubox-app-metacatalog/README.md new file mode 100644 index 00000000..bbb3ebd2 --- /dev/null +++ b/package/secubox/secubox-app-metacatalog/README.md @@ -0,0 +1,163 @@ +# SecuBox Meta Cataloger + +Virtual library system that aggregates MetaBlogizer sites, Streamlit apps, and other services into a unified catalog organized by themed **Virtual Books**. 
+ +## Overview + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ META CATALOGER │ +│ "Bibliothèque Virtuelle SecuBox" │ +├─────────────────────────────────────────────────────────────────┤ +│ 📚 VIRTUAL BOOKS (auto-generated collections) │ +│ ├── 🔮 Divination & I-Ching │ +│ │ ├── lldh360.maegia.tv (HERMÈS·360 Oracle) │ +│ │ └── yijing.gk2.secubox.in │ +│ ├── 🎮 Interactive Visualizations │ +│ │ └── wall.maegia.tv (MAGIC·CHESS·360) │ +│ ├── 📊 Data & Analytics │ +│ │ └── control.gk2.secubox.in (SecuBox Control) │ +│ └── 📝 Publications & Blogs │ +│ └── gandalf.maegia.tv │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## CLI Commands + +```bash +# Full catalog sync (scan + index + assign books + generate landing) +metacatalogctl sync + +# Scan specific source +metacatalogctl scan # All sources +metacatalogctl scan metablogizer # MetaBlogizer sites only +metacatalogctl scan streamlit # Streamlit apps only + +# Index management +metacatalogctl index list # List all indexed entries +metacatalogctl index show <id> # Show entry details +metacatalogctl index refresh # Rebuild index + +# Virtual books +metacatalogctl books list # List all books with entry counts +metacatalogctl books show <id> # Show book contents + +# Search +metacatalogctl search <query> # Full-text search across catalog + +# Maintenance +metacatalogctl status # Show catalog statistics +metacatalogctl landing # Regenerate landing page only +``` + +## UCI Configuration + +The configuration is in `/etc/config/metacatalog`: + +```uci +config metacatalog 'main' + option enabled '1' + option data_dir '/srv/metacatalog' + option auto_scan_interval '3600' + option landing_path '/www/metacatalog/index.html' + +# Content sources +config source 'metablogizer' + option enabled '1' + option type 'metablogizer' + option path '/srv/metablogizer/sites' + +config source 'streamlit' + option enabled '1' + option type 'streamlit' + option config '/etc/config/streamlit-forge' + +# 
Virtual book definitions +config book 'divination' + option name 'Divination & I-Ching' + option icon '🔮' + option color '#cc00ff' + option description 'Outils oraculaires et systèmes divinatoires' + list keywords 'iching' + list keywords 'oracle' + list keywords 'divination' + list domain_patterns 'lldh' + list domain_patterns 'yijing' + +config book 'visualization' + option name 'Interactive Visualizations' + option icon '🎮' + option color '#00ff88' + list keywords 'canvas' + list keywords 'animation' + list domain_patterns 'wall' +``` + +## File Structure + +``` +/etc/config/metacatalog # UCI configuration +/usr/sbin/metacatalogctl # CLI tool +/srv/metacatalog/ +├── index.json # Main catalog index +├── books.json # Virtual books with entries +├── entries/ # Individual entry JSON files +│ ├── lldh360-maegia-tv.json +│ └── ... +└── cache/ # Scan cache +/www/metacatalog/ +├── index.html # Landing page (Tao prism theme) +└── api/ + ├── index.json # API: all entries + └── books.json # API: all books +``` + +## Default Virtual Books + +| ID | Name | Icon | Keywords | +|----|------|------|----------| +| divination | Divination & I-Ching | 🔮 | iching, oracle, hexagram, yijing, bazi | +| visualization | Interactive Visualizations | 🎮 | canvas, animation, 3d, game | +| analytics | Data & Analytics | 📊 | dashboard, data, analytics, metrics | +| publications | Publications & Blogs | 📝 | blog, article, press, news | +| security | Security Tools | 🛡️ | security, waf, firewall, crowdsec | +| media | Media & Entertainment | 🎬 | video, audio, streaming, media | + +## Auto-Assignment + +Entries are automatically assigned to books based on: +- **Keywords**: Matched against entry title, description, and extracted keywords +- **Domain patterns**: Matched against the entry domain name + +Configure rules in UCI: +```bash +uci add_list metacatalog.divination.keywords='tarot' +uci add_list metacatalog.divination.domain_patterns='tarot' +uci commit metacatalog +metacatalogctl sync +``` + 
+## Cron Integration + +Hourly auto-sync is configured via `/etc/cron.d/metacatalog`: +``` +0 * * * * root /usr/sbin/metacatalogctl sync --quiet >/dev/null 2>&1 +``` + +## API Access + +Landing page and JSON APIs are available at: +- Landing: `https://secubox.in/metacatalog/` +- Entries: `https://secubox.in/metacatalog/api/index.json` +- Books: `https://secubox.in/metacatalog/api/books.json` + +## Dependencies + +- `jsonfilter` - JSON parsing (libubox) +- `coreutils-stat` - File timestamps + +## Integration + +- **MetaBlogizer**: Auto-scans `/srv/metablogizer/sites/` for published sites +- **Streamlit Forge**: Reads `/etc/config/streamlit-forge` for app definitions +- **HAProxy**: Checks vhost SSL/WAF status for exposure info diff --git a/package/secubox/secubox-app-metacatalog/files/etc/config/metacatalog b/package/secubox/secubox-app-metacatalog/files/etc/config/metacatalog new file mode 100644 index 00000000..0e2ee2a8 --- /dev/null +++ b/package/secubox/secubox-app-metacatalog/files/etc/config/metacatalog @@ -0,0 +1,104 @@ +config metacatalog 'main' + option enabled '1' + option data_dir '/srv/metacatalog' + option auto_scan_interval '3600' + option landing_path '/www/metacatalog/index.html' + +config source 'metablogizer' + option enabled '1' + option type 'metablogizer' + option path '/srv/metablogizer/sites' + +config source 'streamlit' + option enabled '1' + option type 'streamlit' + option config '/etc/config/streamlit-forge' + +config source 'haproxy' + option enabled '1' + option type 'haproxy' + option config '/etc/config/haproxy' + +config book 'divination' + option name 'Divination & I-Ching' + option icon '🔮' + option color '#cc00ff' + option description 'Outils oraculaires et systèmes divinatoires' + list keywords 'iching' + list keywords 'oracle' + list keywords 'divination' + list keywords 'hexagram' + list keywords 'yijing' + list keywords 'bazi' + list keywords 'tarot' + list domain_patterns 'lldh' + list domain_patterns 'oracle' + list 
domain_patterns 'yijing' + list domain_patterns 'bazi' + +config book 'visualization' + option name 'Interactive Visualizations' + option icon '🎮' + option color '#00ff88' + option description 'Visualisations interactives et animations' + list keywords 'canvas' + list keywords 'animation' + list keywords 'interactive' + list keywords 'game' + list keywords '3d' + list domain_patterns 'wall' + list domain_patterns 'play' + list domain_patterns 'pix' + +config book 'analytics' + option name 'Data & Analytics' + option icon '📊' + option color '#00ffff' + option description 'Tableaux de bord et outils analytiques' + list keywords 'dashboard' + list keywords 'analytics' + list keywords 'data' + list keywords 'metrics' + list keywords 'control' + list domain_patterns 'control' + list domain_patterns 'evolution' + list domain_patterns 'money' + +config book 'publications' + option name 'Publications & Blogs' + option icon '📝' + option color '#ff9500' + option description 'Publications, blogs et articles' + list keywords 'blog' + list keywords 'article' + list keywords 'press' + list keywords 'news' + list keywords 'zine' + list domain_patterns 'gandalf' + list domain_patterns 'cyberzine' + list domain_patterns 'press' + +config book 'security' + option name 'Security Tools' + option icon '🛡️' + option color '#ff0066' + option description 'Outils de sécurité et protection' + list keywords 'security' + list keywords 'waf' + list keywords 'firewall' + list keywords 'crowdsec' + list keywords 'protection' + +config book 'media' + option name 'Media & Entertainment' + option icon '🎬' + option color '#ffff00' + option description 'Médias, streaming et divertissement' + list keywords 'video' + list keywords 'audio' + list keywords 'streaming' + list keywords 'media' + list keywords 'jellyfin' + list domain_patterns 'media' + list domain_patterns 'tube' + list domain_patterns 'live' diff --git a/package/secubox/secubox-app-metacatalog/files/usr/sbin/metacatalogctl 
b/package/secubox/secubox-app-metacatalog/files/usr/sbin/metacatalogctl new file mode 100644 index 00000000..fc715941 --- /dev/null +++ b/package/secubox/secubox-app-metacatalog/files/usr/sbin/metacatalogctl @@ -0,0 +1,678 @@ +#!/bin/sh +# SecuBox Meta Cataloger +# Copyright (C) 2026 CyberMind.fr +# +# Aggregates MetaBlogizer sites, Streamlit apps, and services +# into a unified catalog with Virtual Books organization + +. /lib/functions.sh + +CONFIG="metacatalog" +VERSION="1.0.0" + +# Paths +DATA_DIR="/srv/metacatalog" +ENTRIES_DIR="$DATA_DIR/entries" +CACHE_DIR="$DATA_DIR/cache" +INDEX_FILE="$DATA_DIR/index.json" +BOOKS_FILE="$DATA_DIR/books.json" +LANDING_PATH="/www/metacatalog" + +# Logging +log_info() { echo "[INFO] $*"; logger -t metacatalog "$*"; } +log_warn() { echo "[WARN] $*" >&2; logger -t metacatalog -p warning "$*"; } +log_error() { echo "[ERROR] $*" >&2; logger -t metacatalog -p err "$*"; } + +# ═══════════════════════════════════════════════════════════════ +# HELPERS +# ═══════════════════════════════════════════════════════════════ + +ensure_dirs() { + mkdir -p "$DATA_DIR" "$ENTRIES_DIR" "$CACHE_DIR" "$LANDING_PATH/api" +} + +uci_get() { uci -q get ${CONFIG}.$1; } + +json_escape() { + printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g; s/ /\\t/g' | tr '\n' ' ' +} + +# Generate entry ID from domain +make_id() { + echo "$1" | sed 's/[^a-zA-Z0-9]/-/g' | tr '[:upper:]' '[:lower:]' +} + +# Get current timestamp +now_iso() { + date -u +"%Y-%m-%dT%H:%M:%SZ" +} + +# ═══════════════════════════════════════════════════════════════ +# METABLOGIZER SCANNER +# ═══════════════════════════════════════════════════════════════ + +scan_metablogizer() { + local sites_root=$(uci_get source_metablogizer.path) + [ -z "$sites_root" ] && sites_root="/srv/metablogizer/sites" + [ ! 
-d "$sites_root" ] && return 0 + + log_info "Scanning MetaBlogizer sites in $sites_root" + local count=0 + + for site_dir in "$sites_root"/*/; do + [ -d "$site_dir" ] || continue + local site=$(basename "$site_dir") + local index_html="$site_dir/index.html" + [ -f "$index_html" ] || continue + + # Get UCI config for this site + local domain=$(uci -q get metablogizer.site_$site.domain 2>/dev/null) + [ -z "$domain" ] && domain="$site.gk2.secubox.in" + local port=$(uci -q get metablogizer.site_$site.port 2>/dev/null) + [ -z "$port" ] && port="80" + + # Extract metadata from HTML (BusyBox-compatible) + local title=$(sed -n 's/.*\([^<]*\)<\/title>.*/\1/p' "$index_html" 2>/dev/null | head -1) + [ -z "$title" ] && title="$site" + local description=$(sed -n 's/.*meta[^>]*description[^>]*content="\([^"]*\)".*/\1/p' "$index_html" 2>/dev/null | head -1) + + # Detect features + local has_canvas="false" + grep -q '<canvas' "$index_html" && has_canvas="true" + local has_audio="false" + grep -qE 'AudioContext|new Audio|audio' "$index_html" && has_audio="true" + + # Extract languages (BusyBox-compatible) + local languages="" + languages=$(sed -n "s/.*setLang(['\"]\\{0,1\\}\\([a-z]\\{2\\}\\).*/\\1/p" "$index_html" 2>/dev/null | sort -u | tr '\n' ',' | sed 's/,$//') + [ -z "$languages" ] && languages=$(sed -n 's/.*lang=["\x27]\{0,1\}\([a-z]\{2\}\).*/\1/p' "$index_html" 2>/dev/null | head -1) + + # Extract primary colors from CSS (BusyBox-compatible) + local colors="" + colors=$(grep -oE '#[0-9a-fA-F]{6}' "$index_html" 2>/dev/null | sort -u | head -5 | tr '\n' ',' | sed 's/,$//') + + # Extract keywords from title/content + local keywords="" + keywords=$(echo "$title $description" | tr '[:upper:]' '[:lower:]' | grep -oE '[a-z]{4,}' | sort -u | head -10 | tr '\n' ',' | sed 's/,$//') + + # File stats (BusyBox-compatible) + local file_count=$(find "$site_dir" -type f 2>/dev/null | wc -l) + local size_kb=$(du -sk "$site_dir" 2>/dev/null | cut -f1) + local size_bytes=$((${size_kb:-0} * 
1024)) + + # Check exposure status + local ssl="false" + local waf="false" + uci -q get haproxy.${site//-/_}_*.ssl >/dev/null 2>&1 && ssl="true" + local backend=$(uci -q get haproxy.${site//-/_}_*.backend 2>/dev/null) + [ "$backend" = "mitmproxy_inspector" ] && waf="true" + + # Generate entry ID + local entry_id=$(make_id "$domain") + + # Get timestamps (BusyBox-compatible using ls) + local created=$(ls -ld --time-style=+%Y-%m-%dT%H:%M:%SZ "$site_dir" 2>/dev/null | awk '{print $6}') + local updated=$(ls -l --time-style=+%Y-%m-%dT%H:%M:%SZ "$index_html" 2>/dev/null | awk '{print $6}') + + # Write entry JSON + cat > "$ENTRIES_DIR/$entry_id.json" <<EOF +{ + "id": "$entry_id", + "type": "metablog", + "name": "$(json_escape "$site")", + "domain": "$domain", + "url": "https://$domain/", + "port": $port, + "source": "metablogizer", + "created": "$created", + "updated": "$updated", + "metadata": { + "title": "$(json_escape "$title")", + "description": "$(json_escape "$description")", + "languages": "$(json_escape "$languages")", + "keywords": "$(json_escape "$keywords")", + "colors": "$(json_escape "$colors")", + "has_canvas": $has_canvas, + "has_audio": $has_audio, + "file_count": $file_count, + "size_bytes": $size_bytes + }, + "books": [], + "status": "published", + "exposure": { + "ssl": $ssl, + "waf": $waf, + "tor": false + } +} +EOF + count=$((count + 1)) + log_info " Indexed: $site -> $domain" + done + + log_info "MetaBlogizer: $count sites indexed" +} + +# ═══════════════════════════════════════════════════════════════ +# STREAMLIT SCANNER +# ═══════════════════════════════════════════════════════════════ + +scan_streamlit() { + local apps_dir="/srv/streamlit/apps" + [ ! 
-d "$apps_dir" ] && return 0 + + log_info "Scanning Streamlit apps in $apps_dir" + local count=0 + + for app_dir in "$apps_dir"/*/; do + [ -d "$app_dir" ] || continue + local app=$(basename "$app_dir") + + # Find main Python file + local main_py="" + for f in "$app_dir/src/app.py" "$app_dir/src/main.py" "$app_dir/src/$app.py"; do + [ -f "$f" ] && { main_py="$f"; break; } + done + [ -z "$main_py" ] && main_py=$(find "$app_dir/src" -name "*.py" -type f 2>/dev/null | head -1) + [ -z "$main_py" ] && continue + + # Get UCI config + local domain=$(uci -q get streamlit-forge.$app.domain 2>/dev/null) + [ -z "$domain" ] && domain="$app.gk2.secubox.in" + local port=$(uci -q get streamlit-forge.$app.port 2>/dev/null) + [ -z "$port" ] && port="8501" + local enabled=$(uci -q get streamlit-forge.$app.enabled 2>/dev/null) + [ "$enabled" != "1" ] && continue + + # Extract title from set_page_config (BusyBox-compatible) + local title=$(sed -n 's/.*page_title\s*=\s*["\x27]\([^"\x27]*\).*/\1/p' "$main_py" 2>/dev/null | head -1) + [ -z "$title" ] && title="$app" + + # Extract page icon (BusyBox-compatible) + local icon=$(sed -n 's/.*page_icon\s*=\s*["\x27]\([^"\x27]*\).*/\1/p' "$main_py" 2>/dev/null | head -1) + + # Check requirements + local deps="" + [ -f "$app_dir/src/requirements.txt" ] && deps=$(cat "$app_dir/src/requirements.txt" | tr '\n' ',' | sed 's/,$//') + + # Generate entry ID + local entry_id=$(make_id "$domain") + + # Get timestamps (BusyBox-compatible) + local created=$(ls -ld --time-style=+%Y-%m-%dT%H:%M:%SZ "$app_dir" 2>/dev/null | awk '{print $6}') + local updated=$(ls -l --time-style=+%Y-%m-%dT%H:%M:%SZ "$main_py" 2>/dev/null | awk '{print $6}') + + # File count + local file_count=$(find "$app_dir" -type f 2>/dev/null | wc -l) + + # Check exposure + local ssl="false" + local waf="false" + uci -q get haproxy.*_$app.ssl >/dev/null 2>&1 && ssl="true" + + cat > "$ENTRIES_DIR/$entry_id.json" <<EOF +{ + "id": "$entry_id", + "type": "streamlit", + "name": "$(json_escape 
"$app")", + "domain": "$domain", + "url": "https://$domain/", + "port": $port, + "source": "streamlit-forge", + "created": "$created", + "updated": "$updated", + "metadata": { + "title": "$(json_escape "$title")", + "icon": "$(json_escape "$icon")", + "dependencies": "$(json_escape "$deps")", + "file_count": $file_count + }, + "books": [], + "status": "published", + "exposure": { + "ssl": $ssl, + "waf": $waf, + "tor": false + } +} +EOF + count=$((count + 1)) + log_info " Indexed: $app -> $domain" + done + + log_info "Streamlit: $count apps indexed" +} + +# ═══════════════════════════════════════════════════════════════ +# BOOK ASSIGNMENT +# ═══════════════════════════════════════════════════════════════ + +assign_books() { + log_info "Assigning entries to virtual books..." + + # Load book definitions + local books_tmp="/tmp/metacatalog_books_$$.json" + echo "[" > "$books_tmp" + local first_book=1 + + config_load metacatalog + config_foreach _collect_book book + + # Process each entry + for entry_file in "$ENTRIES_DIR"/*.json; do + [ -f "$entry_file" ] || continue + local entry_id=$(basename "$entry_file" .json) + + # Read entry data + local domain=$(jsonfilter -i "$entry_file" -e '@.domain' 2>/dev/null) + local title=$(jsonfilter -i "$entry_file" -e '@.metadata.title' 2>/dev/null) + local keywords=$(jsonfilter -i "$entry_file" -e '@.metadata.keywords' 2>/dev/null) + + # Combine searchable text + local search_text=$(echo "$domain $title $keywords" | tr '[:upper:]' '[:lower:]') + + # Check against each book + local matched_books="" + config_foreach _match_book book "$entry_id" "$search_text" + + # Update entry with matched books + if [ -n "$matched_books" ]; then + local books_json=$(echo "$matched_books" | sed 's/,$//' | sed 's/\([^,]*\)/"\1"/g' | tr ',' ',') + sed -i "s/\"books\": \[\]/\"books\": [$books_json]/" "$entry_file" + fi + done + + log_info "Book assignment complete" +} + +_collect_book() { + local section="$1" + local name=$(uci_get $section.name) + 
local icon=$(uci_get $section.icon) + local color=$(uci_get $section.color) + local desc=$(uci_get $section.description) + + [ -z "$name" ] && return + + # Collect keywords + local keywords="" + config_list_foreach "$section" keywords _append_keyword + + # Collect domain patterns + local patterns="" + config_list_foreach "$section" domain_patterns _append_pattern +} + +_append_keyword() { keywords="$keywords,$1"; } +_append_pattern() { patterns="$patterns,$1"; } + +_match_book() { + local section="$1" + local entry_id="$2" + local search_text="$3" + + local match=0 + + # Check keywords + local kw + config_list_foreach "$section" keywords _check_kw + + # Check domain patterns + config_list_foreach "$section" domain_patterns _check_pattern + + if [ $match -gt 0 ]; then + matched_books="$matched_books$section," + fi +} + +_check_kw() { + echo "$search_text" | grep -qi "$1" && match=1 +} + +_check_pattern() { + echo "$search_text" | grep -qi "$1" && match=1 +} + +# ═══════════════════════════════════════════════════════════════ +# INDEX GENERATION +# ═══════════════════════════════════════════════════════════════ + +generate_index() { + log_info "Generating index.json..." + + echo "{" > "$INDEX_FILE" + echo ' "version": "'$VERSION'",' >> "$INDEX_FILE" + echo ' "generated": "'$(now_iso)'",' >> "$INDEX_FILE" + echo ' "entries": [' >> "$INDEX_FILE" + + local first=1 + for entry_file in "$ENTRIES_DIR"/*.json; do + [ -f "$entry_file" ] || continue + [ $first -eq 0 ] && echo "," >> "$INDEX_FILE" + cat "$entry_file" >> "$INDEX_FILE" + first=0 + done + + echo "" >> "$INDEX_FILE" + echo " ]" >> "$INDEX_FILE" + echo "}" >> "$INDEX_FILE" + + # Copy to web API + cp "$INDEX_FILE" "$LANDING_PATH/api/index.json" + + local count=$(ls -1 "$ENTRIES_DIR"/*.json 2>/dev/null | wc -l) + log_info "Index generated: $count entries" +} + +generate_books_json() { + log_info "Generating books.json..." 
+ + echo "{" > "$BOOKS_FILE" + echo ' "version": "'$VERSION'",' >> "$BOOKS_FILE" + echo ' "generated": "'$(now_iso)'",' >> "$BOOKS_FILE" + echo ' "books": [' >> "$BOOKS_FILE" + + local first=1 + config_load metacatalog + config_foreach _output_book book + + echo "" >> "$BOOKS_FILE" + echo " ]" >> "$BOOKS_FILE" + echo "}" >> "$BOOKS_FILE" + + cp "$BOOKS_FILE" "$LANDING_PATH/api/books.json" +} + +_output_book() { + local section="$1" + local name=$(uci_get $section.name) + local icon=$(uci_get $section.icon) + local color=$(uci_get $section.color) + local desc=$(uci_get $section.description) + + [ -z "$name" ] && return + + # Find entries in this book + local entries="" + for entry_file in "$ENTRIES_DIR"/*.json; do + [ -f "$entry_file" ] || continue + local books=$(jsonfilter -i "$entry_file" -e '@.books[*]' 2>/dev/null) + echo "$books" | grep -q "$section" && { + local eid=$(jsonfilter -i "$entry_file" -e '@.id') + entries="$entries\"$eid\"," + } + done + entries=$(echo "$entries" | sed 's/,$//') + + [ $first -eq 0 ] && echo "," >> "$BOOKS_FILE" + cat >> "$BOOKS_FILE" <<EOF + { + "id": "$section", + "name": "$(json_escape "$name")", + "icon": "$icon", + "color": "$color", + "description": "$(json_escape "$desc")", + "entries": [$entries] + } +EOF + first=0 +} + +# ═══════════════════════════════════════════════════════════════ +# LANDING PAGE +# ═══════════════════════════════════════════════════════════════ + +generate_landing() { + log_info "Generating landing page..." + + cat > "$LANDING_PATH/index.html" <<'HTMLEOF' +<!DOCTYPE html> +<html lang="fr"> +<head> +<meta charset="UTF-8"> +<meta name="viewport" content="width=device-width,initial-scale=1"> +<title>Bibliotheque Virtuelle SecuBox + + + +

Bibliotheque Virtuelle

+
Chargement...
+
+
SecuBox Meta Cataloger v1.0
+ + + +HTMLEOF + + log_info "Landing page generated at $LANDING_PATH/index.html" +} + +# ═══════════════════════════════════════════════════════════════ +# COMMANDS +# ═══════════════════════════════════════════════════════════════ + +cmd_scan() { + ensure_dirs + local source="$1" + + if [ -n "$source" ]; then + case "$source" in + metablogizer) scan_metablogizer ;; + streamlit) scan_streamlit ;; + *) log_error "Unknown source: $source"; return 1 ;; + esac + else + scan_metablogizer + scan_streamlit + fi +} + +cmd_index() { + local subcmd="$1" + shift + + case "$subcmd" in + list) + for f in "$ENTRIES_DIR"/*.json; do + [ -f "$f" ] || continue + local id=$(basename "$f" .json) + local type=$(jsonfilter -i "$f" -e '@.type') + local domain=$(jsonfilter -i "$f" -e '@.domain') + local title=$(jsonfilter -i "$f" -e '@.metadata.title') + printf "%-25s %-10s %-30s %s\n" "$id" "$type" "$domain" "$title" + done + ;; + show) + local id="$1" + [ -f "$ENTRIES_DIR/$id.json" ] && cat "$ENTRIES_DIR/$id.json" | jsonfilter -e '@' + ;; + refresh) + cmd_scan + assign_books + generate_index + generate_books_json + ;; + *) + echo "Usage: metacatalogctl index [list|show |refresh]" + ;; + esac +} + +cmd_books() { + local subcmd="$1" + shift + + case "$subcmd" in + list) + config_load metacatalog + config_foreach _print_book book + ;; + show) + local book_id="$1" + [ -f "$BOOKS_FILE" ] && jsonfilter -i "$BOOKS_FILE" -e "@.books[@.id='$book_id']" + ;; + *) + echo "Usage: metacatalogctl books [list|show ]" + ;; + esac +} + +_print_book() { + local section="$1" + local name=$(uci_get $section.name) + local icon=$(uci_get $section.icon) + local count=0 + + for f in "$ENTRIES_DIR"/*.json; do + [ -f "$f" ] || continue + jsonfilter -i "$f" -e '@.books[*]' 2>/dev/null | grep -q "$section" && count=$((count + 1)) + done + + printf "%s %-25s %s (%d entries)\n" "$icon" "$name" "$section" "$count" +} + +cmd_search() { + local query=$(echo "$*" | tr '[:upper:]' '[:lower:]') + [ -z "$query" ] && { echo 
"Usage: metacatalogctl search "; return 1; } + + for f in "$ENTRIES_DIR"/*.json; do + [ -f "$f" ] || continue + local content=$(cat "$f" | tr '[:upper:]' '[:lower:]') + if echo "$content" | grep -q "$query"; then + local id=$(jsonfilter -i "$f" -e '@.id') + local type=$(jsonfilter -i "$f" -e '@.type') + local domain=$(jsonfilter -i "$f" -e '@.domain') + local title=$(jsonfilter -i "$f" -e '@.metadata.title') + printf "%-10s %-30s %s\n" "$type" "$domain" "$title" + fi + done +} + +cmd_sync() { + log_info "Full catalog sync..." + ensure_dirs + cmd_scan + assign_books + generate_index + generate_books_json + generate_landing + log_info "Sync complete" +} + +cmd_status() { + local entries=$(ls -1 "$ENTRIES_DIR"/*.json 2>/dev/null | wc -l) + local metablogs=$(grep -l '"type": "metablog"' "$ENTRIES_DIR"/*.json 2>/dev/null | wc -l) + local streamlits=$(grep -l '"type": "streamlit"' "$ENTRIES_DIR"/*.json 2>/dev/null | wc -l) + + echo "Meta Cataloger Status" + echo "====================" + echo "Total entries: $entries" + echo " MetaBlogs: $metablogs" + echo " Streamlits: $streamlits" + echo "" + echo "Virtual Books:" + cmd_books list +} + +cmd_landing() { + generate_landing +} + +show_help() { + cat < [options] + +Commands: + scan [source] Scan content sources (metablogizer|streamlit) + index list List all indexed entries + index show Show entry details + index refresh Full rescan and reindex + books list List virtual books + books show Show book contents + search Search catalog + sync Full scan + index + landing + landing Regenerate landing page + status Show catalog status + help Show this help + +EOF +} + +# ═══════════════════════════════════════════════════════════════ +# MAIN +# ═══════════════════════════════════════════════════════════════ + +case "$1" in + scan) shift; cmd_scan "$@" ;; + index) shift; cmd_index "$@" ;; + books) shift; cmd_books "$@" ;; + search) shift; cmd_search "$@" ;; + sync) cmd_sync ;; + landing) cmd_landing ;; + status) cmd_status ;; + 
help|--help|-h|"") show_help ;; + *) log_error "Unknown command: $1"; show_help; exit 1 ;; +esac