From ab49e19c32e4e8dd338bdafac961d0fcf0f08f40 Mon Sep 17 00:00:00 2001
From: CyberMind-FR <gandalf@Gk2.net>
Date: Sat, 21 Feb 2026 18:15:55 +0100
Subject: [PATCH] feat(peertube): Add transcript extraction & AI analysis tool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New CLI: peertube-analyse
- Extract video metadata via yt-dlp
- Download existing PeerTube subtitles (VTT)
- Fallback to Whisper local transcription (medium model)
- Claude AI analysis with structured intelligence report

Features:
- POSIX-compatible (OpenWrt, Alpine, Debian)
- Modular pipeline with graceful degradation
- Colored terminal output with status indicators
- Configurable Whisper model and language
- Truncation for large transcripts (12k chars)

CLI flags:
  --url <url>         Video URL
  --no-whisper        Subtitles only
  --force-whisper     Force transcription
  --no-analyse        Skip Claude analysis
  --model <name>      Whisper model
  --lang <code>       Language code

Output structure:
  ./output/<slug>/
    ├── <slug>.meta.json
    ├── <slug>.transcript.txt
    └── <slug>.analyse.md

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 package/secubox/secubox-app-peertube/Makefile |   4 +-
 .../files/usr/sbin/peertube-analyse           | 778 ++++++++++++++++++
 2 files changed, 781 insertions(+), 1 deletion(-)
 create mode 100755 package/secubox/secubox-app-peertube/files/usr/sbin/peertube-analyse
diff --git a/package/secubox/secubox-app-peertube/Makefile b/package/secubox/secubox-app-peertube/Makefile
index 74b1679b..9ea0d886 100644
--- a/package/secubox/secubox-app-peertube/Makefile
+++ b/package/secubox/secubox-app-peertube/Makefile
@@ -2,7 +2,7 @@ include $(TOPDIR)/rules.mk
 
 PKG_NAME:=secubox-app-peertube
 PKG_RELEASE:=1
-PKG_VERSION:=1.0.0
+PKG_VERSION:=1.1.0
 PKG_ARCH:=all
 PKG_MAINTAINER:=CyberMind Studio <contact@cybermind.fr>
 PKG_LICENSE:=AGPL-3.0
@@ -22,6 +22,7 @@ define Package/secubox-app-peertube/description
 PeerTube federated video streaming platform.
 Runs in an LXC Debian container with PostgreSQL, Redis, and Node.js.
 Supports video hosting, live streaming, and ActivityPub federation.
+Includes peertube-analyse: transcript extraction and Claude AI analysis.
 endef
 
 define Package/secubox-app-peertube/conffiles
@@ -40,6 +41,7 @@ define Package/secubox-app-peertube/install
 
 	$(INSTALL_DIR) $(1)/usr/sbin
 	$(INSTALL_BIN) ./files/usr/sbin/peertubectl $(1)/usr/sbin/peertubectl
+	$(INSTALL_BIN) ./files/usr/sbin/peertube-analyse $(1)/usr/sbin/peertube-analyse
 endef
 
 $(eval $(call BuildPackage,secubox-app-peertube))
diff --git a/package/secubox/secubox-app-peertube/files/usr/sbin/peertube-analyse b/package/secubox/secubox-app-peertube/files/usr/sbin/peertube-analyse
new file mode 100755
index 00000000..3eca8c22
--- /dev/null
+++ b/package/secubox/secubox-app-peertube/files/usr/sbin/peertube-analyse
@@ -0,0 +1,778 @@
+#!/bin/sh
+# PeerTube Video Transcript Extraction & AI Analysis
+# SecuBox Intelligence Module
+# Compatible: OpenWrt, Alpine, Debian, Ubuntu
+
+set -e  # exit as soon as a command fails outside a condition or an &&/|| list
+
+#=============================================================================
+# CONFIGURATION
+#=============================================================================
+
+SCRIPT_VERSION="1.0.0"  # printed in the banner, help, --version and report footer
+PEERTUBE_INSTANCE="${PEERTUBE_INSTANCE:-tube.gk2.secubox.in}"  # env-overridable; not read anywhere else in this script
+OUTPUT_BASE="${OUTPUT_BASE:-./output}"  # parent of the per-video directories (--output)
+WHISPER_MODEL="${WHISPER_MODEL:-medium}"  # Whisper model name (--model)
+WHISPER_LANG="${WHISPER_LANG:-fr}"  # Whisper language, also the subtitle language requested (--lang)
+CLAUDE_MODEL="${CLAUDE_MODEL:-claude-sonnet-4-6}"  # model field of the API request (--claude-model)
+MAX_TRANSCRIPT_CHARS=12000  # longer transcripts are cut with head -c before the API call
+MAX_TOKENS=2000  # max_tokens field of the API request
+
+#=============================================================================
+# COLORS & LOGGING
+#=============================================================================
+
+# Emit colors only when stdout is a terminal; piped or redirected output
+# stays plain. tput is tried first; if it is missing or fails (unknown TERM)
+# an ANSI sequence is used instead. Values hold real escape bytes so they
+# render through both printf %b (log_*) and here-documents (show_help).
+if [ -t 1 ]; then
+    RED=$(tput setaf 1 2>/dev/null)    || RED=$(printf '\033[0;31m')
+    GREEN=$(tput setaf 2 2>/dev/null)  || GREEN=$(printf '\033[0;32m')
+    YELLOW=$(tput setaf 3 2>/dev/null) || YELLOW=$(printf '\033[1;33m')
+    CYAN=$(tput setaf 6 2>/dev/null)   || CYAN=$(printf '\033[0;36m')
+    BOLD=$(tput bold 2>/dev/null)      || BOLD=$(printf '\033[1m')
+    NC=$(tput sgr0 2>/dev/null)        || NC=$(printf '\033[0m')
+else
+    RED=''; GREEN=''
+    YELLOW=''; CYAN=''
+    BOLD=''; NC=''
+fi
+
+log_info()  { printf '%b[INFO]%b %s\n' "${CYAN}" "${NC}" "$1"; }        # stdout
+log_ok()    { printf '%b[OK]%b %s\n' "${GREEN}" "${NC}" "$1"; }         # stdout
+log_warn()  { printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$1"; }      # stdout
+log_error() { printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1" 1>&2; }   # stderr
+log_step()  { printf '\n%b==> %s%b\n' "${BOLD}${CYAN}" "$1" "${NC}"; }  # stdout, preceded by a blank line
+
+#=============================================================================
+# DEPENDENCY CHECK
+#=============================================================================
+
+# Verify required tools and detect an optional Whisper backend (sets WHISPER_CMD).
+# Returns 1 if yt-dlp, ffmpeg, jq or curl is missing, 0 otherwise.
+check_dependencies() {
+    log_step "Checking dependencies"
+    local missing="" main_bin
+    local deps="yt-dlp ffmpeg jq curl"
+    for dep in $deps; do
+        if command -v "$dep" >/dev/null 2>&1; then
+            log_ok "$dep found: $(command -v "$dep")"
+        else
+            log_error "$dep not found"
+            missing="$missing $dep"
+        fi
+    done
+
+    # Whisper check (optional but warned). `command -v main` also matches this
+    # script's own main() function, so the lookup runs with that function unset.
+    main_bin=$(unset -f main; command -v main 2>/dev/null) || main_bin=""
+    if command -v whisper >/dev/null 2>&1; then
+        log_ok "whisper found: $(command -v whisper)"
+        WHISPER_CMD="whisper"
+    elif command -v whisper-cpp >/dev/null 2>&1; then
+        log_ok "whisper-cpp found: $(command -v whisper-cpp)"
+        WHISPER_CMD="whisper-cpp"
+    elif [ -n "$main_bin" ] && [ -f "$(dirname "$main_bin")/models/ggml-medium.bin" ]; then
+        log_ok "whisper.cpp (main) found"
+        WHISPER_CMD="main"
+    else
+        log_warn "whisper not found - transcription will only work with existing subtitles"
+        WHISPER_CMD=""
+    fi
+
+    # API key check
+    if [ -z "$ANTHROPIC_API_KEY" ]; then
+        log_warn "ANTHROPIC_API_KEY not set - AI analysis disabled"
+    else
+        log_ok "ANTHROPIC_API_KEY configured"
+    fi
+    if [ -n "$missing" ]; then
+        log_error "Missing required dependencies:$missing"
+        return 1
+    fi
+    return 0
+}
+
+#=============================================================================
+# UTILITY FUNCTIONS
+#=============================================================================
+
+# Derive the video identifier from a PeerTube watch URL.
+# Recognised layouts (short ID or UUID):
+#   https://instance/w/VIDEO_ID
+#   https://instance/videos/watch/VIDEO_ID
+# Input matching neither layout is echoed back unchanged.
+extract_video_id() {
+    echo "$1" | sed -E -e 's|.*/w/([^/?]+).*|\1|' \
+                       -e 's|.*/videos/watch/([^/?]+).*|\1|'
+}
+
+# Generate a filesystem-safe slug (a-z, 0-9 and '-', at most 50 chars) from the title in $1.
+generate_slug() {
+    # Cut to 50 chars BEFORE trimming hyphens so truncation cannot leave a trailing '-'.
+    printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]' | \
+        sed -E 's/[àáâãäåÀÁÂÃÄÅ]/a/g; s/[èéêëÈÉÊË]/e/g; s/[ìíîïÌÍÎÏ]/i/g; s/[òóôõöÒÓÔÕÖ]/o/g; s/[ùúûüÙÚÛÜ]/u/g; s/[çÇ]/c/g' | \
+        sed -E 's/[^a-z0-9]+/-/g' | cut -c1-50 | sed -E 's/^-+|-+$//g'
+}
+
+# Clean VTT to plain text: vtt_to_text <vtt_file> <txt_file>
+vtt_to_text() {
+    local vtt_file="$1"
+    local txt_file="$2"
+    # Drop headers, cue ids and cue timing lines (hours are optional in WebVTT,
+    # so any line containing "-->" is treated as timing); decode &amp; last so
+    # "&amp;lt;" becomes "&lt;" rather than "<". Repeated lines are kept once.
+    sed -E '
+        /^WEBVTT/d
+        /^(Kind|Language):/d
+        /^NOTE/d
+        /^[0-9]+$/d
+        /-->/d
+        /^$/d
+        s/<[^>]*>//g
+        s/&nbsp;/ /g
+        s/&lt;/</g
+        s/&gt;/>/g
+        s/&amp;/\&/g
+    ' "$vtt_file" | \
+    awk '!seen[$0]++' | \
+    tr '\n' ' ' | \
+    sed -E 's/  +/ /g; s/^ +| +$//g' > "$txt_file"
+}
+
+# Echo $1 as is when its length is at most $2; otherwise echo each line cut to
+# $2 characters with the trailing partial word removed, then a truncation marker.
+truncate_text() {
+    local text="$1"
+    local max="$2"
+    if ! [ ${#text} -le "$max" ]; then
+        echo "$text" | cut -c1-"$max" | sed 's/[^ ]*$//'
+        echo "... [TRUNCATED]"
+    else
+        echo "$text"
+    fi
+}
+
+#=============================================================================
+# 1. METADATA EXTRACTION
+#=============================================================================
+
+# Fetch video metadata with yt-dlp and store a reduced JSON document.
+# Arguments: $1 video URL, $2 output directory, $3 slug
+# Outputs:   writes <dir>/<slug>.meta.json; prints title, duration and uploader
+# Returns:   0 on success, 1 if yt-dlp or the jq projection fails
+extract_metadata() {
+    local url="$1"
+    local output_dir="$2"
+    local slug="$3"
+    local meta_file="$output_dir/${slug}.meta.json"
+
+    log_step "Extracting metadata"
+
+    # "// {}" keeps `keys` from failing when yt-dlp reports no subtitles or
+    # automatic_captions (null), which would otherwise leave an empty meta file.
+    if yt-dlp --dump-json --no-warnings "$url" 2>/dev/null > "$meta_file.tmp" &&
+       jq '{
+            id: .id,
+            title: .title,
+            description: .description,
+            duration: .duration,
+            duration_string: .duration_string,
+            upload_date: .upload_date,
+            uploader: .uploader,
+            uploader_id: .uploader_id,
+            channel: .channel,
+            view_count: .view_count,
+            like_count: .like_count,
+            tags: .tags,
+            categories: .categories,
+            webpage_url: .webpage_url,
+            thumbnail: .thumbnail,
+            language: .language,
+            subtitles: ((.subtitles // {}) | keys),
+            automatic_captions: ((.automatic_captions // {}) | keys)
+        }' "$meta_file.tmp" > "$meta_file"; then
+        rm -f "$meta_file.tmp"
+        log_ok "Metadata saved to $meta_file"
+
+        # Display summary
+        printf "  Title: %s\n" "$(jq -r '.title' "$meta_file")"
+        printf "  Duration: %s\n" "$(jq -r '.duration_string // .duration' "$meta_file")"
+        printf "  Uploader: %s\n" "$(jq -r '.uploader // .channel // "Unknown"' "$meta_file")"
+
+        return 0
+    fi
+
+    # Do not leave a partial or empty file behind for later stages to read.
+    rm -f "$meta_file.tmp" "$meta_file"
+    log_error "Failed to extract metadata"
+    return 1
+}
+
+#=============================================================================
+# 2. SUBTITLE DOWNLOAD & CONVERSION
+#=============================================================================
+
+# Ask the instance's REST API whether the video has captions.
+# Arguments: $1 video ID, $2 instance host name
+# Outputs:   on success, an [OK] line followed by one caption language id per line
+# Returns:   0 when the API answers HTTP 200 with .total > 0, otherwise 1
+check_peertube_captions() {
+    local video_id="$1" instance="$2"
+    local api_url="https://${instance}/api/v1/videos/${video_id}/captions"
+    local response
+
+    log_info "Checking PeerTube captions API..."
+
+    # curl appends the status code on its own last line; printf (not echo) keeps JSON backslashes intact.
+    response=$(curl -s -w "\n%{http_code}" "$api_url" 2>/dev/null)
+    local http_code="$(printf '%s\n' "$response" | tail -n1)"
+    local body="$(printf '%s\n' "$response" | sed '$d')"
+    [ "$http_code" = "200" ] || return 1
+
+    local caption_count="$(printf '%s\n' "$body" | jq '.total // 0')"
+    [ "$caption_count" -gt 0 ] || return 1
+
+    log_ok "Found $caption_count caption(s) via API"
+    printf '%s\n' "$body" | jq -r '.data[].language.id' 2>/dev/null
+    return 0
+}
+
+# Download existing subtitles with yt-dlp and convert them to plain text.
+# Arguments: $1 URL, $2 output directory, $3 slug, $4 language (default fr)
+# Returns:   0 when <slug>.transcript.txt was produced, 1 when no subtitle file was found
+download_subtitles() {
+    local url="$1"
+    local output_dir="$2"
+    local slug="$3"
+    local lang="${4:-fr}"
+    local vtt_file="$output_dir/${slug}.${lang}.vtt"
+    local txt_file="$output_dir/${slug}.transcript.txt"
+    local found_vtt
+
+    log_step "Downloading subtitles"
+
+    # Try to download subtitles with yt-dlp
+    if yt-dlp --write-sub --write-auto-sub --sub-lang "$lang,en" \
+              --sub-format vtt --skip-download \
+              -o "$output_dir/${slug}" "$url" 2>/dev/null; then
+
+        # Both "$lang" and "en" are requested, so prefer the requested
+        # language and only then fall back to whichever VTT file exists.
+        found_vtt="$vtt_file"
+        [ -f "$found_vtt" ] || found_vtt=$(find "$output_dir" -name "${slug}*.vtt" -type f | head -1)
+
+        if [ -n "$found_vtt" ] && [ -f "$found_vtt" ]; then
+            # Rename to the standard name unless the file already has it
+            [ "$found_vtt" = "$vtt_file" ] || mv "$found_vtt" "$vtt_file" 2>/dev/null || cp "$found_vtt" "$vtt_file"
+            log_ok "Subtitles downloaded: $vtt_file"
+
+            vtt_to_text "$vtt_file" "$txt_file"
+            log_ok "Converted to text: $txt_file"
+            printf "  Word count: %s\n" "$(wc -w < "$txt_file" | tr -d ' ')"
+            return 0
+        fi
+    fi
+
+    log_warn "No subtitles available for download"
+    return 1
+}
+
+#=============================================================================
+# 3. WHISPER TRANSCRIPTION
+#=============================================================================
+
+# Download the audio track and convert it to 16 kHz mono WAV.
+# Arguments: $1 URL, $2 output directory, $3 slug
+# Returns:   0 when ffmpeg wrote <dir>/<slug>.audio.wav, otherwise 1
+extract_audio() {
+    local url="$1"
+    local output_dir="$2"
+    local slug="$3"
+    local audio_file="$output_dir/${slug}.audio.wav"
+    local downloaded
+
+    log_info "Extracting audio..."
+    if yt-dlp -x --audio-format wav -o "$output_dir/${slug}.%(ext)s" "$url" 2>/dev/null; then
+        # Pick up the download, ignoring a stale <slug>.audio.wav from an earlier run
+        downloaded=$(find "$output_dir" -name "${slug}.*" -type f ! -name "${slug}.audio.wav" | grep -E '\.(wav|mp3|m4a|opus|webm)$' | head -1)
+        # Only report success when the conversion itself succeeded
+        if [ -n "$downloaded" ] && ffmpeg -y -i "$downloaded" -vn -ac 1 -ar 16000 -f wav "$audio_file" 2>/dev/null; then
+            rm -f "$downloaded"
+            log_ok "Audio extracted: $audio_file"
+            return 0
+        fi
+    fi
+
+    log_error "Failed to extract audio"
+    return 1
+}
+
+# Transcribe an audio file with the detected Whisper backend.
+# Arguments: $1 audio file, $2 output directory, $3 slug, $4 model, $5 language
+# Globals:   WHISPER_CMD (read), WHISPER_MODELS_PATH (read, whisper.cpp only)
+# Returns:   0 when <dir>/<slug>.transcript.txt exists afterwards, otherwise 1
+run_whisper() {
+    local audio_file="$1"
+    local output_dir="$2"
+    local slug="$3"
+    local model="$4"
+    local lang="$5"
+    local txt_file="$output_dir/${slug}.transcript.txt"
+
+    log_step "Running Whisper transcription"
+
+    if [ -z "$WHISPER_CMD" ]; then
+        log_error "Whisper not available"
+        return 1
+    fi
+    log_info "Model: $model, Language: $lang"
+    log_info "This may take a while..."
+
+    case "$WHISPER_CMD" in
+        whisper)
+            # OpenAI Whisper Python
+            whisper "$audio_file" \
+                --model "$model" \
+                --language "$lang" \
+                --output_format txt \
+                --output_dir "$output_dir" \
+                --verbose False 2>/dev/null
+
+            # Rename output
+            local whisper_out="$output_dir/$(basename "$audio_file" .wav).txt"
+            [ -f "$whisper_out" ] && mv "$whisper_out" "$txt_file"
+            ;;
+
+        whisper-cpp|main)
+            # whisper.cpp
+            local model_path="${WHISPER_MODELS_PATH:-$HOME/.cache/whisper}/ggml-${model}.bin"
+            if [ ! -f "$model_path" ]; then
+                log_warn "Model not found: $model_path"
+                log_info "Downloading model..."
+                mkdir -p "$(dirname "$model_path")"
+                # -f: an HTTP error page must not be saved as if it were a model
+                curl -fL "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-${model}.bin" \
+                     -o "$model_path" 2>/dev/null || {
+                    rm -f "$model_path"
+                    log_error "Failed to download model"
+                    return 1
+                }
+            fi
+            # `command` skips shell functions: this script defines main() too
+            command "$WHISPER_CMD" -m "$model_path" -l "$lang" -otxt -of "$output_dir/${slug}.transcript" "$audio_file" 2>/dev/null
+            ;;
+    esac
+
+    if [ -f "$txt_file" ]; then
+        # Strip [bracketed] markers one at a time (non-greedy) and squeeze spaces
+        sed 's/\[[^]]*\]//g; s/  */ /g' "$txt_file" > "$txt_file.tmp" && mv "$txt_file.tmp" "$txt_file"
+        local word_count="$(wc -w < "$txt_file" | tr -d ' ')"
+        log_ok "Transcription complete: $word_count words"
+        return 0
+    else
+        log_error "Whisper transcription failed"
+        return 1
+    fi
+}
+
+#=============================================================================
+# 4. CLAUDE AI ANALYSIS
+#=============================================================================
+
+# Send metadata and transcript to the Anthropic Messages API and save the report.
+# Arguments: $1 metadata JSON file, $2 transcript file, $3 output directory, $4 slug
+# Globals:   ANTHROPIC_API_KEY, CLAUDE_MODEL, MAX_TOKENS, MAX_TRANSCRIPT_CHARS (read)
+# Returns:   0 when <dir>/<slug>.analyse.md was written, 1 otherwise
+analyse_with_claude() {
+    local meta_file="$1"
+    local transcript_file="$2"
+    local output_dir="$3"
+    local slug="$4"
+    log_step "Running Claude AI analysis"
+
+    if [ -z "$ANTHROPIC_API_KEY" ]; then
+        log_error "ANTHROPIC_API_KEY not set"
+        return 1
+    fi
+
+    if [ ! -f "$transcript_file" ]; then
+        log_error "Transcript file not found: $transcript_file"
+        return 1
+    fi
+
+    local analysis_file="$output_dir/${slug}.analyse.md"
+    # Read metadata
+    local title="$(jq -r '.title // "Unknown"' "$meta_file" 2>/dev/null)"
+    local duration="$(jq -r '.duration_string // .duration // "Unknown"' "$meta_file" 2>/dev/null)"
+    local uploader="$(jq -r '.uploader // .channel // "Unknown"' "$meta_file" 2>/dev/null)"
+    local upload_date="$(jq -r '.upload_date // "Unknown"' "$meta_file" 2>/dev/null)"
+    local tags="$(jq -r '.tags | if . then join(", ") else "None" end' "$meta_file" 2>/dev/null)"
+    local url="$(jq -r '.webpage_url // "Unknown"' "$meta_file" 2>/dev/null)"
+
+    # Read and truncate transcript
+    local transcript="$(cat "$transcript_file")"
+    local transcript_len=${#transcript}
+    if [ "$transcript_len" -gt "$MAX_TRANSCRIPT_CHARS" ]; then
+        log_warn "Transcript truncated from $transcript_len to $MAX_TRANSCRIPT_CHARS chars"
+        transcript=$(printf '%s' "$transcript" | head -c "$MAX_TRANSCRIPT_CHARS")
+        transcript="${transcript}... [TRUNCATED - Original: ${transcript_len} chars]"
+    fi
+
+    # No manual JSON escaping here: jq --arg below encodes the whole prompt.
+    # Pre-encoding the transcript and title made them appear double-escaped in
+    # the prompt and left a literal "\n" at the end of the report heading.
+    log_info "Calling Claude API ($CLAUDE_MODEL)..."
+
+    # Build the API request
+    local system_prompt="Tu es un analyste expert en renseignement, cybersécurité et géopolitique. Tu analyses des transcripts vidéo de manière structurée et rigoureuse. Tu réponds toujours en français."
+
+    local user_prompt="Analyse le transcript vidéo suivant:
+
+=== MÉTADONNÉES ===
+Titre: ${title}
+Durée: ${duration}
+Auteur: ${uploader}
+Date: ${upload_date}
+Tags: ${tags}
+URL: ${url}
+
+=== TRANSCRIPT ===
+${transcript}
+
+=== INSTRUCTIONS ===
+Produis une analyse structurée en Markdown comprenant:
+
+1. **Résumé exécutif** (5 lignes maximum)
+2. **Thèmes principaux et sous-thèmes**
+3. **Acteurs / entités mentionnés** (personnes, organisations, pays)
+4. **Points factuels clés et révélations notables**
+5. **Angle narratif et biais éventuels**
+6. **Pertinence pour un professionnel en cybersécurité et renseignement**
+7. **Questions ouvertes ou points à approfondir**
+
+Sois factuel, précis et critique dans ton analyse."
+
+    # Make API call
+    local response
+    response=$(curl -s -w "\n%{http_code}" "https://api.anthropic.com/v1/messages" \
+        -H "Content-Type: application/json" \
+        -H "x-api-key: $ANTHROPIC_API_KEY" \
+        -H "anthropic-version: 2023-06-01" \
+        -d "$(jq -n \
+            --arg model "$CLAUDE_MODEL" \
+            --argjson max_tokens "$MAX_TOKENS" \
+            --arg system "$system_prompt" \
+            --arg user "$user_prompt" \
+            '{
+                model: $model,
+                max_tokens: $max_tokens,
+                system: $system,
+                messages: [
+                    {role: "user", content: $user}
+                ]
+            }')" 2>/dev/null)
+
+    local http_code="$(printf '%s\n' "$response" | tail -n1)"
+    local body="$(printf '%s\n' "$response" | sed '$d')"
+
+    if [ "$http_code" != "200" ]; then
+        log_error "API error (HTTP $http_code)"
+        printf '%s\n' "$body" | jq -r '.error.message // .' 2>/dev/null | head -3
+        return 1
+    fi
+
+    # Extract and save the analysis (printf, not echo: echo rewrites JSON backslash escapes under dash)
+    local analysis="$(printf '%s\n' "$body" | jq -r '.content[0].text // empty')"
+
+    if [ -z "$analysis" ]; then
+        log_error "Empty response from Claude"
+        return 1
+    fi
+
+    # Create Markdown file with header
+    cat > "$analysis_file" << EOF
+# Analyse: ${title}
+
+**Source:** ${url}
+**Durée:** ${duration}
+**Auteur:** ${uploader}
+**Date:** ${upload_date}
+**Analysé le:** $(date +"%Y-%m-%d %H:%M")
+**Modèle:** ${CLAUDE_MODEL}
+
+---
+
+${analysis}
+
+---
+
+*Analyse générée automatiquement par SecuBox Intelligence Module v${SCRIPT_VERSION}*
+EOF
+
+    log_ok "Analysis saved to $analysis_file"
+
+    # Display summary in terminal
+    printf "\n%b=== RÉSUMÉ ===%b\n" "$BOLD$GREEN" "$NC"
+    printf '%s\n' "$analysis" | sed -n '/Résumé exécutif/,/^##\|^[0-9]\./p' | head -10
+
+    return 0
+}
+
+#=============================================================================
+# MAIN PIPELINE
+#=============================================================================
+
+# Run the full pipeline for one video: metadata, subtitles, Whisper, analysis.
+# Arguments: $1 video URL
+# Globals:   OUTPUT_BASE, NO_WHISPER, FORCE_WHISPER, NO_ANALYSE, WHISPER_* (read)
+# Returns:   1 when the initial yt-dlp lookup fails, otherwise 0
+process_video() {
+    local url="$1"
+    log_step "Processing video: $url"
+
+    # Extract video ID and create output directory
+    local video_id="$(extract_video_id "$url")"
+    local instance="$(printf '%s\n' "$url" | sed -E 's|https?://([^/]+).*|\1|')"
+    log_info "Video ID: $video_id"
+    log_info "Instance: $instance"
+
+    # Create temporary metadata to get title for slug
+    local temp_meta="$(mktemp)"
+    if ! yt-dlp --dump-json --no-warnings "$url" 2>/dev/null > "$temp_meta"; then
+        log_error "Failed to fetch video info"
+        rm -f "$temp_meta"
+        return 1
+    fi
+    local title="$(jq -r '.title // "video"' "$temp_meta")"
+    local slug="$(generate_slug "$title")"
+    slug="${slug:-$video_id}"
+    rm -f "$temp_meta"
+    log_info "Slug: $slug"
+
+    # Create output directory
+    local output_dir="$OUTPUT_BASE/$slug"
+    mkdir -p "$output_dir"
+    log_ok "Output directory: $output_dir"
+
+    # 1. Extract metadata
+    extract_metadata "$url" "$output_dir" "$slug" || {
+        log_warn "Metadata extraction failed, continuing..."
+    }
+
+    local transcript_file="$output_dir/${slug}.transcript.txt"
+    local has_transcript=0
+
+    # 2. Try to download existing subtitles (skipped only when both --no-whisper and --force-whisper are given)
+    if [ "$NO_WHISPER" != "1" ] || [ "$FORCE_WHISPER" != "1" ]; then
+        # The API probe is informational and returns 1 when no captions exist;
+        # "|| true" keeps that status from ending the script under `set -e`.
+        check_peertube_captions "$video_id" "$instance" 2>/dev/null || true
+        if download_subtitles "$url" "$output_dir" "$slug" "$WHISPER_LANG"; then
+            has_transcript=1
+        fi
+    fi
+
+    # 3. Run Whisper if needed
+    if [ "$has_transcript" = "0" ] || [ "$FORCE_WHISPER" = "1" ]; then
+        if [ "$NO_WHISPER" = "1" ]; then
+            log_warn "Whisper disabled, no transcript available"
+        elif [ -n "$WHISPER_CMD" ]; then
+            local audio_file="$output_dir/${slug}.audio.wav"
+
+            if extract_audio "$url" "$output_dir" "$slug"; then
+                if run_whisper "$audio_file" "$output_dir" "$slug" "$WHISPER_MODEL" "$WHISPER_LANG"; then
+                    has_transcript=1
+                fi
+                # Clean up audio file
+                rm -f "$audio_file"
+            fi
+        else
+            log_warn "No Whisper available and no subtitles found"
+        fi
+    fi
+
+    # 4. Run Claude analysis
+    if [ "$NO_ANALYSE" != "1" ] && [ "$has_transcript" = "1" ]; then
+        local meta_file="$output_dir/${slug}.meta.json"
+        analyse_with_claude "$meta_file" "$transcript_file" "$output_dir" "$slug" || {
+            log_warn "Analysis failed"
+        }
+    elif [ "$NO_ANALYSE" = "1" ]; then
+        log_info "Analysis disabled (--no-analyse)"
+    else
+        log_warn "No transcript available for analysis"
+    fi
+
+    # Summary
+    log_step "Processing complete"
+    printf "\nOutput files in %s:\n" "$output_dir"
+    ls -la "$output_dir" 2>/dev/null | tail -n +2
+
+    return 0
+}
+
+#=============================================================================
+# CLI PARSING
+#=============================================================================
+# Print usage, options, environment variables, examples and output layout to stdout (unquoted here-document: variables and $(basename "$0") are expanded).
+show_help() {
+    cat << EOF
+${BOLD}PeerTube Video Transcript & Analysis Tool${NC}
+SecuBox Intelligence Module v${SCRIPT_VERSION}
+
+${BOLD}Usage:${NC}
+  $(basename "$0") [OPTIONS] --url <video_url>
+  $(basename "$0") [OPTIONS] <video_url>
+
+${BOLD}Options:${NC}
+  --url <url>         PeerTube video URL
+  --no-whisper        Disable Whisper (subtitles only)
+  --force-whisper     Force Whisper even if subtitles exist
+  --no-analyse        Download/transcribe without Claude analysis
+  --model <name>      Whisper model (tiny, base, small, medium, large-v3)
+                      Default: ${WHISPER_MODEL}
+  --lang <code>       Whisper language code (fr, en, de, etc.)
+                      Default: ${WHISPER_LANG}
+  --output <dir>      Output base directory
+                      Default: ${OUTPUT_BASE}
+  --claude-model <m>  Claude model for analysis
+                      Default: ${CLAUDE_MODEL}
+  -h, --help          Show this help message
+  -v, --version       Show version
+
+${BOLD}Environment Variables:${NC}
+  ANTHROPIC_API_KEY   Claude API key (required for analysis)
+  PEERTUBE_INSTANCE   Default PeerTube instance
+  WHISPER_MODELS_PATH Path to Whisper models
+
+${BOLD}Examples:${NC}
+  # Basic usage
+  $(basename "$0") https://tube.gk2.secubox.in/w/abc123
+
+  # Force Whisper transcription with large model
+  $(basename "$0") --force-whisper --model large-v3 --url https://...
+
+  # Subtitles only, no AI analysis
+  $(basename "$0") --no-whisper --no-analyse https://...
+
+${BOLD}Output Structure:${NC}
+  ./output/<slug>/
+    ├── <slug>.meta.json      # Video metadata
+    ├── <slug>.fr.vtt         # Original subtitles (if available)
+    ├── <slug>.transcript.txt # Plain text transcript
+    └── <slug>.analyse.md     # Claude AI analysis
+
+EOF
+}
+
+# Print the tool name with its version, then the module name, one per line.
+show_version() {
+    printf '%s\n' "PeerTube Analyse v${SCRIPT_VERSION}" "SecuBox Intelligence Module"
+}
+
+# Parse command-line arguments into globals.
+# Globals:   VIDEO_URL, NO_WHISPER, FORCE_WHISPER, NO_ANALYSE (written);
+#            WHISPER_MODEL, WHISPER_LANG, OUTPUT_BASE, CLAUDE_MODEL (overridden)
+# Exits:     0 after --help/--version; 1 on an unknown option, a missing
+#            option value, a second URL, or no URL at all
+parse_args() {
+    VIDEO_URL=""
+    NO_WHISPER=""
+    FORCE_WHISPER=""
+    NO_ANALYSE=""
+    while [ $# -gt 0 ]; do
+        case "$1" in
+            --url|--model|--lang|--output|--claude-model)
+                # Without this check `shift 2` fails on a trailing option and
+                # the shell aborts without a useful message.
+                if [ $# -lt 2 ]; then
+                    log_error "Option $1 requires a value"
+                    exit 1
+                fi
+                case "$1" in
+                    --url)          VIDEO_URL="$2" ;;
+                    --model)        WHISPER_MODEL="$2" ;;
+                    --lang)         WHISPER_LANG="$2" ;;
+                    --output)       OUTPUT_BASE="$2" ;;
+                    --claude-model) CLAUDE_MODEL="$2" ;;
+                esac
+                shift 2
+                ;;
+            --no-whisper)
+                NO_WHISPER=1
+                shift
+                ;;
+            --force-whisper)
+                FORCE_WHISPER=1
+                shift
+                ;;
+            --no-analyse|--no-analyze)
+                NO_ANALYSE=1
+                shift
+                ;;
+            -h|--help)
+                show_help
+                exit 0
+                ;;
+            -v|--version)
+                show_version
+                exit 0
+                ;;
+            -*)
+                log_error "Unknown option: $1"
+                show_help
+                exit 1
+                ;;
+            *)
+                # Positional argument = URL
+                if [ -z "$VIDEO_URL" ]; then
+                    VIDEO_URL="$1"
+                else
+                    log_error "Multiple URLs not supported"
+                    exit 1
+                fi
+                shift
+                ;;
+        esac
+    done
+
+    if [ -z "$VIDEO_URL" ]; then
+        log_error "No video URL provided"
+        show_help
+        exit 1
+    fi
+}
+
+#=============================================================================
+# ENTRY POINT
+#=============================================================================
+
+# Entry point: parse arguments, print the banner, check tools, process the video.
+main() {
+    parse_args "$@"
+
+    printf "\n%b╔══════════════════════════════════════════════════════╗%b\n" "$BOLD$CYAN" "$NC"
+    printf "%b║  PeerTube Transcript & Analysis Tool v%-16s║%b\n" "$BOLD$CYAN" "$SCRIPT_VERSION" "$NC"
+    printf "%b║  SecuBox Intelligence Module                         ║%b\n" "$BOLD$CYAN" "$NC"
+    printf "%b╚══════════════════════════════════════════════════════╝%b\n\n" "$BOLD$CYAN" "$NC"
+    check_dependencies || exit 1
+    # With `set -e`, a bare failing call would end the script before $? is read,
+    # so run the pipeline in a subshell (errexit kept inside) and collect its status.
+    set +e
+    ( set -e; process_video "$VIDEO_URL" )
+    local exit_code=$?
+    set -e
+    if [ "$exit_code" -eq 0 ]; then
+        printf "\n%b✓ All done!%b\n\n" "$BOLD$GREEN" "$NC"
+    else
+        printf "\n%b✗ Completed with errors%b\n\n" "$BOLD$YELLOW" "$NC"
+    fi
+    return "$exit_code"
+}
+
+# Run main only when the basename of $0 is "peertube-analyse" or "sh" (meant to skip execution when the file is sourced)
+if [ "${0##*/}" = "peertube-analyse" ] || [ "${0##*/}" = "sh" ]; then
+    main "$@"
+fi