#!/bin/sh
#
# Media Flow nDPId Collector
# Collects streaming service data from nDPId flows and stores in history
# Uses nDPId's local DPI detection (no cloud subscription required)
#
# Copyright (C) 2025 CyberMind.fr
# NOTE: Uses contains() instead of test() for jq without ONIGURUMA regex

# Files read by this script.
NDPID_FLOWS='/tmp/ndpid-flows.json'
NDPID_APPS='/tmp/ndpid-apps.json'

# Files written by this script: rolling history and the current-state cache.
HISTORY_FILE='/tmp/media-flow-history.json'
MEDIA_CACHE='/tmp/media-flow-ndpid-cache.json'

# Upper bound on the number of entries kept in HISTORY_FILE.
MAX_ENTRIES=1000

# Holds the PID of the running collector instance.
LOCK_FILE='/tmp/media-flow-ndpid-collector.lock'

# Single-instance guard.
# LOCK_FILE holds the PID of the running collector. If that PID is still
# alive we exit quietly; otherwise the lock is stale and gets replaced.
if [ -f "$LOCK_FILE" ]; then
	pid=$(cat "$LOCK_FILE" 2>/dev/null)
	# Only a plain, non-zero decimal PID may reach kill: values such as
	# "0" or "-1" address whole process groups and would always "succeed".
	case "$pid" in
		'' | 0 | *[!0-9]*) pid= ;;
	esac
	if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
		exit 0
	fi
	rm -f "$LOCK_FILE"
fi

# Create the lock with noclobber (set -C) so that, of two instances started
# at the same moment, only one can create the file; the loser exits.
# $$ still expands to the main shell PID inside the subshell.
if ! (set -C; echo $$ > "$LOCK_FILE") 2>/dev/null; then
	exit 0
fi

# Single quotes: expand and quote LOCK_FILE when the trap fires, not now.
trap 'rm -f "$LOCK_FILE"' EXIT

# Honour the media_flow.global.enabled switch. When uci fails (option or
# tool missing) the fallback value "1" keeps the collector running.
enabled=$(uci -q get media_flow.global.enabled 2>/dev/null || echo "1")
case "$enabled" in
	1) ;;
	*) exit 0 ;;
esac

# Nothing to collect when there is neither a flows file nor a running
# ndpid process: leave quietly in that case.
[ -f "$NDPID_FLOWS" ] || pgrep ndpid > /dev/null 2>&1 || exit 0

# Initialize the history file with an empty JSON array.
# -s (exists and is non-empty) instead of -f: a zero-length file, e.g. left
# behind by an interrupted write, is not valid JSON and jq produces no
# output for it, so it must be re-seeded as well.
if [ ! -s "$HISTORY_FILE" ]; then
	echo '[]' > "$HISTORY_FILE"
fi

# Process nDPId flows.
# Pattern matching uses contains() instead of test() so that the script
# works with jq builds compiled without ONIGURUMA (no regex support).
#
# Reads:  NDPID_FLOWS (expected to hold a JSON array of flow objects)
# Writes: MEDIA_CACHE (current streaming flows, "[]" when there are none)
#         HISTORY_FILE (deduplicated, chronologically ordered, capped at
#         MAX_ENTRIES)
if [ -f "$NDPID_FLOWS" ] && command -v jq >/dev/null 2>&1; then
	timestamp=$(date -Iseconds)

	# Extract streaming flows.
	# Matches: YouTube, Netflix, Disney, Twitch, Spotify, WhatsApp, Zoom, Teams, etc.
	new_entries=$(jq -c --arg ts "$timestamp" '
		# Keyword tables: the single source of truth for both detection and
		# categorisation, so the two can no longer drift apart.
		def video_words: ["youtube", "netflix", "disney", "amazon", "prime",
		                  "twitch", "hbo", "hulu", "vimeo", "peacock",
		                  "paramount", "plex", "appletv"];
		def audio_words: ["spotify", "applemusic", "deezer", "soundcloud",
		                  "tidal", "pandora", "audible"];
		def visio_words: ["zoom", "teams", "meet", "discord", "skype", "webex",
		                  "facetime", "whatsapp", "signal", "telegram", "slack"];

		# True when the lower-cased input string contains any of the words.
		def matches_any(words):
			ascii_downcase as $app |
			any(words[]; . as $w | $app | contains($w));

		# Map an application name to "video", "audio", "visio" or "other".
		def get_category:
			if matches_any(video_words) then "video"
			elif matches_any(audio_words) then "audio"
			elif matches_any(visio_words) then "visio"
			else "other"
			end;

		# Estimate a quality label from the bandwidth figure given as input.
		def get_quality(cat):
			if cat == "audio" then
				if . < 96 then "Low" elif . < 192 then "Normal" elif . < 320 then "High" else "Lossless" end
			elif cat == "visio" then
				if . < 500 then "Audio" elif . < 1500 then "SD" elif . < 3000 then "HD" else "FHD" end
			else
				if . < 1000 then "SD" elif . < 3000 then "HD" elif . < 8000 then "FHD" else "4K" end
			end;

		[.[] |
			# Skip flows without a usable application name; a non-string
			# value would otherwise abort the whole run in ascii_downcase.
			select((.app | type) == "string" and .app != "" and .app != "Unknown") |
			(.app | get_category) as $cat |
			select($cat != "other") |
			select(.state == "active" or .bytes_rx > 10000 or .bytes_tx > 10000) |
			# Only include flows with significant traffic
			select(.bytes_rx > 5000 or .bytes_tx > 5000 or .packets > 50) |
			# Total kilobits moved by the flow; used as a kbps figure, which
			# matches the fixed duration of 1 recorded below.
			(((.bytes_rx // 0) + (.bytes_tx // 0)) * 8 / 1000 | floor) as $bw |
			{
				timestamp: $ts,
				app: .app,
				client: (.src_ip // "unknown"),
				server: (.dst_ip // "unknown"),
				hostname: (.hostname // null),
				protocol: (.proto // "unknown"),
				bytes_rx: (.bytes_rx // 0),
				bytes_tx: (.bytes_tx // 0),
				packets: (.packets // 0),
				confidence: (.confidence // "Unknown"),
				ndpi_category: (.category // "Unknown"),
				flow_id: (.id // 0),
				state: (.state // "active"),
				duration: 1,
				bandwidth: $bw,
				category: $cat,
				quality: ($bw | get_quality($cat))
			}
		]
	' "$NDPID_FLOWS" 2>/dev/null)

	# Save current state to cache for frontend
	if [ -n "$new_entries" ] && [ "$new_entries" != "[]" ] && [ "$new_entries" != "null" ]; then
		# printf rather than echo: some shells let echo interpret backslash
		# sequences, which would corrupt escaped JSON strings. Write to a
		# temporary file first so readers never see a half-written cache.
		printf '%s\n' "$new_entries" > "${MEDIA_CACHE}.tmp" &&
			mv "${MEDIA_CACHE}.tmp" "$MEDIA_CACHE"

		# Merge with history (avoid duplicates).
		# - New entries arrive on stdin and the history via --slurpfile, so
		#   the payload is never passed as a (size-limited) command argument.
		# - unique_by keeps the first entry per client/app/day, i.e. the one
		#   already in the history, and returns its result ordered by key.
		# - sort_by(.timestamp) restores chronological order so that the
		#   final slice drops the oldest entries, not arbitrary ones.
		if printf '%s\n' "$new_entries" |
			jq -c --slurpfile hist "$HISTORY_FILE" --argjson max "$MAX_ENTRIES" '
				(($hist[0] // []) + map(del(.flow_id, .state)))
				| unique_by(.client + .app + (.timestamp | split("T")[0]))
				| sort_by(.timestamp)
				| .[-$max:]
			' > "${HISTORY_FILE}.tmp" 2>/dev/null &&
			[ -s "${HISTORY_FILE}.tmp" ]; then
			mv "${HISTORY_FILE}.tmp" "$HISTORY_FILE"
		else
			# Keep the previous history and do not leave the temp file behind.
			rm -f "${HISTORY_FILE}.tmp"
		fi
	else
		echo '[]' > "$MEDIA_CACHE"
	fi
fi

# Also process the nDPId apps file for aggregated stats (without regex).
# Keeps the 20 streaming applications with the highest .bytes value and
# stores them in /tmp/media-flow-apps.json.
if [ -f "$NDPID_APPS" ] && command -v jq >/dev/null 2>&1; then
	apps_out="/tmp/media-flow-apps.json"

	# Write to a temporary file and rename on success: redirecting straight
	# onto the target would truncate it to an empty (invalid JSON) file
	# whenever jq fails.
	if jq -c '
		def is_streaming:
			(.name | ascii_downcase) as $app |
			any(("youtube", "netflix", "spotify", "whatsapp", "discord",
			     "zoom", "teams", "twitch", "disney", "amazon");
			    . as $w | $app | contains($w));
		# Entries without a string name cannot be matched; skip them instead
		# of letting ascii_downcase abort the run. A missing byte counter is
		# ranked as 0 because null cannot be negated.
		[.[] | select((.name | type) == "string") | select(is_streaming)]
		| sort_by(-(.bytes // 0))
		| .[0:20]
	' "$NDPID_APPS" > "${apps_out}.tmp" 2>/dev/null && [ -s "${apps_out}.tmp" ]; then
		mv "${apps_out}.tmp" "$apps_out"
	else
		rm -f "${apps_out}.tmp"
	fi
fi

# Print the ISO-8601 timestamp lying $1 days before epoch second $2
# (default: the current time).
# Uses epoch arithmetic plus "date -d @SECONDS" rather than a relative date
# string such as "7 days ago", which not every date implementation parses
# (BusyBox date, for one). Prints nothing and returns non-zero when the
# conversion fails.
retention_cutoff() {
	_rc_now=${2:-$(date +%s)}
	date -d "@$(( $_rc_now - $1 * 86400 ))" -Iseconds 2>/dev/null
}

# Clean old entries based on retention (days).
# A retention of 0 or a non-numeric value disables the cleanup.
retention=$(uci -q get media_flow.global.history_retention 2>/dev/null || echo "7")
if [ "$retention" -gt 0 ] 2>/dev/null && [ -f "$HISTORY_FILE" ]; then
	# If no cutoff can be computed the cleanup is skipped. Falling back to
	# the current time would filter out every entry older than this second.
	if cutoff_date=$(retention_cutoff "$retention") && [ -n "$cutoff_date" ]; then
		if jq -c --arg cutoff "$cutoff_date" '[.[] | select(.timestamp >= $cutoff)]' \
			"$HISTORY_FILE" > "${HISTORY_FILE}.tmp" 2>/dev/null &&
			[ -s "${HISTORY_FILE}.tmp" ]; then
			mv "${HISTORY_FILE}.tmp" "$HISTORY_FILE"
		else
			# Keep the existing history and remove the partial output.
			rm -f "${HISTORY_FILE}.tmp"
		fi
	fi
fi

# Always exit with status 0; failures of the jq steps above are not
# propagated to the caller.
exit 0
