secubox-openwrt/package/secubox/luci-app-localai/root/usr/libexec/rpcd/luci.localai
CyberMind-FR daa4c48375 fix(localai): Add gte-small preset, fix RPC expect unwrapping and chat JSON escaping
- Add gte-small embedding model preset to localaictl with proper YAML
  config (embeddings: true, context_size: 512)
- Fix RPC expect declarations across api.js, dashboard.js, models.js to
  use empty expect objects, preserving full response including error fields
- Replace fragile sed/awk JSON escaping in RPCD chat and completion
  handlers with file I/O streaming through awk for robust handling of
  special characters in LLM responses
- Switch RPCD chat handler from curl to wget to avoid missing output
  file on timeout (curl doesn't create -o file on exit code 28)
- Bypass RPCD 30s script timeout for chat by calling LocalAI API
  directly from the browser via fetch()
- Add embeddings flag to models RPC and filter embedding models from
  chat view model selector

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 08:36:20 +01:00

528 lines
12 KiB
Bash
Executable File

#!/bin/sh
# RPCD backend for LocalAI LuCI integration
# Copyright (C) 2025 CyberMind.fr
#
# Invoked as "<script> list" to describe the exposed methods and as
# "<script> call <method>" to run one; every handler in this file prints a
# JSON object on stdout.
# Shell helper library; config_load/config_get used by load_config presumably
# come from here (the file itself is not visible in this script).
. /lib/functions.sh
# Name of the UCI package read by load_config and get_status.
CONFIG="localai"
# Control utility invoked by do_model_install and do_model_remove.
LOCALAI_CTL="/usr/sbin/localaictl"
# Load UCI config
#
# Reads the "main" section of the $CONFIG package into the globals API_PORT,
# DATA_PATH, MODELS_PATH, MEMORY_LIMIT, THREADS and CONTEXT_SIZE. The last
# argument of each config_get call is the fallback used for an unset option.
#
# API_PORT, THREADS and CONTEXT_SIZE are printed unquoted into JSON by
# get_status and get_config, so a value that is not a plain non-negative
# integer is replaced by its default instead of corrupting the RPC reply.
load_config() {
    config_load "$CONFIG"
    config_get API_PORT main api_port "8081"
    config_get DATA_PATH main data_path "/srv/localai"
    config_get MODELS_PATH main models_path "/srv/localai/models"
    config_get MEMORY_LIMIT main memory_limit "2G"
    config_get THREADS main threads "4"
    config_get CONTEXT_SIZE main context_size "2048"

    # Reject empty strings and anything containing a non-digit.
    case "$API_PORT" in
        '' | *[!0-9]*) API_PORT="8081" ;;
    esac
    case "$THREADS" in
        '' | *[!0-9]*) THREADS="4" ;;
    esac
    case "$CONTEXT_SIZE" in
        '' | *[!0-9]*) CONTEXT_SIZE="2048" ;;
    esac
}
# Check if LocalAI is running (supports LXC, Docker, Podman)
#
# Probes, in order: an LXC container named "localai" reporting RUNNING, a
# Podman container named exactly "localai", a Docker container named exactly
# "localai", and finally any process whose command line contains "local-ai".
# A tool that is not installed is skipped.
# Returns 0 as soon as one probe succeeds; otherwise the status of pgrep.
is_running() {
    local engine

    if command -v lxc-info >/dev/null 2>&1 &&
        lxc-info -n localai -s 2>/dev/null | grep -q "RUNNING"; then
        return 0
    fi

    # Podman and Docker share the same "ps --format" interface.
    for engine in podman docker; do
        command -v "$engine" >/dev/null 2>&1 || continue
        if "$engine" ps --format '{{.Names}}' 2>/dev/null | grep -q "^localai$"; then
            return 0
        fi
    done

    # Native binary fallback.
    pgrep -f "local-ai" >/dev/null 2>&1
}
# Get service status
#
# Prints a JSON object with the running/enabled flags, an uptime estimate in
# seconds and the settings loaded by load_config.
#
# Uptime is derived from the modification time of /proc/<pid> for the first
# process matching "local-ai". If that time cannot be read, or lies in the
# future, uptime stays 0 instead of being reported as seconds since the epoch.
get_status() {
    load_config
    local running="false"
    local enabled="false"
    local uptime=0
    local pid start_time now

    if is_running; then
        running="true"
        pid=$(pgrep -f "local-ai" | head -1)
        if [ -n "$pid" ] && [ -d "/proc/$pid" ]; then
            start_time=$(stat -c %Y "/proc/$pid" 2>/dev/null) || start_time=""
            now=$(date +%s)
            case "$start_time" in
                '' | *[!0-9]*) ;; # unreadable: keep uptime at 0
                *)
                    if [ "$start_time" -gt 0 ] && [ "$now" -ge "$start_time" ]; then
                        uptime=$((now - start_time))
                    fi
                    ;;
            esac
        fi
    fi

    # Get enabled status
    if [ "$(uci -q get "${CONFIG}.main.enabled")" = "1" ]; then
        enabled="true"
    fi

    cat <<EOF
{
"running": $running,
"enabled": $enabled,
"uptime": $uptime,
"api_port": $API_PORT,
"memory_limit": "$MEMORY_LIMIT",
"threads": $THREADS,
"context_size": $CONTEXT_SIZE,
"data_path": "$DATA_PATH",
"models_path": "$MODELS_PATH"
}
EOF
}
# Get installed models - queries both LocalAI API and filesystem
#
# Prints {"models":[...]} on stdout, built from three sources in this order:
#   1. ids reported by the running API at /v1/models ("type": "loaded");
#      at most the first 20 entries are read
#   2. *.gguf / *.bin / *.onnx files found in $MODELS_PATH
#   3. *.yaml / *.yml definitions in $MODELS_PATH that have no model file
#      with the same base name ("type": "config")
# An id already emitted by (1) is skipped in (2) and (3).
#
# Every entry carries an "embeddings" boolean taken from an
# "embeddings: ... true" line in the model's YAML definition, so consumers can
# filter all three kinds of entry the same way.

# Print "true" if the YAML definition of model id $1 in $MODELS_PATH enables
# embeddings, "false" otherwise. Only the first existing of <id>.yaml and
# <id>.yml is inspected.
_localai_embeddings_flag() {
    local yext
    for yext in yaml yml; do
        if [ -f "$MODELS_PATH/$1.$yext" ]; then
            if grep -q "^embeddings:.*true" "$MODELS_PATH/$1.$yext" 2>/dev/null; then
                echo "true"
                return
            fi
            break
        fi
    done
    echo "false"
}

get_models() {
    load_config
    local tmpfile
    local first=1
    local seen=""
    local idx=0
    local model_id model yaml name base ext size type flag has_file

    echo '{"models":['

    # First, try to get models from LocalAI API (shows loaded/active models)
    if is_running; then
        # Unpredictable scratch file where mktemp exists, PID-based otherwise.
        tmpfile=$(mktemp /tmp/localai_models.XXXXXX 2>/dev/null) ||
            tmpfile="/tmp/localai_models_$$"
        # Bounded wait so a stalled API cannot hold the RPC call open.
        wget -q -T 10 -O "$tmpfile" "http://127.0.0.1:$API_PORT/v1/models" 2>/dev/null
        if [ -s "$tmpfile" ]; then
            # Indexed access, one jsonfilter call per model (max 20)
            while [ "$idx" -lt 20 ]; do
                model_id=$(jsonfilter -i "$tmpfile" -e "@.data[$idx].id" 2>/dev/null)
                [ -z "$model_id" ] && break
                [ "$first" -eq 0 ] && echo ","
                first=0
                seen="$seen $model_id"
                flag=$(_localai_embeddings_flag "$model_id")
                cat <<EOF
{
"id": "$model_id",
"name": "$model_id",
"size": 0,
"type": "loaded",
"loaded": true,
"embeddings": $flag
}
EOF
                idx=$((idx + 1))
            done
        fi
        rm -f "$tmpfile"
    fi

    if [ -d "$MODELS_PATH" ]; then
        # Scan filesystem for model files
        for model in "$MODELS_PATH"/*.gguf "$MODELS_PATH"/*.bin "$MODELS_PATH"/*.onnx; do
            [ -f "$model" ] || continue # unmatched glob stays literal
            name=$(basename "$model")
            base="${name%.*}"
            # Already reported by the API
            case " $seen " in
                *" $base "*) continue ;;
            esac
            case "${name##*.}" in
                gguf) type="llama-cpp" ;;
                bin) type="transformers" ;;
                onnx) type="onnx" ;;
                *) type="unknown" ;;
            esac
            size=$(stat -c %s "$model" 2>/dev/null) || size=0
            case "$size" in
                '' | *[!0-9]*) size=0 ;;
            esac
            flag=$(_localai_embeddings_flag "$base")
            [ "$first" -eq 0 ] && echo ","
            first=0
            cat <<EOF
{
"id": "$base",
"name": "$name",
"size": $size,
"type": "$type",
"path": "$model",
"loaded": false,
"embeddings": $flag
}
EOF
        done

        # Also scan for YAML model configs (LocalAI model definitions)
        for yaml in "$MODELS_PATH"/*.yaml "$MODELS_PATH"/*.yml; do
            [ -f "$yaml" ] || continue
            name=$(basename "$yaml")
            base="${name%.*}"
            case " $seen " in
                *" $base "*) continue ;;
            esac
            # A model file with the same base name was listed by the scan above
            has_file=0
            for ext in gguf bin onnx; do
                [ -f "$MODELS_PATH/$base.$ext" ] && has_file=1
            done
            [ "$has_file" -eq 1 ] && continue
            flag="false"
            grep -q "^embeddings:.*true" "$yaml" 2>/dev/null && flag="true"
            [ "$first" -eq 0 ] && echo ","
            first=0
            cat <<EOF
{
"id": "$base",
"name": "$base",
"size": 0,
"type": "config",
"path": "$yaml",
"loaded": false,
"embeddings": $flag
}
EOF
        done
    fi

    echo ']}'
}
# Get configuration
#
# Prints the settings loaded by load_config as one JSON object. The port,
# thread count and context size are written as bare values; the two paths and
# the memory limit are written as strings.
get_config() {
    load_config
    printf '{\n'
    printf '"api_port": %s,\n' "$API_PORT"
    printf '"data_path": "%s",\n' "$DATA_PATH"
    printf '"models_path": "%s",\n' "$MODELS_PATH"
    printf '"memory_limit": "%s",\n' "$MEMORY_LIMIT"
    printf '"threads": %s,\n' "$THREADS"
    printf '"context_size": %s\n' "$CONTEXT_SIZE"
    printf '}\n'
}
# Health check
#
# Prints {"healthy": <bool>, "api_status": "<state>"} where state is:
#   stopped   - is_running reports that the service is down
#   ok        - the /readyz reply contains "ok" (case-insensitive)
#   unhealthy - the service runs but /readyz gave any other (or no) reply
get_health() {
    load_config
    local healthy="false"
    local api_status="unknown"
    local reply

    if ! is_running; then
        api_status="stopped"
    else
        # Check API health endpoint
        reply=$(wget -q -O - "http://127.0.0.1:$API_PORT/readyz" 2>/dev/null)
        if echo "$reply" | grep -qi "ok"; then
            healthy="true"
            api_status="ok"
        else
            api_status="unhealthy"
        fi
    fi

    printf '{\n"healthy": %s,\n"api_status": "%s"\n}\n' "$healthy" "$api_status"
}
# Get metrics
#
# Prints {"memory_used": <bytes>, "cpu_percent": <number>} for the first
# process named "local-ai"; both values are 0 when the service is not running
# or a figure cannot be obtained.
#
# Both values are printed unquoted, so each is validated before use: a ps
# that does not support these options, or a process without a VmRSS line,
# yields 0 rather than an empty token that would make the reply invalid JSON.
get_metrics() {
    load_config
    local mem_used=0
    local cpu_percent=0
    local pid

    if is_running; then
        pid=$(pgrep local-ai | head -1)
        if [ -n "$pid" ]; then
            # VmRSS is given in kB; %.0f keeps large values out of exponent form.
            mem_used=$(awk '/^VmRSS:/ { printf "%.0f\n", $2 * 1024; exit }' \
                "/proc/$pid/status" 2>/dev/null)
            # No "|| fallback" here: it would test tr's status, not ps's.
            cpu_percent=$(ps -o %cpu= -p "$pid" 2>/dev/null | tr -d ' ')
        fi
    fi

    case "$mem_used" in
        '' | *[!0-9]*) mem_used=0 ;;
    esac
    # Accept digits with at most one interior decimal point.
    case "$cpu_percent" in
        '' | *[!0-9.]* | *.*.* | .* | *.) cpu_percent=0 ;;
    esac

    cat <<EOF
{
"memory_used": $mem_used,
"cpu_percent": $cpu_percent
}
EOF
}
# Start service
#
# Refuses to act when the service is already up. Otherwise runs the init
# script, waits two seconds and reports whether is_running now succeeds.
# Prints {"success":true} or {"success":false,"error":"..."}.
do_start() {
    local reply='{"success":false,"error":"Failed to start"}'

    if is_running; then
        echo '{"success":false,"error":"Already running"}'
        return
    fi

    /etc/init.d/localai start >/dev/null 2>&1
    sleep 2
    if is_running; then
        reply='{"success":true}'
    fi
    printf '%s\n' "$reply"
}
# Stop service
#
# Runs the init script's stop action, waits one second and reports success
# only if is_running no longer detects the service.
do_stop() {
    /etc/init.d/localai stop >/dev/null 2>&1
    sleep 1
    if is_running; then
        echo '{"success":false,"error":"Failed to stop"}'
    else
        echo '{"success":true}'
    fi
}
# Restart service
#
# Runs the init script's restart action, waits three seconds and reports
# whether is_running detects the service afterwards.
do_restart() {
    /etc/init.d/localai restart >/dev/null 2>&1
    sleep 3
    if ! is_running; then
        echo '{"success":false,"error":"Failed to restart"}'
        return
    fi
    echo '{"success":true}'
}
# Install model
#
# Arguments: $1 - model name handed to "$LOCALAI_CTL model-install"
# Prints {"success":true} when the tool exits 0. Otherwise prints
# {"success":false,"error":"<last line of the tool's output>"}, or a message
# with the exit status when the tool printed nothing.
#
# The declaration of "output" is kept apart from its assignment: in
# "local output=$(cmd)" the status seen by $? is that of "local", which hid
# every failure of the tool.
do_model_install() {
    local name="$1"
    local output ret error

    [ -z "$name" ] && { echo '{"success":false,"error":"Model name required"}'; return; }

    output=$($LOCALAI_CTL model-install "$name" 2>&1)
    ret=$?
    if [ "$ret" -eq 0 ]; then
        echo '{"success":true}'
        return
    fi

    # Last output line, made safe for a JSON string: tabs/CRs become spaces,
    # backslashes are escaped before quotes.
    error=$(printf '%s\n' "$output" | tail -n 1 | tr '\t\r' '  ' |
        sed 's/\\/\\\\/g; s/"/\\"/g')
    [ -n "$error" ] || error="model-install failed with exit status $ret"
    printf '{"success":false,"error":"%s"}\n' "$error"
}
# Remove model
#
# Arguments: $1 - model name handed to "$LOCALAI_CTL model-remove"
# Prints {"success":true} when the tool exits 0. Otherwise prints
# {"success":false,"error":"<last line of the tool's output>"}, or a message
# with the exit status when the tool printed nothing.
#
# The declaration of "output" is kept apart from its assignment: in
# "local output=$(cmd)" the status seen by $? is that of "local", which hid
# every failure of the tool.
do_model_remove() {
    local name="$1"
    local output ret error

    [ -z "$name" ] && { echo '{"success":false,"error":"Model name required"}'; return; }

    output=$($LOCALAI_CTL model-remove "$name" 2>&1)
    ret=$?
    if [ "$ret" -eq 0 ]; then
        echo '{"success":true}'
        return
    fi

    # Last output line, made safe for a JSON string: tabs/CRs become spaces,
    # backslashes are escaped before quotes.
    error=$(printf '%s\n' "$output" | tail -n 1 | tr '\t\r' '  ' |
        sed 's/\\/\\\\/g; s/"/\\"/g')
    [ -n "$error" ] || error="model-remove failed with exit status $ret"
    printf '{"success":false,"error":"%s"}\n' "$error"
}
# Chat completion (proxy to LocalAI API)
#
# Arguments: $1 - model id
#            $2 - messages, already serialized as a JSON array
# Posts both to /v1/chat/completions and prints {"response":"<text>"} on
# success or {"response":"","error":"<reason>"} on any failure.
#
# The model id is JSON-escaped before being placed in the request body.
# The messages argument is inserted verbatim because the caller passes it as
# JSON text.
do_chat() {
    load_config
    local model="$1"
    local messages="$2"
    local model_json request_body error err_msg outsize
    local tmpfile="/tmp/localai_chat_$$"
    local tmpfile_err="/tmp/localai_chat_err_$$"
    local outfile="/tmp/localai_out_$$"

    if ! is_running; then
        echo '{"response":"","error":"LocalAI is not running. Start with: /etc/init.d/localai start"}'
        return
    fi

    # Validate inputs
    [ -z "$model" ] && { echo '{"response":"","error":"Model not specified"}'; return; }
    [ -z "$messages" ] && { echo '{"response":"","error":"Messages not provided"}'; return; }

    # Build request body for LocalAI /v1/chat/completions endpoint
    model_json=$(printf '%s' "$model" | sed 's/\\/\\\\/g; s/"/\\"/g')
    request_body="{\"model\":\"$model_json\",\"messages\":$messages}"

    # Log for debugging
    logger -t localai-chat "Request to model: $model"

    # wget is used because it creates the output file even on timeout.
    # Use longer timeout for LLM responses (120 seconds)
    wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
        --header="Content-Type: application/json" \
        "http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"

    if [ -s "$tmpfile" ]; then
        # Log raw response for debugging
        logger -t localai-chat "Raw response: $(head -c 200 "$tmpfile")"

        # Check for API error first
        error=$(jsonfilter -i "$tmpfile" -e '@.error.message' 2>/dev/null)
        if [ -n "$error" ]; then
            error=$(printf '%s' "$error" | tr '\n\t\r' '   ' |
                sed 's/\\/\\\\/g; s/"/\\"/g')
            printf '{"response":"","error":"%s"}\n' "$error"
        else
            # Extract content and build JSON via file I/O to handle
            # large responses and special characters safely
            jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null | {
                printf '{"response":"'
                awk '
                    BEGIN { ORS="" }
                    {
                        gsub(/\\/, "\\\\")
                        gsub(/"/, "\\\"")
                        gsub(/\t/, "\\t")
                        gsub(/\r/, "")
                        if (NR > 1) printf "\\n"
                        printf "%s", $0
                    }'
                printf '"}'
            } > "$outfile"

            # The empty envelope {"response":""} is exactly 15 bytes, so any
            # larger file holds real content. wc is used rather than stat
            # since stat may be absent from minimal BusyBox builds.
            outsize=$(wc -c < "$outfile" 2>/dev/null) || outsize=0
            if [ "${outsize:-0}" -gt 15 ]; then
                cat "$outfile"
            else
                echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
            fi
        fi
    else
        err_msg=""
        if [ -s "$tmpfile_err" ]; then
            err_msg=$(head -c 200 "$tmpfile_err" | tr '\n\t\r' '   ' |
                sed 's/\\/\\\\/g; s/"/\\"/g')
        fi
        if [ -n "$err_msg" ]; then
            printf '{"response":"","error":"API request failed: %s"}\n' "$err_msg"
        else
            echo '{"response":"","error":"API request failed - check if LocalAI is running and model is loaded"}'
        fi
    fi

    rm -f "$tmpfile" "$tmpfile_err" "$outfile" 2>/dev/null
}
# Text completion
#
# Arguments: $1 - model id
#            $2 - prompt text (may contain quotes, backslashes and newlines)
# Posts both to /v1/completions and prints {"text":"<completion>"} on success
# or {"text":"","error":"<reason>"} on failure.
#
# Model and prompt are JSON-escaped before the request body is assembled; a
# raw quote, backslash or line break in the prompt would otherwise produce a
# malformed request.

# Print $1 escaped for use inside a JSON string literal. Handles backslash,
# double quote, tab, carriage return and newline; other control characters
# are passed through unchanged.
_localai_json_escape() {
    printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g' | awk '
        BEGIN { ORS = "" }
        {
            gsub(/\t/, "\\t")
            gsub(/\r/, "\\r")
            if (NR > 1) printf "\\n"
            printf "%s", $0
        }'
}

do_complete() {
    load_config
    local model="$1"
    local prompt="$2"
    local model_json prompt_json outsize
    local tmpfile="/tmp/localai_comp_$$"
    local outfile="/tmp/localai_compout_$$"

    if ! is_running; then
        echo '{"text":"","error":"LocalAI is not running"}'
        return
    fi
    [ -z "$model" ] && { echo '{"text":"","error":"Model not specified"}'; return; }

    model_json=$(_localai_json_escape "$model")
    prompt_json=$(_localai_json_escape "$prompt")

    wget -q -T 120 -O "$tmpfile" \
        --post-data "{\"model\":\"$model_json\",\"prompt\":\"$prompt_json\"}" \
        --header="Content-Type: application/json" \
        "http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null

    if [ -s "$tmpfile" ]; then
        jsonfilter -i "$tmpfile" -e '@.choices[0].text' 2>/dev/null | {
            printf '{"text":"'
            awk '
                BEGIN { ORS="" }
                {
                    gsub(/\\/, "\\\\")
                    gsub(/"/, "\\\"")
                    gsub(/\t/, "\\t")
                    gsub(/\r/, "")
                    if (NR > 1) printf "\\n"
                    printf "%s", $0
                }'
            printf '"}'
        } > "$outfile"

        # The empty envelope {"text":""} is exactly 11 bytes. wc is used
        # rather than stat since stat may be absent from minimal BusyBox builds.
        outsize=$(wc -c < "$outfile" 2>/dev/null) || outsize=0
        if [ "${outsize:-0}" -gt 11 ]; then
            cat "$outfile"
        else
            echo '{"text":"","error":"Empty response from API"}'
        fi
    else
        echo '{"text":"","error":"API request failed"}'
    fi

    rm -f "$outfile" "$tmpfile"
}
# UBUS method list and dispatch
#
# Arguments: $1 - "list" prints the method signatures as JSON;
#                 "call" runs the method named in $2
#            $2 - method name (only with "call")
# Methods that take parameters read one line of JSON from stdin and pull the
# fields out with jsonfilter. The line is forwarded with printf rather than
# echo, because echo's treatment of backslashes varies between shells and the
# request routinely contains JSON escapes such as \n.
# Any other value of $1 produces no output.
main() {
    local input name model messages prompt

    case "$1" in
        list)
            cat <<'EOF'
{
"status": {},
"models": {},
"config": {},
"health": {},
"metrics": {},
"start": {},
"stop": {},
"restart": {},
"model_install": {"name": "string"},
"model_remove": {"name": "string"},
"chat": {"model": "string", "messages": "array"},
"complete": {"model": "string", "prompt": "string"}
}
EOF
            ;;
        call)
            case "$2" in
                status) get_status ;;
                models) get_models ;;
                config) get_config ;;
                health) get_health ;;
                metrics) get_metrics ;;
                start) do_start ;;
                stop) do_stop ;;
                restart) do_restart ;;
                model_install)
                    read -r input
                    name=$(printf '%s\n' "$input" | jsonfilter -e '@.name' 2>/dev/null)
                    do_model_install "$name"
                    ;;
                model_remove)
                    read -r input
                    name=$(printf '%s\n' "$input" | jsonfilter -e '@.name' 2>/dev/null)
                    do_model_remove "$name"
                    ;;
                chat)
                    read -r input
                    model=$(printf '%s\n' "$input" | jsonfilter -e '@.model' 2>/dev/null)
                    messages=$(printf '%s\n' "$input" | jsonfilter -e '@.messages' 2>/dev/null)
                    do_chat "$model" "$messages"
                    ;;
                complete)
                    read -r input
                    model=$(printf '%s\n' "$input" | jsonfilter -e '@.model' 2>/dev/null)
                    prompt=$(printf '%s\n' "$input" | jsonfilter -e '@.prompt' 2>/dev/null)
                    do_complete "$model" "$prompt"
                    ;;
                *) echo '{"error":"Unknown method"}' ;;
            esac
            ;;
    esac
}

main "$@"