- Add gte-small embedding model preset to localaictl with proper YAML config (embeddings: true, context_size: 512) - Fix RPC expect declarations across api.js, dashboard.js, models.js to use empty expect objects, preserving full response including error fields - Replace fragile sed/awk JSON escaping in RPCD chat and completion handlers with file I/O streaming through awk for robust handling of special characters in LLM responses - Switch RPCD chat handler from curl to wget to avoid missing output file on timeout (curl doesn't create -o file on exit code 28) - Bypass RPCD 30s script timeout for chat by calling LocalAI API directly from the browser via fetch() - Add embeddings flag to models RPC and filter embedding models from chat view model selector Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
528 lines
12 KiB
Bash
Executable File
528 lines
12 KiB
Bash
Executable File
#!/bin/sh
|
|
# RPCD backend for LocalAI LuCI integration
|
|
# Copyright (C) 2025 CyberMind.fr
|
|
|
|
. /lib/functions.sh
|
|
|
|
CONFIG="localai"
|
|
LOCALAI_CTL="/usr/sbin/localaictl"
|
|
|
|
# Load UCI config
|
|
# Load UCI config into the global variables used by every handler.
# Each option falls back to the packaged default when unset in UCI.
load_config() {
	config_load "$CONFIG"

	# var-name / uci-option / default triples, one per line
	while read -r var opt def; do
		config_get "$var" main "$opt" "$def"
	done <<EOF
API_PORT api_port 8081
DATA_PATH data_path /srv/localai
MODELS_PATH models_path /srv/localai/models
MEMORY_LIMIT memory_limit 2G
THREADS threads 4
CONTEXT_SIZE context_size 2048
EOF
}
|
|
|
|
# Check if LocalAI is running (supports LXC, Docker, Podman)
|
|
# Check if LocalAI is running. Each supported runtime is probed in turn
# (LXC, Podman, Docker), finishing with a plain process check for a
# native binary. Returns 0 as soon as one probe reports the service up.
is_running() {
	# LXC container
	command -v lxc-info >/dev/null 2>&1 &&
		lxc-info -n localai -s 2>/dev/null | grep -q "RUNNING" &&
		return 0

	# Podman container named exactly "localai"
	command -v podman >/dev/null 2>&1 &&
		podman ps --format '{{.Names}}' 2>/dev/null | grep -q "^localai$" &&
		return 0

	# Docker container named exactly "localai"
	command -v docker >/dev/null 2>&1 &&
		docker ps --format '{{.Names}}' 2>/dev/null | grep -q "^localai$" &&
		return 0

	# Fallback: native binary process
	pgrep -f "local-ai" >/dev/null 2>&1
}
|
|
|
|
# Get service status
|
|
# Get service status.
#
# Emits a JSON object with the run state, the UCI "enabled" flag,
# uptime in seconds, and the key runtime settings from load_config.
# Uptime is only computed for a native process (containers expose no
# matching pid here), otherwise it stays 0.
get_status() {
	load_config

	local running="false"
	local uptime=0

	if is_running; then
		running="true"
		# Try to get process/container uptime from the native process
		local pid=$(pgrep -f "local-ai" | head -1)
		if [ -n "$pid" ] && [ -d "/proc/$pid" ]; then
			# mtime of /proc/<pid> approximates the process start time
			local start_time=$(stat -c %Y /proc/$pid 2>/dev/null || echo 0)
			local now=$(date +%s)
			uptime=$((now - start_time))
		fi
	fi

	# Get enabled status from UCI ("1" means enabled at boot)
	local enabled="false"
	[ "$(uci -q get ${CONFIG}.main.enabled)" = "1" ] && enabled="true"

	# NOTE(review): numeric fields are emitted unquoted; they rely on the
	# UCI values being numeric (the defaults are) — confirm if arbitrary
	# values can end up in these options.
	cat <<EOF
{
"running": $running,
"enabled": $enabled,
"uptime": $uptime,
"api_port": $API_PORT,
"memory_limit": "$MEMORY_LIMIT",
"threads": $THREADS,
"context_size": $CONTEXT_SIZE,
"data_path": "$DATA_PATH",
"models_path": "$MODELS_PATH"
}
EOF
}
|
|
|
|
# Get installed models - queries both LocalAI API and filesystem
|
|
# Get installed models - queries both LocalAI API and filesystem.
#
# Output: {"models":[...]} where each entry comes from one of three
# sources, deduplicated through the space-separated $seen id list:
#   1. models reported loaded by the running API (/v1/models)
#   2. model files on disk (.gguf/.bin/.onnx) not already listed
#   3. standalone YAML model definitions with no matching model file
get_models() {
	load_config

	local tmpfile="/tmp/localai_models_$$"
	local first=1     # 1 until the first entry is emitted; gates the "," separator
	local seen=""     # ids already emitted, space separated

	echo '{"models":['

	# First, try to get models from LocalAI API (shows loaded/active models)
	if is_running; then
		wget -q -O "$tmpfile" "http://127.0.0.1:$API_PORT/v1/models" 2>/dev/null
		if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
			# Try indexed access for each model (max 20): jsonfilter has
			# no array-iteration primitive, so probe indexes until empty
			local i=0
			while [ $i -lt 20 ]; do
				local model_id=$(jsonfilter -i "$tmpfile" -e "@.data[$i].id" 2>/dev/null)
				[ -z "$model_id" ] && break
				[ $first -eq 0 ] && echo ","
				first=0
				seen="$seen $model_id"
				# Check YAML config for embeddings flag so the UI can
				# filter embedding models out of the chat selector
				local is_embedding="false"
				for yext in yaml yml; do
					if [ -f "$MODELS_PATH/$model_id.$yext" ]; then
						grep -q "^embeddings:.*true" "$MODELS_PATH/$model_id.$yext" 2>/dev/null && is_embedding="true"
						break
					fi
				done
				cat <<EOF
{
"id": "$model_id",
"name": "$model_id",
"size": 0,
"type": "loaded",
"loaded": true,
"embeddings": $is_embedding
}
EOF
				i=$((i + 1))
			done
		fi
		rm -f "$tmpfile"
	fi

	# Scan filesystem for model files
	if [ -d "$MODELS_PATH" ]; then
		for model in "$MODELS_PATH"/*.gguf "$MODELS_PATH"/*.bin "$MODELS_PATH"/*.onnx; do
			# skip the literal glob pattern when nothing matched
			[ -f "$model" ] || continue
			local name=$(basename "$model")
			local basename_no_ext="${name%.*}"
			local size=$(stat -c %s "$model" 2>/dev/null || echo 0)
			local ext="${name##*.}"
			local type="unknown"
			local loaded="false"

			# Map file extension to the LocalAI backend type
			case "$ext" in
				gguf) type="llama-cpp" ;;
				bin) type="transformers" ;;
				onnx) type="onnx" ;;
			esac

			# Check if this model is in the seen list (loaded from API);
			# padding with spaces makes the word match exact
			case " $seen " in
				*" $basename_no_ext "*) continue ;;
			esac

			[ $first -eq 0 ] && echo ","
			first=0

			cat <<EOF
{
"id": "$basename_no_ext",
"name": "$name",
"size": $size,
"type": "$type",
"path": "$model",
"loaded": $loaded
}
EOF
		done
	fi

	# Also scan for YAML model configs (LocalAI model definitions)
	if [ -d "$MODELS_PATH" ]; then
		for yaml in "$MODELS_PATH"/*.yaml "$MODELS_PATH"/*.yml; do
			[ -f "$yaml" ] || continue
			local name=$(basename "$yaml")
			local basename_no_ext="${name%.*}"

			# Skip if already seen (emitted by the API pass)
			case " $seen " in
				*" $basename_no_ext "*) continue ;;
			esac

			# Check if there's a model file with same base name already shown
			local already_shown=0
			for ext in gguf bin onnx; do
				[ -f "$MODELS_PATH/$basename_no_ext.$ext" ] && already_shown=1
			done
			[ $already_shown -eq 1 ] && continue

			[ $first -eq 0 ] && echo ","
			first=0

			cat <<EOF
{
"id": "$basename_no_ext",
"name": "$basename_no_ext",
"size": 0,
"type": "config",
"path": "$yaml",
"loaded": false
}
EOF
		done
	fi

	echo ']}'
}
|
|
|
|
# Get configuration
|
|
# Emit the active UCI configuration as a JSON object.
get_config() {
	load_config

	# printf one line per field; byte-identical to the previous heredoc
	printf '%s\n' \
		'{' \
		"\"api_port\": $API_PORT," \
		"\"data_path\": \"$DATA_PATH\"," \
		"\"models_path\": \"$MODELS_PATH\"," \
		"\"memory_limit\": \"$MEMORY_LIMIT\"," \
		"\"threads\": $THREADS," \
		"\"context_size\": $CONTEXT_SIZE" \
		'}'
}
|
|
|
|
# Health check
|
|
# Probe the LocalAI /readyz endpoint and report health as JSON.
# api_status is "stopped" when the service is down, "ok" when the
# readiness probe answers with "ok" (case-insensitive), otherwise
# "unhealthy".
get_health() {
	load_config

	local healthy="false"
	local api_status="unknown"

	if ! is_running; then
		api_status="stopped"
	elif wget -q -O - "http://127.0.0.1:$API_PORT/readyz" 2>/dev/null | grep -qi "ok"; then
		healthy="true"
		api_status="ok"
	else
		api_status="unhealthy"
	fi

	printf '%s\n' \
		'{' \
		"\"healthy\": $healthy," \
		"\"api_status\": \"$api_status\"" \
		'}'
}
|
|
|
|
# Get metrics
|
|
# Get resource metrics for the native LocalAI process.
#
# Output: {"memory_used": <bytes>, "cpu_percent": <percent>}. Both stay
# 0 when the service is stopped or runs inside a container (no matching
# local pid).
get_metrics() {
	load_config

	local mem_used=0
	local cpu_percent=0

	if is_running; then
		# Match on the full command line (-f) for consistency with
		# is_running and get_status
		local pid=$(pgrep -f "local-ai" | head -1)
		if [ -n "$pid" ]; then
			# Resident set size from /proc, converted from kB to bytes
			mem_used=$(awk '/VmRSS/ {print $2*1024}' /proc/$pid/status 2>/dev/null)

			# CPU percentage from ps; may print nothing if the process
			# exits between pgrep and ps
			cpu_percent=$(ps -o %cpu= -p $pid 2>/dev/null | tr -d ' ')
		fi
	fi

	# Guarantee numeric fields: the previous "|| echo 0" fallbacks never
	# fired because a pipeline's status is that of its last stage (tr),
	# so an empty value produced invalid JSON.
	mem_used=${mem_used:-0}
	cpu_percent=${cpu_percent:-0}

	cat <<EOF
{
"memory_used": $mem_used,
"cpu_percent": $cpu_percent
}
EOF
}
|
|
|
|
# Start service
|
|
# Start the LocalAI service through its init script.
# Prints a JSON success object, or an error if it was already running
# or failed to come up.
do_start() {
	if is_running; then
		echo '{"success":false,"error":"Already running"}'
		return
	fi

	/etc/init.d/localai start >/dev/null 2>&1
	sleep 2  # give the service a moment to come up before re-checking

	is_running && echo '{"success":true}' ||
		echo '{"success":false,"error":"Failed to start"}'
}
|
|
|
|
# Stop service
|
|
# Stop the LocalAI service through its init script.
# Prints a JSON success object, or an error if it is still running.
do_stop() {
	/etc/init.d/localai stop >/dev/null 2>&1
	sleep 1  # allow the process/container to wind down

	if is_running; then
		echo '{"success":false,"error":"Failed to stop"}'
	else
		echo '{"success":true}'
	fi
}
|
|
|
|
# Restart service
|
|
# Restart the LocalAI service through its init script.
# Prints a JSON success object, or an error if it did not come back up.
do_restart() {
	/etc/init.d/localai restart >/dev/null 2>&1
	sleep 3  # a restart takes longer than a plain start

	is_running && echo '{"success":true}' ||
		echo '{"success":false,"error":"Failed to restart"}'
}
|
|
|
|
# Install model
|
|
# Install a model via localaictl.
#
# $1 - model name (required)
# Prints a JSON success object, or {"success":false,"error":...} with
# the last line of the tool's output on failure.
do_model_install() {
	local name="$1"
	[ -z "$name" ] && { echo '{"success":false,"error":"Model name required"}'; return; }

	# Split declaration from assignment: 'local output=$(cmd)' would make
	# $? the status of 'local' itself (always 0), so failures were
	# silently reported as success.
	local output ret
	output=$($LOCALAI_CTL model-install "$name" 2>&1)
	ret=$?

	if [ $ret -eq 0 ]; then
		echo '{"success":true}'
	else
		# Escape backslashes before quotes so the message stays valid JSON
		local error=$(echo "$output" | tail -1 | sed 's/\\/\\\\/g; s/"/\\"/g')
		echo "{\"success\":false,\"error\":\"$error\"}"
	fi
}
|
|
|
|
# Remove model
|
|
# Remove a model via localaictl.
#
# $1 - model name (required)
# Prints a JSON success object, or {"success":false,"error":...} with
# the last line of the tool's output on failure.
do_model_remove() {
	local name="$1"
	[ -z "$name" ] && { echo '{"success":false,"error":"Model name required"}'; return; }

	# Split declaration from assignment: 'local output=$(cmd)' would make
	# $? the status of 'local' itself (always 0), so failures were
	# silently reported as success.
	local output ret
	output=$($LOCALAI_CTL model-remove "$name" 2>&1)
	ret=$?

	if [ $ret -eq 0 ]; then
		echo '{"success":true}'
	else
		# Escape backslashes before quotes so the message stays valid JSON
		local error=$(echo "$output" | tail -1 | sed 's/\\/\\\\/g; s/"/\\"/g')
		echo "{\"success\":false,\"error\":\"$error\"}"
	fi
}
|
|
|
|
# Chat completion (proxy to LocalAI API)
|
|
# Chat completion (proxy to LocalAI API).
#
# $1 - model id
# $2 - JSON array of chat messages (already-encoded JSON text from LuCI)
# Prints {"response": "...", "error": "..."} for the frontend. The LLM
# content is streamed through awk so quotes, backslashes, tabs and
# newlines cannot break the emitted JSON.
do_chat() {
	load_config
	local model="$1"
	local messages="$2"

	if ! is_running; then
		echo '{"response":"","error":"LocalAI is not running. Start with: /etc/init.d/localai start"}'
		return
	fi

	# Validate inputs
	[ -z "$model" ] && { echo '{"response":"","error":"Model not specified"}'; return; }
	[ -z "$messages" ] && { echo '{"response":"","error":"Messages not provided"}'; return; }

	# Escape the model id so quotes/backslashes cannot break the request
	# body; $messages is already a JSON array straight from the LuCI RPC.
	local safe_model=$(printf '%s' "$model" | sed 's/\\/\\\\/g; s/"/\\"/g')

	# Build request body for LocalAI /v1/chat/completions endpoint
	local request_body="{\"model\":\"$safe_model\",\"messages\":$messages}"

	# Log for debugging
	logger -t localai-chat "Request to model: $model"

	# Call LocalAI API - prefer wget (OpenWrt standard, creates output file reliably)
	local tmpfile="/tmp/localai_chat_$$"
	local tmpfile_err="/tmp/localai_chat_err_$$"

	# Use longer timeout for LLM responses (120 seconds)
	wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
		--header="Content-Type: application/json" \
		"http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"

	if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
		# Log raw response for debugging
		logger -t localai-chat "Raw response: $(head -c 200 "$tmpfile")"

		# Check for API error first
		local error=$(jsonfilter -i "$tmpfile" -e '@.error.message' 2>/dev/null)

		if [ -n "$error" ]; then
			error=$(printf '%s' "$error" | tr '\n' ' ' | sed 's/"/\\"/g')
			echo "{\"response\":\"\",\"error\":\"$error\"}"
		else
			# Extract content and build JSON via file I/O to handle
			# large responses and special characters safely
			local outfile="/tmp/localai_out_$$"

			jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null | {
				printf '{"response":"'
				awk '
					BEGIN { ORS="" }
					{
						gsub(/\\/, "\\\\")
						gsub(/"/, "\\\"")
						gsub(/\t/, "\\t")
						gsub(/\r/, "")
						if (NR > 1) printf "\\n"
						printf "%s", $0
					}'
				printf '"}'
			} > "$outfile"

			# Empty content yields exactly 15 bytes: {"response":""}.
			# The previous threshold of 17 wrongly discarded valid
			# one- and two-character replies as empty.
			local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
			if [ "$outsize" -gt 15 ]; then
				cat "$outfile"
			else
				echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
			fi
			rm -f "$outfile"
		fi
		rm -f "$tmpfile" "$tmpfile_err"
	else
		# wget produced no output file: surface its stderr if any
		local err_msg=""
		[ -f "$tmpfile_err" ] && err_msg=$(head -c 200 "$tmpfile_err" | tr '\n' ' ' | sed 's/"/\\"/g')
		rm -f "$tmpfile" "$tmpfile_err"

		if [ -n "$err_msg" ]; then
			echo "{\"response\":\"\",\"error\":\"API request failed: $err_msg\"}"
		else
			echo '{"response":"","error":"API request failed - check if LocalAI is running and model is loaded"}'
		fi
	fi
}
|
|
|
|
# Text completion
|
|
# Text completion (proxy to LocalAI /v1/completions).
#
# $1 - model id
# $2 - plain-text prompt (may contain quotes/newlines after jsonfilter
#      decoding)
# Prints {"text": "...", "error": "..."} for the frontend.
do_complete() {
	load_config
	local model="$1"
	local prompt="$2"

	if ! is_running; then
		echo '{"text":"","error":"LocalAI is not running"}'
		return
	fi

	# JSON-escape both values before interpolation: previously a double
	# quote, backslash or newline in the prompt produced a malformed
	# request body.
	local safe_model=$(printf '%s' "$model" | sed 's/\\/\\\\/g; s/"/\\"/g')
	local safe_prompt=$(printf '%s' "$prompt" | awk '
		BEGIN { ORS="" }
		{
			gsub(/\\/, "\\\\")
			gsub(/"/, "\\\"")
			gsub(/\t/, "\\t")
			gsub(/\r/, "")
			if (NR > 1) printf "\\n"
			printf "%s", $0
		}')

	local tmpfile="/tmp/localai_comp_$$"

	wget -q -T 120 -O "$tmpfile" --post-data "{\"model\":\"$safe_model\",\"prompt\":\"$safe_prompt\"}" \
		--header="Content-Type: application/json" \
		"http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null

	if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
		# Escape the completion text through awk while assembling the
		# JSON reply on disk, so large/odd responses stay safe
		local outfile="/tmp/localai_compout_$$"
		jsonfilter -i "$tmpfile" -e '@.choices[0].text' 2>/dev/null | {
			printf '{"text":"'
			awk '
				BEGIN { ORS="" }
				{
					gsub(/\\/, "\\\\")
					gsub(/"/, "\\\"")
					gsub(/\t/, "\\t")
					gsub(/\r/, "")
					if (NR > 1) printf "\\n"
					printf "%s", $0
				}'
			printf '"}'
		} > "$outfile"

		# Empty extraction yields exactly 11 bytes: {"text":""}
		local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
		if [ "$outsize" -gt 11 ]; then
			cat "$outfile"
		else
			echo '{"text":"","error":"Empty response from API"}'
		fi
		rm -f "$outfile" "$tmpfile"
	else
		rm -f "$tmpfile"
		echo '{"text":"","error":"API request failed"}'
	fi
}
|
|
|
|
# UBUS method list
|
|
# UBUS method dispatch.
#
# rpcd invokes this script with "list" to discover the exposed methods
# and their argument signatures, and with "call <method>" to execute
# one. Call arguments arrive as a single JSON line on stdin and are
# picked apart with jsonfilter before being passed positionally to the
# handler functions above.
case "$1" in
	list)
		cat <<'EOF'
{
"status": {},
"models": {},
"config": {},
"health": {},
"metrics": {},
"start": {},
"stop": {},
"restart": {},
"model_install": {"name": "string"},
"model_remove": {"name": "string"},
"chat": {"model": "string", "messages": "array"},
"complete": {"model": "string", "prompt": "string"}
}
EOF
		;;
	call)
		case "$2" in
			status) get_status ;;
			models) get_models ;;
			config) get_config ;;
			health) get_health ;;
			metrics) get_metrics ;;
			start) do_start ;;
			stop) do_stop ;;
			restart) do_restart ;;
			model_install)
				# stdin carries the JSON argument object
				read -r input
				name=$(echo "$input" | jsonfilter -e '@.name' 2>/dev/null)
				do_model_install "$name"
				;;
			model_remove)
				read -r input
				name=$(echo "$input" | jsonfilter -e '@.name' 2>/dev/null)
				do_model_remove "$name"
				;;
			chat)
				# @.messages extracts the raw JSON array for the request body
				read -r input
				model=$(echo "$input" | jsonfilter -e '@.model' 2>/dev/null)
				messages=$(echo "$input" | jsonfilter -e '@.messages' 2>/dev/null)
				do_chat "$model" "$messages"
				;;
			complete)
				read -r input
				model=$(echo "$input" | jsonfilter -e '@.model' 2>/dev/null)
				prompt=$(echo "$input" | jsonfilter -e '@.prompt' 2>/dev/null)
				do_complete "$model" "$prompt"
				;;
			*) echo '{"error":"Unknown method"}' ;;
		esac
		;;
esac
|