fix(localai): Add gte-small preset, fix RPC expect unwrapping and chat JSON escaping

- Add gte-small embedding model preset to localaictl with proper YAML
  config (embeddings: true, context_size: 512)
- Fix RPC expect declarations across api.js, dashboard.js, models.js to
  use empty expect objects, preserving the full response including error fields
- Replace fragile sed/awk JSON escaping in RPCD chat and completion
  handlers with file I/O streaming through awk for robust handling of
  special characters in LLM responses
- Switch the RPCD chat handler from curl to wget to avoid a missing output
  file on timeout (curl doesn't create the -o file on exit code 28)
- Bypass RPCD 30s script timeout for chat by calling LocalAI API
  directly from the browser via fetch()
- Add embeddings flag to models RPC and filter embedding models from
  chat view model selector

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
CyberMind-FR 2026-02-03 19:22:30 +01:00
parent 28acb7e70f
commit daa4c48375
6 changed files with 148 additions and 80 deletions
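The empty "expect: { }" objects change what the views receive: with the old "expect: { success: false }", LuCI's rpc.declare() unwrapped the reply down to the bare boolean and dropped any accompanying error field, while an empty expect passes the whole reply object through. The raw reply shape can be inspected from the router shell; a minimal sketch, where the object name 'luci.localai' comes from the declarations in this commit but the example output is purely illustrative:

    # Raw rpcd replies - the same objects the views now receive unchanged
    ubus call luci.localai health '{}'
    # e.g. {"healthy": true}
    ubus call luci.localai model_remove '{"name":"no-such-model"}'
    # e.g. {"success": false, "error": "model not found"}  <- error field no longer dropped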

View File

@@ -31,7 +31,7 @@ var callConfig = rpc.declare({
var callHealth = rpc.declare({
object: 'luci.localai',
method: 'health',
expect: { healthy: false }
expect: { }
});
var callMetrics = rpc.declare({
@@ -43,47 +43,47 @@ var callMetrics = rpc.declare({
var callStart = rpc.declare({
object: 'luci.localai',
method: 'start',
expect: { success: false }
expect: { }
});
var callStop = rpc.declare({
object: 'luci.localai',
method: 'stop',
expect: { success: false }
expect: { }
});
var callRestart = rpc.declare({
object: 'luci.localai',
method: 'restart',
expect: { success: false }
expect: { }
});
var callModelInstall = rpc.declare({
object: 'luci.localai',
method: 'model_install',
params: ['name'],
expect: { success: false }
expect: { }
});
var callModelRemove = rpc.declare({
object: 'luci.localai',
method: 'model_remove',
params: ['name'],
expect: { success: false }
expect: { }
});
var callChat = rpc.declare({
object: 'luci.localai',
method: 'chat',
params: ['model', 'messages'],
expect: { response: '' }
expect: { }
});
var callComplete = rpc.declare({
object: 'luci.localai',
method: 'complete',
params: ['model', 'prompt'],
expect: { text: '' }
expect: { }
});
function formatBytes(bytes) {

View File

@@ -9,51 +9,46 @@ var callModels = rpc.declare({
expect: { models: [] }
});
// Custom chat function with longer timeout (LLMs can be slow)
function callChatWithTimeout(model, messages, timeoutMs) {
var callStatus = rpc.declare({
object: 'luci.localai',
method: 'status',
expect: { }
});
// Call LocalAI API directly (bypasses RPCD 30s script timeout)
function callChatDirect(apiPort, model, messages, timeoutMs) {
return new Promise(function(resolve, reject) {
var timeout = timeoutMs || 120000; // 2 minutes default
var timeout = timeoutMs || 180000; // 3 minutes default for LLM
var controller = new AbortController();
var timeoutId = setTimeout(function() {
controller.abort();
reject(new Error('Request timed out - model may need more time'));
reject(new Error('Request timed out after ' + (timeout/1000) + 's - model may need more time'));
}, timeout);
// Use ubus RPC endpoint
var ubusUrl = L.url('admin/ubus');
var payload = JSON.stringify({
jsonrpc: '2.0',
id: Date.now(),
method: 'call',
params: [
rpc.getSessionID() || '00000000000000000000000000000000',
'luci.localai',
'chat',
{ model: model, messages: messages }
]
});
var apiUrl = window.location.protocol + '//' + window.location.hostname + ':' + apiPort + '/v1/chat/completions';
fetch(ubusUrl, {
fetch(apiUrl, {
method: 'POST',
credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: payload,
body: JSON.stringify({ model: model, messages: messages }),
signal: controller.signal
})
.then(function(response) {
clearTimeout(timeoutId);
if (!response.ok) {
throw new Error('HTTP ' + response.status);
return response.text().then(function(t) {
throw new Error('API error HTTP ' + response.status + ': ' + t.substring(0, 200));
});
}
return response.json();
})
.then(function(data) {
clearTimeout(timeoutId);
if (data.result && Array.isArray(data.result) && data.result[1]) {
resolve(data.result[1]);
} else if (data.error) {
reject(new Error(data.error.message || 'RPC error'));
if (data.error) {
resolve({ response: '', error: data.error.message || JSON.stringify(data.error) });
} else if (data.choices && data.choices[0] && data.choices[0].message) {
resolve({ response: data.choices[0].message.content || '' });
} else {
resolve({ response: '', error: 'Unexpected response format' });
resolve({ response: '', error: 'Unexpected API response format' });
}
})
.catch(function(err) {
@@ -73,13 +68,19 @@ return view.extend({
selectedModel: null,
load: function() {
return callModels();
return Promise.all([callModels(), callStatus()]);
},
render: function(data) {
var self = this;
var modelsData = data[0];
var statusData = data[1] || {};
this.apiPort = statusData.api_port || 8080;
// RPC with expect returns array directly
var models = Array.isArray(data) ? data : (data && data.models ? data.models : []);
var allModels = Array.isArray(modelsData) ? modelsData : (modelsData && modelsData.models ? modelsData.models : []);
// Filter out embedding models - they can't do chat completions
var models = allModels.filter(function(m) { return !m.embeddings; });
var container = E('div', { 'class': 'localai-chat' }, [
E('style', {}, this.getCSS()),
@@ -101,7 +102,7 @@ return view.extend({
var displayName = m.loaded ? modelId + ' ✓' : modelId;
return E('option', { 'value': modelId }, displayName);
}) :
[E('option', { 'value': '' }, _('No models available'))]
[E('option', { 'value': '' }, _('No chat models available'))]
)
])
]),
@@ -188,9 +189,8 @@ return view.extend({
// Build messages array
this.messages.push({ role: 'user', content: message });
// Send to API (120s timeout for slow models)
// Pass messages as array - RPCD will handle JSON serialization
callChatWithTimeout(this.selectedModel, this.messages, 120000)
// Call LocalAI API directly (bypasses RPCD 30s script timeout)
callChatDirect(this.apiPort, this.selectedModel, this.messages, 180000)
.then(function(result) {
var loading = document.getElementById('loading-msg');
if (loading) loading.remove();
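Because the chat view now posts to the LocalAI API straight from the browser, the same request can be reproduced from any machine that can reach the router's API port. A rough sketch: the address is a placeholder, the model id must be one actually installed, and 8080 is only the fallback used when the status RPC reports no api_port:

    MODEL="tinyllama-1.1b-chat"   # placeholder - substitute an installed model id
    wget -q -T 180 -O - \
      --header="Content-Type: application/json" \
      --post-data "{\"model\":\"$MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}" \
      "http://192.168.1.1:8080/v1/chat/completions"

Two side effects of this design worth keeping in mind: the API port has to be reachable from the browser's network (listen address and firewall), and if LuCI is served over HTTPS the plain-http fetch() may be blocked as mixed content.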

View File

@@ -18,7 +18,7 @@ var callModels = rpc.declare({
var callHealth = rpc.declare({
object: 'luci.localai',
method: 'health',
expect: { healthy: false }
expect: { }
});
var callMetrics = rpc.declare({
@@ -30,19 +30,19 @@ var callMetrics = rpc.declare({
var callStart = rpc.declare({
object: 'luci.localai',
method: 'start',
expect: { success: false }
expect: { }
});
var callStop = rpc.declare({
object: 'luci.localai',
method: 'stop',
expect: { success: false }
expect: { }
});
var callRestart = rpc.declare({
object: 'luci.localai',
method: 'restart',
expect: { success: false }
expect: { }
});
function formatBytes(bytes) {

View File

@@ -13,14 +13,14 @@ var callModelInstall = rpc.declare({
object: 'luci.localai',
method: 'model_install',
params: ['name'],
expect: { success: false }
expect: { }
});
var callModelRemove = rpc.declare({
object: 'luci.localai',
method: 'model_remove',
params: ['name'],
expect: { success: false }
expect: { }
});
function formatBytes(bytes) {

View File

@@ -94,13 +94,22 @@ get_models() {
[ $first -eq 0 ] && echo ","
first=0
seen="$seen $model_id"
# Check YAML config for embeddings flag
local is_embedding="false"
for yext in yaml yml; do
if [ -f "$MODELS_PATH/$model_id.$yext" ]; then
grep -q "^embeddings:.*true" "$MODELS_PATH/$model_id.$yext" 2>/dev/null && is_embedding="true"
break
fi
done
cat <<EOF
{
"id": "$model_id",
"name": "$model_id",
"size": 0,
"type": "loaded",
"loaded": true
"loaded": true,
"embeddings": $is_embedding
}
EOF
i=$((i + 1))
@@ -350,41 +359,54 @@ do_chat() {
# Log for debugging
logger -t localai-chat "Request to model: $model"
# Call LocalAI API using curl if available, otherwise wget
# Call LocalAI API - prefer wget (OpenWrt standard, creates output file reliably)
local tmpfile="/tmp/localai_chat_$$"
local tmpfile_err="/tmp/localai_chat_err_$$"
# Use longer timeout for LLM responses (120 seconds)
if command -v curl >/dev/null 2>&1; then
curl -s -m 120 -X POST "http://127.0.0.1:$API_PORT/v1/chat/completions" \
-H "Content-Type: application/json" \
-d "$request_body" \
-o "$tmpfile" 2>"$tmpfile_err"
else
wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
--header="Content-Type: application/json" \
"http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"
fi
wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
--header="Content-Type: application/json" \
"http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"
if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
# Log raw response for debugging
logger -t localai-chat "Raw response: $(head -c 200 "$tmpfile")"
# Extract message content using jsonfilter
local content=$(jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null)
# Check for API error first
local error=$(jsonfilter -i "$tmpfile" -e '@.error.message' 2>/dev/null)
if [ -n "$error" ]; then
# Escape quotes and newlines in error
error=$(echo "$error" | sed 's/"/\\"/g' | tr '\n' ' ')
error=$(printf '%s' "$error" | tr '\n' ' ' | sed 's/"/\\"/g')
echo "{\"response\":\"\",\"error\":\"$error\"}"
elif [ -n "$content" ]; then
# Properly escape the content for JSON output
# Handle quotes, backslashes, and newlines
content=$(printf '%s' "$content" | sed 's/\\/\\\\/g; s/"/\\"/g' | awk '{printf "%s\\n", $0}' | sed 's/\\n$//')
echo "{\"response\":\"$content\"}"
else
echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
# Extract content and build JSON via file I/O to handle
# large responses and special characters safely
local outfile="/tmp/localai_out_$$"
local has_content=0
jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null | {
printf '{"response":"'
awk '
BEGIN { ORS="" }
{
gsub(/\\/, "\\\\")
gsub(/"/, "\\\"")
gsub(/\t/, "\\t")
gsub(/\r/, "")
if (NR > 1) printf "\\n"
printf "%s", $0
}'
printf '"}'
} > "$outfile"
# Check if we got actual content (the empty wrapper {"response":""} is 15 bytes)
local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
if [ "$outsize" -gt 15 ]; then
cat "$outfile"
else
echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
fi
rm -f "$outfile"
fi
rm -f "$tmpfile" "$tmpfile_err" 2>/dev/null
else
@@ -411,14 +433,38 @@ do_complete() {
return
fi
local response=$(wget -q -O - --post-data "{\"model\":\"$model\",\"prompt\":\"$prompt\"}" \
--header="Content-Type: application/json" \
"http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null)
local tmpfile="/tmp/localai_comp_$$"
if [ -n "$response" ]; then
local text=$(echo "$response" | jsonfilter -e '@.choices[0].text' 2>/dev/null)
echo "{\"text\":\"$(echo "$text" | sed 's/"/\\"/g')\"}"
wget -q -T 120 -O "$tmpfile" --post-data "{\"model\":\"$model\",\"prompt\":\"$prompt\"}" \
--header="Content-Type: application/json" \
"http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null
if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
local outfile="/tmp/localai_compout_$$"
jsonfilter -i "$tmpfile" -e '@.choices[0].text' 2>/dev/null | {
printf '{"text":"'
awk '
BEGIN { ORS="" }
{
gsub(/\\/, "\\\\")
gsub(/"/, "\\\"")
gsub(/\t/, "\\t")
gsub(/\r/, "")
if (NR > 1) printf "\\n"
printf "%s", $0
}'
printf '"}'
} > "$outfile"
local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
if [ "$outsize" -gt 11 ]; then
cat "$outfile"
else
echo '{"text":"","error":"Empty response from API"}'
fi
rm -f "$outfile" "$tmpfile"
else
rm -f "$tmpfile"
echo '{"text":"","error":"API request failed"}'
fi
}
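Both do_chat() and do_complete() now wrap the extracted text with the same awk filter, streamed straight to a temp file instead of being bounced through shell variables and sed. The filter can be exercised on its own; the sample input below is invented for illustration:

    # Stand-alone test of the escaping filter shared by do_chat/do_complete
    printf 'He said "hi"\tthen left\na C:\\path too\n' | {
      printf '{"response":"'
      awk '
        BEGIN { ORS="" }
        {
          gsub(/\\/, "\\\\")
          gsub(/"/, "\\\"")
          gsub(/\t/, "\\t")
          gsub(/\r/, "")
          if (NR > 1) printf "\\n"
          printf "%s", $0
        }'
      printf '"}'
    }

One caveat to verify on the target awk (busybox on OpenWrt): in POSIX-conforming awks the replacement string "\\\\" in gsub() denotes a single literal backslash, so the backslash-doubling line may be a no-op and a backslash in model output could reach the JSON unescaped; the portable spelling for a doubled backslash in the replacement is "\\\\\\\\". This is a caution to test, not a confirmed defect in the change.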

View File

@@ -309,9 +309,10 @@ cmd_models() {
echo ""
echo "=== Available Presets ==="
echo " tinyllama - 669MB - TinyLlama 1.1B"
echo " phi2 - 1.6GB - Microsoft Phi-2"
echo " mistral - 4.1GB - Mistral 7B Instruct"
echo " tinyllama - 669MB - TinyLlama 1.1B (chat)"
echo " phi2 - 1.6GB - Microsoft Phi-2 (chat)"
echo " mistral - 4.1GB - Mistral 7B Instruct (chat)"
echo " gte-small - 67MB - GTE Small (embeddings)"
echo ""
echo "Install: localaictl model-install <name>"
}
@@ -340,13 +341,17 @@ cmd_model_install() {
url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
;;
gte-small)
url="https://huggingface.co/ggml-org/gte-small-Q8_0-GGUF/resolve/main/gte-small-q8_0.gguf"
filename="gte-small-q8_0.gguf"
;;
http*)
url="$model_name"
filename=$(basename "$url")
;;
*)
log_error "Unknown model: $model_name"
log_error "Use preset name (tinyllama, phi2, mistral) or full URL"
log_error "Use preset name (tinyllama, phi2, mistral, gte-small) or full URL"
return 1
;;
esac
@@ -358,7 +363,22 @@ cmd_model_install() {
if wget -L --show-progress -O "$models_path/$filename" "$url"; then
# Create YAML config for the model
local model_id="${filename%.*}"
cat > "$models_path/$model_id.yaml" << EOF
# Embedding models need different config
case "$model_name" in
gte-small)
cat > "$models_path/$model_id.yaml" << EOF
name: $model_id
backend: llama-cpp
embeddings: true
parameters:
model: $filename
context_size: 512
threads: $threads
EOF
;;
*)
cat > "$models_path/$model_id.yaml" << EOF
name: $model_id
backend: llama-cpp
parameters:
@@ -366,6 +386,8 @@ parameters:
context_size: $context_size
threads: $threads
EOF
;;
esac
log_info "Model installed: $model_id"
log_info "Restart service to load: /etc/init.d/localai restart"
else
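Once the preset is installed and the service restarted, the model can be checked against LocalAI's OpenAI-style embeddings endpoint. A sketch under a few assumptions: 8080 is the default API port, the model id is the filename stem the YAML above names (gte-small-q8_0), and /v1/embeddings is LocalAI's standard OpenAI-compatible route rather than anything added by this commit:

    wget -q -O - \
      --header="Content-Type: application/json" \
      --post-data '{"model":"gte-small-q8_0","input":"hello world"}' \
      "http://127.0.0.1:8080/v1/embeddings"
    # expected shape, roughly: {"data":[{"embedding":[...],"index":0}], ...}

The embeddings: true flag in that YAML is also what the new get_models() grep keys on, which is how the chat view knows to hide this model from its selector.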