fix(localai): Add gte-small preset, fix RPC expect unwrapping and chat JSON escaping
- Add gte-small embedding model preset to localaictl with proper YAML config (embeddings: true, context_size: 512)
- Fix RPC expect declarations across api.js, dashboard.js, and models.js to use empty expect objects, preserving the full response including error fields
- Replace fragile sed/awk JSON escaping in the RPCD chat and completion handlers with file-I/O streaming through awk, for robust handling of special characters in LLM responses
- Switch the RPCD chat handler from curl to wget to avoid a missing output file on timeout (curl does not create the -o file on exit code 28)
- Bypass the RPCD 30-second script timeout for chat by calling the LocalAI API directly from the browser via fetch()
- Add an embeddings flag to the models RPC and filter embedding models out of the chat view's model selector

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
28acb7e70f
commit
daa4c48375
@ -31,7 +31,7 @@ var callConfig = rpc.declare({
|
||||
var callHealth = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'health',
|
||||
expect: { healthy: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
var callMetrics = rpc.declare({
|
||||
@ -43,47 +43,47 @@ var callMetrics = rpc.declare({
|
||||
var callStart = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'start',
|
||||
expect: { success: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
var callStop = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'stop',
|
||||
expect: { success: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
var callRestart = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'restart',
|
||||
expect: { success: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
var callModelInstall = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'model_install',
|
||||
params: ['name'],
|
||||
expect: { success: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
var callModelRemove = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'model_remove',
|
||||
params: ['name'],
|
||||
expect: { success: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
var callChat = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'chat',
|
||||
params: ['model', 'messages'],
|
||||
expect: { response: '' }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
var callComplete = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'complete',
|
||||
params: ['model', 'prompt'],
|
||||
expect: { text: '' }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
function formatBytes(bytes) {
|
||||
|
||||
@ -9,51 +9,46 @@ var callModels = rpc.declare({
|
||||
expect: { models: [] }
|
||||
});
|
||||
|
||||
// Custom chat function with longer timeout (LLMs can be slow)
|
||||
function callChatWithTimeout(model, messages, timeoutMs) {
|
||||
var callStatus = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'status',
|
||||
expect: { }
|
||||
});
|
||||
|
||||
// Call LocalAI API directly (bypasses RPCD 30s script timeout)
|
||||
function callChatDirect(apiPort, model, messages, timeoutMs) {
|
||||
return new Promise(function(resolve, reject) {
|
||||
var timeout = timeoutMs || 120000; // 2 minutes default
|
||||
var timeout = timeoutMs || 180000; // 3 minutes default for LLM
|
||||
var controller = new AbortController();
|
||||
var timeoutId = setTimeout(function() {
|
||||
controller.abort();
|
||||
reject(new Error('Request timed out - model may need more time'));
|
||||
reject(new Error('Request timed out after ' + (timeout/1000) + 's - model may need more time'));
|
||||
}, timeout);
|
||||
|
||||
// Use ubus RPC endpoint
|
||||
var ubusUrl = L.url('admin/ubus');
|
||||
var payload = JSON.stringify({
|
||||
jsonrpc: '2.0',
|
||||
id: Date.now(),
|
||||
method: 'call',
|
||||
params: [
|
||||
rpc.getSessionID() || '00000000000000000000000000000000',
|
||||
'luci.localai',
|
||||
'chat',
|
||||
{ model: model, messages: messages }
|
||||
]
|
||||
});
|
||||
var apiUrl = window.location.protocol + '//' + window.location.hostname + ':' + apiPort + '/v1/chat/completions';
|
||||
|
||||
fetch(ubusUrl, {
|
||||
fetch(apiUrl, {
|
||||
method: 'POST',
|
||||
credentials: 'same-origin',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: payload,
|
||||
body: JSON.stringify({ model: model, messages: messages }),
|
||||
signal: controller.signal
|
||||
})
|
||||
.then(function(response) {
|
||||
clearTimeout(timeoutId);
|
||||
if (!response.ok) {
|
||||
throw new Error('HTTP ' + response.status);
|
||||
return response.text().then(function(t) {
|
||||
throw new Error('API error HTTP ' + response.status + ': ' + t.substring(0, 200));
|
||||
});
|
||||
}
|
||||
return response.json();
|
||||
})
|
||||
.then(function(data) {
|
||||
clearTimeout(timeoutId);
|
||||
if (data.result && Array.isArray(data.result) && data.result[1]) {
|
||||
resolve(data.result[1]);
|
||||
} else if (data.error) {
|
||||
reject(new Error(data.error.message || 'RPC error'));
|
||||
if (data.error) {
|
||||
resolve({ response: '', error: data.error.message || JSON.stringify(data.error) });
|
||||
} else if (data.choices && data.choices[0] && data.choices[0].message) {
|
||||
resolve({ response: data.choices[0].message.content || '' });
|
||||
} else {
|
||||
resolve({ response: '', error: 'Unexpected response format' });
|
||||
resolve({ response: '', error: 'Unexpected API response format' });
|
||||
}
|
||||
})
|
||||
.catch(function(err) {
|
||||
@ -73,13 +68,19 @@ return view.extend({
|
||||
selectedModel: null,
|
||||
|
||||
load: function() {
|
||||
return callModels();
|
||||
return Promise.all([callModels(), callStatus()]);
|
||||
},
|
||||
|
||||
render: function(data) {
|
||||
var self = this;
|
||||
var modelsData = data[0];
|
||||
var statusData = data[1] || {};
|
||||
this.apiPort = statusData.api_port || 8080;
|
||||
|
||||
// RPC with expect returns array directly
|
||||
var models = Array.isArray(data) ? data : (data && data.models ? data.models : []);
|
||||
var allModels = Array.isArray(modelsData) ? modelsData : (modelsData && modelsData.models ? modelsData.models : []);
|
||||
// Filter out embedding models - they can't do chat completions
|
||||
var models = allModels.filter(function(m) { return !m.embeddings; });
|
||||
|
||||
var container = E('div', { 'class': 'localai-chat' }, [
|
||||
E('style', {}, this.getCSS()),
|
||||
@ -101,7 +102,7 @@ return view.extend({
|
||||
var displayName = m.loaded ? modelId + ' ✓' : modelId;
|
||||
return E('option', { 'value': modelId }, displayName);
|
||||
}) :
|
||||
[E('option', { 'value': '' }, _('No models available'))]
|
||||
[E('option', { 'value': '' }, _('No chat models available'))]
|
||||
)
|
||||
])
|
||||
]),
|
||||
@ -188,9 +189,8 @@ return view.extend({
|
||||
// Build messages array
|
||||
this.messages.push({ role: 'user', content: message });
|
||||
|
||||
// Send to API (120s timeout for slow models)
|
||||
// Pass messages as array - RPCD will handle JSON serialization
|
||||
callChatWithTimeout(this.selectedModel, this.messages, 120000)
|
||||
// Call LocalAI API directly (bypasses RPCD 30s script timeout)
|
||||
callChatDirect(this.apiPort, this.selectedModel, this.messages, 180000)
|
||||
.then(function(result) {
|
||||
var loading = document.getElementById('loading-msg');
|
||||
if (loading) loading.remove();
|
||||
|
||||
@ -18,7 +18,7 @@ var callModels = rpc.declare({
|
||||
var callHealth = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'health',
|
||||
expect: { healthy: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
var callMetrics = rpc.declare({
|
||||
@ -30,19 +30,19 @@ var callMetrics = rpc.declare({
|
||||
var callStart = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'start',
|
||||
expect: { success: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
var callStop = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'stop',
|
||||
expect: { success: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
var callRestart = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'restart',
|
||||
expect: { success: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
function formatBytes(bytes) {
|
||||
|
||||
@ -13,14 +13,14 @@ var callModelInstall = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'model_install',
|
||||
params: ['name'],
|
||||
expect: { success: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
var callModelRemove = rpc.declare({
|
||||
object: 'luci.localai',
|
||||
method: 'model_remove',
|
||||
params: ['name'],
|
||||
expect: { success: false }
|
||||
expect: { }
|
||||
});
|
||||
|
||||
function formatBytes(bytes) {
|
||||
|
||||
@ -94,13 +94,22 @@ get_models() {
|
||||
[ $first -eq 0 ] && echo ","
|
||||
first=0
|
||||
seen="$seen $model_id"
|
||||
# Check YAML config for embeddings flag
|
||||
local is_embedding="false"
|
||||
for yext in yaml yml; do
|
||||
if [ -f "$MODELS_PATH/$model_id.$yext" ]; then
|
||||
grep -q "^embeddings:.*true" "$MODELS_PATH/$model_id.$yext" 2>/dev/null && is_embedding="true"
|
||||
break
|
||||
fi
|
||||
done
|
||||
cat <<EOF
|
||||
{
|
||||
"id": "$model_id",
|
||||
"name": "$model_id",
|
||||
"size": 0,
|
||||
"type": "loaded",
|
||||
"loaded": true
|
||||
"loaded": true,
|
||||
"embeddings": $is_embedding
|
||||
}
|
||||
EOF
|
||||
i=$((i + 1))
|
||||
@ -350,41 +359,54 @@ do_chat() {
|
||||
# Log for debugging
|
||||
logger -t localai-chat "Request to model: $model"
|
||||
|
||||
# Call LocalAI API using curl if available, otherwise wget
|
||||
# Call LocalAI API - prefer wget (OpenWrt standard, creates output file reliably)
|
||||
local tmpfile="/tmp/localai_chat_$$"
|
||||
local tmpfile_err="/tmp/localai_chat_err_$$"
|
||||
|
||||
# Use longer timeout for LLM responses (120 seconds)
|
||||
if command -v curl >/dev/null 2>&1; then
|
||||
curl -s -m 120 -X POST "http://127.0.0.1:$API_PORT/v1/chat/completions" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$request_body" \
|
||||
-o "$tmpfile" 2>"$tmpfile_err"
|
||||
else
|
||||
wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
|
||||
--header="Content-Type: application/json" \
|
||||
"http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"
|
||||
fi
|
||||
wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
|
||||
--header="Content-Type: application/json" \
|
||||
"http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"
|
||||
|
||||
if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
|
||||
# Log raw response for debugging
|
||||
logger -t localai-chat "Raw response: $(head -c 200 "$tmpfile")"
|
||||
|
||||
# Extract message content using jsonfilter
|
||||
local content=$(jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null)
|
||||
# Check for API error first
|
||||
local error=$(jsonfilter -i "$tmpfile" -e '@.error.message' 2>/dev/null)
|
||||
|
||||
if [ -n "$error" ]; then
|
||||
# Escape quotes and newlines in error
|
||||
error=$(echo "$error" | sed 's/"/\\"/g' | tr '\n' ' ')
|
||||
error=$(printf '%s' "$error" | tr '\n' ' ' | sed 's/"/\\"/g')
|
||||
echo "{\"response\":\"\",\"error\":\"$error\"}"
|
||||
elif [ -n "$content" ]; then
|
||||
# Properly escape the content for JSON output
|
||||
# Handle quotes, backslashes, and newlines
|
||||
content=$(printf '%s' "$content" | sed 's/\\/\\\\/g; s/"/\\"/g' | awk '{printf "%s\\n", $0}' | sed 's/\\n$//')
|
||||
echo "{\"response\":\"$content\"}"
|
||||
else
|
||||
echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
|
||||
# Extract content and build JSON via file I/O to handle
|
||||
# large responses and special characters safely
|
||||
local outfile="/tmp/localai_out_$$"
|
||||
local has_content=0
|
||||
|
||||
jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null | {
|
||||
printf '{"response":"'
|
||||
awk '
|
||||
BEGIN { ORS="" }
|
||||
{
|
||||
gsub(/\\/, "\\\\")
|
||||
gsub(/"/, "\\\"")
|
||||
gsub(/\t/, "\\t")
|
||||
gsub(/\r/, "")
|
||||
if (NR > 1) printf "\\n"
|
||||
printf "%s", $0
|
||||
}'
|
||||
printf '"}'
|
||||
} > "$outfile"
|
||||
|
||||
# Check if we got actual content (file should be > 17 bytes: {"response":""} )
|
||||
local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
|
||||
if [ "$outsize" -gt 17 ]; then
|
||||
cat "$outfile"
|
||||
else
|
||||
echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
|
||||
fi
|
||||
rm -f "$outfile"
|
||||
fi
|
||||
rm -f "$tmpfile" "$tmpfile_err" 2>/dev/null
|
||||
else
|
||||
@ -411,14 +433,38 @@ do_complete() {
|
||||
return
|
||||
fi
|
||||
|
||||
local response=$(wget -q -O - --post-data "{\"model\":\"$model\",\"prompt\":\"$prompt\"}" \
|
||||
--header="Content-Type: application/json" \
|
||||
"http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null)
|
||||
local tmpfile="/tmp/localai_comp_$$"
|
||||
|
||||
if [ -n "$response" ]; then
|
||||
local text=$(echo "$response" | jsonfilter -e '@.choices[0].text' 2>/dev/null)
|
||||
echo "{\"text\":\"$(echo "$text" | sed 's/"/\\"/g')\"}"
|
||||
wget -q -T 120 -O "$tmpfile" --post-data "{\"model\":\"$model\",\"prompt\":\"$prompt\"}" \
|
||||
--header="Content-Type: application/json" \
|
||||
"http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null
|
||||
|
||||
if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
|
||||
local outfile="/tmp/localai_compout_$$"
|
||||
jsonfilter -i "$tmpfile" -e '@.choices[0].text' 2>/dev/null | {
|
||||
printf '{"text":"'
|
||||
awk '
|
||||
BEGIN { ORS="" }
|
||||
{
|
||||
gsub(/\\/, "\\\\")
|
||||
gsub(/"/, "\\\"")
|
||||
gsub(/\t/, "\\t")
|
||||
gsub(/\r/, "")
|
||||
if (NR > 1) printf "\\n"
|
||||
printf "%s", $0
|
||||
}'
|
||||
printf '"}'
|
||||
} > "$outfile"
|
||||
|
||||
local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
|
||||
if [ "$outsize" -gt 11 ]; then
|
||||
cat "$outfile"
|
||||
else
|
||||
echo '{"text":"","error":"Empty response from API"}'
|
||||
fi
|
||||
rm -f "$outfile" "$tmpfile"
|
||||
else
|
||||
rm -f "$tmpfile"
|
||||
echo '{"text":"","error":"API request failed"}'
|
||||
fi
|
||||
}
|
||||
|
||||
@ -309,9 +309,10 @@ cmd_models() {
|
||||
|
||||
echo ""
|
||||
echo "=== Available Presets ==="
|
||||
echo " tinyllama - 669MB - TinyLlama 1.1B"
|
||||
echo " phi2 - 1.6GB - Microsoft Phi-2"
|
||||
echo " mistral - 4.1GB - Mistral 7B Instruct"
|
||||
echo " tinyllama - 669MB - TinyLlama 1.1B (chat)"
|
||||
echo " phi2 - 1.6GB - Microsoft Phi-2 (chat)"
|
||||
echo " mistral - 4.1GB - Mistral 7B Instruct (chat)"
|
||||
echo " gte-small - 67MB - GTE Small (embeddings)"
|
||||
echo ""
|
||||
echo "Install: localaictl model-install <name>"
|
||||
}
|
||||
@ -340,13 +341,17 @@ cmd_model_install() {
|
||||
url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
|
||||
filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
|
||||
;;
|
||||
gte-small)
|
||||
url="https://huggingface.co/ggml-org/gte-small-Q8_0-GGUF/resolve/main/gte-small-q8_0.gguf"
|
||||
filename="gte-small-q8_0.gguf"
|
||||
;;
|
||||
http*)
|
||||
url="$model_name"
|
||||
filename=$(basename "$url")
|
||||
;;
|
||||
*)
|
||||
log_error "Unknown model: $model_name"
|
||||
log_error "Use preset name (tinyllama, phi2, mistral) or full URL"
|
||||
log_error "Use preset name (tinyllama, phi2, mistral, gte-small) or full URL"
|
||||
return 1
|
||||
;;
|
||||
esac
|
||||
@ -358,7 +363,22 @@ cmd_model_install() {
|
||||
if wget -L --show-progress -O "$models_path/$filename" "$url"; then
|
||||
# Create YAML config for the model
|
||||
local model_id="${filename%.*}"
|
||||
cat > "$models_path/$model_id.yaml" << EOF
|
||||
|
||||
# Embedding models need different config
|
||||
case "$model_name" in
|
||||
gte-small)
|
||||
cat > "$models_path/$model_id.yaml" << EOF
|
||||
name: $model_id
|
||||
backend: llama-cpp
|
||||
embeddings: true
|
||||
parameters:
|
||||
model: $filename
|
||||
context_size: 512
|
||||
threads: $threads
|
||||
EOF
|
||||
;;
|
||||
*)
|
||||
cat > "$models_path/$model_id.yaml" << EOF
|
||||
name: $model_id
|
||||
backend: llama-cpp
|
||||
parameters:
|
||||
@ -366,6 +386,8 @@ parameters:
|
||||
context_size: $context_size
|
||||
threads: $threads
|
||||
EOF
|
||||
;;
|
||||
esac
|
||||
log_info "Model installed: $model_id"
|
||||
log_info "Restart service to load: /etc/init.d/localai restart"
|
||||
else
|
||||
|
||||
Loading…
Reference in New Issue
Block a user