fix(localai): Add gte-small preset, fix RPC expect unwrapping and chat JSON escaping
- Add gte-small embedding model preset to localaictl with proper YAML config (embeddings: true, context_size: 512)
- Fix RPC expect declarations across api.js, dashboard.js, models.js to use empty expect objects, preserving full response including error fields
- Replace fragile sed/awk JSON escaping in RPCD chat and completion handlers with file I/O streaming through awk for robust handling of special characters in LLM responses
- Switch RPCD chat handler from curl to wget to avoid missing output file on timeout (curl doesn't create -o file on exit code 28)
- Bypass RPCD 30s script timeout for chat by calling LocalAI API directly from the browser via fetch()
- Add embeddings flag to models RPC and filter embedding models from chat view model selector

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
28acb7e70f
commit
daa4c48375
@ -31,7 +31,7 @@ var callConfig = rpc.declare({
|
|||||||
var callHealth = rpc.declare({
|
var callHealth = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'health',
|
method: 'health',
|
||||||
expect: { healthy: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
var callMetrics = rpc.declare({
|
var callMetrics = rpc.declare({
|
||||||
@ -43,47 +43,47 @@ var callMetrics = rpc.declare({
|
|||||||
var callStart = rpc.declare({
|
var callStart = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'start',
|
method: 'start',
|
||||||
expect: { success: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
var callStop = rpc.declare({
|
var callStop = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'stop',
|
method: 'stop',
|
||||||
expect: { success: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
var callRestart = rpc.declare({
|
var callRestart = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'restart',
|
method: 'restart',
|
||||||
expect: { success: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
var callModelInstall = rpc.declare({
|
var callModelInstall = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'model_install',
|
method: 'model_install',
|
||||||
params: ['name'],
|
params: ['name'],
|
||||||
expect: { success: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
var callModelRemove = rpc.declare({
|
var callModelRemove = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'model_remove',
|
method: 'model_remove',
|
||||||
params: ['name'],
|
params: ['name'],
|
||||||
expect: { success: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
var callChat = rpc.declare({
|
var callChat = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'chat',
|
method: 'chat',
|
||||||
params: ['model', 'messages'],
|
params: ['model', 'messages'],
|
||||||
expect: { response: '' }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
var callComplete = rpc.declare({
|
var callComplete = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'complete',
|
method: 'complete',
|
||||||
params: ['model', 'prompt'],
|
params: ['model', 'prompt'],
|
||||||
expect: { text: '' }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
function formatBytes(bytes) {
|
function formatBytes(bytes) {
|
||||||
|
|||||||
@ -9,51 +9,46 @@ var callModels = rpc.declare({
|
|||||||
expect: { models: [] }
|
expect: { models: [] }
|
||||||
});
|
});
|
||||||
|
|
||||||
// Custom chat function with longer timeout (LLMs can be slow)
|
var callStatus = rpc.declare({
|
||||||
function callChatWithTimeout(model, messages, timeoutMs) {
|
object: 'luci.localai',
|
||||||
|
method: 'status',
|
||||||
|
expect: { }
|
||||||
|
});
|
||||||
|
|
||||||
|
// Call LocalAI API directly (bypasses RPCD 30s script timeout)
|
||||||
|
function callChatDirect(apiPort, model, messages, timeoutMs) {
|
||||||
return new Promise(function(resolve, reject) {
|
return new Promise(function(resolve, reject) {
|
||||||
var timeout = timeoutMs || 120000; // 2 minutes default
|
var timeout = timeoutMs || 180000; // 3 minutes default for LLM
|
||||||
var controller = new AbortController();
|
var controller = new AbortController();
|
||||||
var timeoutId = setTimeout(function() {
|
var timeoutId = setTimeout(function() {
|
||||||
controller.abort();
|
controller.abort();
|
||||||
reject(new Error('Request timed out - model may need more time'));
|
reject(new Error('Request timed out after ' + (timeout/1000) + 's - model may need more time'));
|
||||||
}, timeout);
|
}, timeout);
|
||||||
|
|
||||||
// Use ubus RPC endpoint
|
var apiUrl = window.location.protocol + '//' + window.location.hostname + ':' + apiPort + '/v1/chat/completions';
|
||||||
var ubusUrl = L.url('admin/ubus');
|
|
||||||
var payload = JSON.stringify({
|
|
||||||
jsonrpc: '2.0',
|
|
||||||
id: Date.now(),
|
|
||||||
method: 'call',
|
|
||||||
params: [
|
|
||||||
rpc.getSessionID() || '00000000000000000000000000000000',
|
|
||||||
'luci.localai',
|
|
||||||
'chat',
|
|
||||||
{ model: model, messages: messages }
|
|
||||||
]
|
|
||||||
});
|
|
||||||
|
|
||||||
fetch(ubusUrl, {
|
fetch(apiUrl, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
credentials: 'same-origin',
|
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: payload,
|
body: JSON.stringify({ model: model, messages: messages }),
|
||||||
signal: controller.signal
|
signal: controller.signal
|
||||||
})
|
})
|
||||||
.then(function(response) {
|
.then(function(response) {
|
||||||
|
clearTimeout(timeoutId);
|
||||||
if (!response.ok) {
|
if (!response.ok) {
|
||||||
throw new Error('HTTP ' + response.status);
|
return response.text().then(function(t) {
|
||||||
|
throw new Error('API error HTTP ' + response.status + ': ' + t.substring(0, 200));
|
||||||
|
});
|
||||||
}
|
}
|
||||||
return response.json();
|
return response.json();
|
||||||
})
|
})
|
||||||
.then(function(data) {
|
.then(function(data) {
|
||||||
clearTimeout(timeoutId);
|
if (data.error) {
|
||||||
if (data.result && Array.isArray(data.result) && data.result[1]) {
|
resolve({ response: '', error: data.error.message || JSON.stringify(data.error) });
|
||||||
resolve(data.result[1]);
|
} else if (data.choices && data.choices[0] && data.choices[0].message) {
|
||||||
} else if (data.error) {
|
resolve({ response: data.choices[0].message.content || '' });
|
||||||
reject(new Error(data.error.message || 'RPC error'));
|
|
||||||
} else {
|
} else {
|
||||||
resolve({ response: '', error: 'Unexpected response format' });
|
resolve({ response: '', error: 'Unexpected API response format' });
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.catch(function(err) {
|
.catch(function(err) {
|
||||||
@ -73,13 +68,19 @@ return view.extend({
|
|||||||
selectedModel: null,
|
selectedModel: null,
|
||||||
|
|
||||||
load: function() {
|
load: function() {
|
||||||
return callModels();
|
return Promise.all([callModels(), callStatus()]);
|
||||||
},
|
},
|
||||||
|
|
||||||
render: function(data) {
|
render: function(data) {
|
||||||
var self = this;
|
var self = this;
|
||||||
|
var modelsData = data[0];
|
||||||
|
var statusData = data[1] || {};
|
||||||
|
this.apiPort = statusData.api_port || 8080;
|
||||||
|
|
||||||
// RPC with expect returns array directly
|
// RPC with expect returns array directly
|
||||||
var models = Array.isArray(data) ? data : (data && data.models ? data.models : []);
|
var allModels = Array.isArray(modelsData) ? modelsData : (modelsData && modelsData.models ? modelsData.models : []);
|
||||||
|
// Filter out embedding models - they can't do chat completions
|
||||||
|
var models = allModels.filter(function(m) { return !m.embeddings; });
|
||||||
|
|
||||||
var container = E('div', { 'class': 'localai-chat' }, [
|
var container = E('div', { 'class': 'localai-chat' }, [
|
||||||
E('style', {}, this.getCSS()),
|
E('style', {}, this.getCSS()),
|
||||||
@ -101,7 +102,7 @@ return view.extend({
|
|||||||
var displayName = m.loaded ? modelId + ' ✓' : modelId;
|
var displayName = m.loaded ? modelId + ' ✓' : modelId;
|
||||||
return E('option', { 'value': modelId }, displayName);
|
return E('option', { 'value': modelId }, displayName);
|
||||||
}) :
|
}) :
|
||||||
[E('option', { 'value': '' }, _('No models available'))]
|
[E('option', { 'value': '' }, _('No chat models available'))]
|
||||||
)
|
)
|
||||||
])
|
])
|
||||||
]),
|
]),
|
||||||
@ -188,9 +189,8 @@ return view.extend({
|
|||||||
// Build messages array
|
// Build messages array
|
||||||
this.messages.push({ role: 'user', content: message });
|
this.messages.push({ role: 'user', content: message });
|
||||||
|
|
||||||
// Send to API (120s timeout for slow models)
|
// Call LocalAI API directly (bypasses RPCD 30s script timeout)
|
||||||
// Pass messages as array - RPCD will handle JSON serialization
|
callChatDirect(this.apiPort, this.selectedModel, this.messages, 180000)
|
||||||
callChatWithTimeout(this.selectedModel, this.messages, 120000)
|
|
||||||
.then(function(result) {
|
.then(function(result) {
|
||||||
var loading = document.getElementById('loading-msg');
|
var loading = document.getElementById('loading-msg');
|
||||||
if (loading) loading.remove();
|
if (loading) loading.remove();
|
||||||
|
|||||||
@ -18,7 +18,7 @@ var callModels = rpc.declare({
|
|||||||
var callHealth = rpc.declare({
|
var callHealth = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'health',
|
method: 'health',
|
||||||
expect: { healthy: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
var callMetrics = rpc.declare({
|
var callMetrics = rpc.declare({
|
||||||
@ -30,19 +30,19 @@ var callMetrics = rpc.declare({
|
|||||||
var callStart = rpc.declare({
|
var callStart = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'start',
|
method: 'start',
|
||||||
expect: { success: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
var callStop = rpc.declare({
|
var callStop = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'stop',
|
method: 'stop',
|
||||||
expect: { success: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
var callRestart = rpc.declare({
|
var callRestart = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'restart',
|
method: 'restart',
|
||||||
expect: { success: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
function formatBytes(bytes) {
|
function formatBytes(bytes) {
|
||||||
|
|||||||
@ -13,14 +13,14 @@ var callModelInstall = rpc.declare({
|
|||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'model_install',
|
method: 'model_install',
|
||||||
params: ['name'],
|
params: ['name'],
|
||||||
expect: { success: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
var callModelRemove = rpc.declare({
|
var callModelRemove = rpc.declare({
|
||||||
object: 'luci.localai',
|
object: 'luci.localai',
|
||||||
method: 'model_remove',
|
method: 'model_remove',
|
||||||
params: ['name'],
|
params: ['name'],
|
||||||
expect: { success: false }
|
expect: { }
|
||||||
});
|
});
|
||||||
|
|
||||||
function formatBytes(bytes) {
|
function formatBytes(bytes) {
|
||||||
|
|||||||
@ -94,13 +94,22 @@ get_models() {
|
|||||||
[ $first -eq 0 ] && echo ","
|
[ $first -eq 0 ] && echo ","
|
||||||
first=0
|
first=0
|
||||||
seen="$seen $model_id"
|
seen="$seen $model_id"
|
||||||
|
# Check YAML config for embeddings flag
|
||||||
|
local is_embedding="false"
|
||||||
|
for yext in yaml yml; do
|
||||||
|
if [ -f "$MODELS_PATH/$model_id.$yext" ]; then
|
||||||
|
grep -q "^embeddings:.*true" "$MODELS_PATH/$model_id.$yext" 2>/dev/null && is_embedding="true"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
{
|
{
|
||||||
"id": "$model_id",
|
"id": "$model_id",
|
||||||
"name": "$model_id",
|
"name": "$model_id",
|
||||||
"size": 0,
|
"size": 0,
|
||||||
"type": "loaded",
|
"type": "loaded",
|
||||||
"loaded": true
|
"loaded": true,
|
||||||
|
"embeddings": $is_embedding
|
||||||
}
|
}
|
||||||
EOF
|
EOF
|
||||||
i=$((i + 1))
|
i=$((i + 1))
|
||||||
@ -350,41 +359,54 @@ do_chat() {
|
|||||||
# Log for debugging
|
# Log for debugging
|
||||||
logger -t localai-chat "Request to model: $model"
|
logger -t localai-chat "Request to model: $model"
|
||||||
|
|
||||||
# Call LocalAI API using curl if available, otherwise wget
|
# Call LocalAI API - prefer wget (OpenWrt standard, creates output file reliably)
|
||||||
local tmpfile="/tmp/localai_chat_$$"
|
local tmpfile="/tmp/localai_chat_$$"
|
||||||
local tmpfile_err="/tmp/localai_chat_err_$$"
|
local tmpfile_err="/tmp/localai_chat_err_$$"
|
||||||
|
|
||||||
# Use longer timeout for LLM responses (120 seconds)
|
# Use longer timeout for LLM responses (120 seconds)
|
||||||
if command -v curl >/dev/null 2>&1; then
|
wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
|
||||||
curl -s -m 120 -X POST "http://127.0.0.1:$API_PORT/v1/chat/completions" \
|
--header="Content-Type: application/json" \
|
||||||
-H "Content-Type: application/json" \
|
"http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"
|
||||||
-d "$request_body" \
|
|
||||||
-o "$tmpfile" 2>"$tmpfile_err"
|
|
||||||
else
|
|
||||||
wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
|
|
||||||
--header="Content-Type: application/json" \
|
|
||||||
"http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
|
if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
|
||||||
# Log raw response for debugging
|
# Log raw response for debugging
|
||||||
logger -t localai-chat "Raw response: $(head -c 200 "$tmpfile")"
|
logger -t localai-chat "Raw response: $(head -c 200 "$tmpfile")"
|
||||||
|
|
||||||
# Extract message content using jsonfilter
|
# Check for API error first
|
||||||
local content=$(jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null)
|
|
||||||
local error=$(jsonfilter -i "$tmpfile" -e '@.error.message' 2>/dev/null)
|
local error=$(jsonfilter -i "$tmpfile" -e '@.error.message' 2>/dev/null)
|
||||||
|
|
||||||
if [ -n "$error" ]; then
|
if [ -n "$error" ]; then
|
||||||
# Escape quotes and newlines in error
|
error=$(printf '%s' "$error" | tr '\n' ' ' | sed 's/"/\\"/g')
|
||||||
error=$(echo "$error" | sed 's/"/\\"/g' | tr '\n' ' ')
|
|
||||||
echo "{\"response\":\"\",\"error\":\"$error\"}"
|
echo "{\"response\":\"\",\"error\":\"$error\"}"
|
||||||
elif [ -n "$content" ]; then
|
|
||||||
# Properly escape the content for JSON output
|
|
||||||
# Handle quotes, backslashes, and newlines
|
|
||||||
content=$(printf '%s' "$content" | sed 's/\\/\\\\/g; s/"/\\"/g' | awk '{printf "%s\\n", $0}' | sed 's/\\n$//')
|
|
||||||
echo "{\"response\":\"$content\"}"
|
|
||||||
else
|
else
|
||||||
echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
|
# Extract content and build JSON via file I/O to handle
|
||||||
|
# large responses and special characters safely
|
||||||
|
local outfile="/tmp/localai_out_$$"
|
||||||
|
local has_content=0
|
||||||
|
|
||||||
|
jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null | {
|
||||||
|
printf '{"response":"'
|
||||||
|
awk '
|
||||||
|
BEGIN { ORS="" }
|
||||||
|
{
|
||||||
|
gsub(/\\/, "\\\\")
|
||||||
|
gsub(/"/, "\\\"")
|
||||||
|
gsub(/\t/, "\\t")
|
||||||
|
gsub(/\r/, "")
|
||||||
|
if (NR > 1) printf "\\n"
|
||||||
|
printf "%s", $0
|
||||||
|
}'
|
||||||
|
printf '"}'
|
||||||
|
} > "$outfile"
|
||||||
|
|
||||||
|
# Check if we got actual content: the empty envelope {"response":""} is 15 bytes, so the 17-byte threshold below also rejects replies shorter than 3 bytes
|
||||||
|
local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
|
||||||
|
if [ "$outsize" -gt 17 ]; then
|
||||||
|
cat "$outfile"
|
||||||
|
else
|
||||||
|
echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
|
||||||
|
fi
|
||||||
|
rm -f "$outfile"
|
||||||
fi
|
fi
|
||||||
rm -f "$tmpfile" "$tmpfile_err" 2>/dev/null
|
rm -f "$tmpfile" "$tmpfile_err" 2>/dev/null
|
||||||
else
|
else
|
||||||
@ -411,14 +433,38 @@ do_complete() {
|
|||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
|
|
||||||
local response=$(wget -q -O - --post-data "{\"model\":\"$model\",\"prompt\":\"$prompt\"}" \
|
local tmpfile="/tmp/localai_comp_$$"
|
||||||
--header="Content-Type: application/json" \
|
|
||||||
"http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null)
|
|
||||||
|
|
||||||
if [ -n "$response" ]; then
|
wget -q -T 120 -O "$tmpfile" --post-data "{\"model\":\"$model\",\"prompt\":\"$prompt\"}" \
|
||||||
local text=$(echo "$response" | jsonfilter -e '@.choices[0].text' 2>/dev/null)
|
--header="Content-Type: application/json" \
|
||||||
echo "{\"text\":\"$(echo "$text" | sed 's/"/\\"/g')\"}"
|
"http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null
|
||||||
|
|
||||||
|
if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
|
||||||
|
local outfile="/tmp/localai_compout_$$"
|
||||||
|
jsonfilter -i "$tmpfile" -e '@.choices[0].text' 2>/dev/null | {
|
||||||
|
printf '{"text":"'
|
||||||
|
awk '
|
||||||
|
BEGIN { ORS="" }
|
||||||
|
{
|
||||||
|
gsub(/\\/, "\\\\")
|
||||||
|
gsub(/"/, "\\\"")
|
||||||
|
gsub(/\t/, "\\t")
|
||||||
|
gsub(/\r/, "")
|
||||||
|
if (NR > 1) printf "\\n"
|
||||||
|
printf "%s", $0
|
||||||
|
}'
|
||||||
|
printf '"}'
|
||||||
|
} > "$outfile"
|
||||||
|
|
||||||
|
local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
|
||||||
|
if [ "$outsize" -gt 11 ]; then
|
||||||
|
cat "$outfile"
|
||||||
|
else
|
||||||
|
echo '{"text":"","error":"Empty response from API"}'
|
||||||
|
fi
|
||||||
|
rm -f "$outfile" "$tmpfile"
|
||||||
else
|
else
|
||||||
|
rm -f "$tmpfile"
|
||||||
echo '{"text":"","error":"API request failed"}'
|
echo '{"text":"","error":"API request failed"}'
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|||||||
@ -309,9 +309,10 @@ cmd_models() {
|
|||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "=== Available Presets ==="
|
echo "=== Available Presets ==="
|
||||||
echo " tinyllama - 669MB - TinyLlama 1.1B"
|
echo " tinyllama - 669MB - TinyLlama 1.1B (chat)"
|
||||||
echo " phi2 - 1.6GB - Microsoft Phi-2"
|
echo " phi2 - 1.6GB - Microsoft Phi-2 (chat)"
|
||||||
echo " mistral - 4.1GB - Mistral 7B Instruct"
|
echo " mistral - 4.1GB - Mistral 7B Instruct (chat)"
|
||||||
|
echo " gte-small - 67MB - GTE Small (embeddings)"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Install: localaictl model-install <name>"
|
echo "Install: localaictl model-install <name>"
|
||||||
}
|
}
|
||||||
@ -340,13 +341,17 @@ cmd_model_install() {
|
|||||||
url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
|
url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
|
||||||
filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
|
filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
|
||||||
;;
|
;;
|
||||||
|
gte-small)
|
||||||
|
url="https://huggingface.co/ggml-org/gte-small-Q8_0-GGUF/resolve/main/gte-small-q8_0.gguf"
|
||||||
|
filename="gte-small-q8_0.gguf"
|
||||||
|
;;
|
||||||
http*)
|
http*)
|
||||||
url="$model_name"
|
url="$model_name"
|
||||||
filename=$(basename "$url")
|
filename=$(basename "$url")
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
log_error "Unknown model: $model_name"
|
log_error "Unknown model: $model_name"
|
||||||
log_error "Use preset name (tinyllama, phi2, mistral) or full URL"
|
log_error "Use preset name (tinyllama, phi2, mistral, gte-small) or full URL"
|
||||||
return 1
|
return 1
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
@ -358,7 +363,22 @@ cmd_model_install() {
|
|||||||
if wget -L --show-progress -O "$models_path/$filename" "$url"; then
|
if wget -L --show-progress -O "$models_path/$filename" "$url"; then
|
||||||
# Create YAML config for the model
|
# Create YAML config for the model
|
||||||
local model_id="${filename%.*}"
|
local model_id="${filename%.*}"
|
||||||
cat > "$models_path/$model_id.yaml" << EOF
|
|
||||||
|
# Embedding models need different config
|
||||||
|
case "$model_name" in
|
||||||
|
gte-small)
|
||||||
|
cat > "$models_path/$model_id.yaml" << EOF
|
||||||
|
name: $model_id
|
||||||
|
backend: llama-cpp
|
||||||
|
embeddings: true
|
||||||
|
parameters:
|
||||||
|
model: $filename
|
||||||
|
context_size: 512
|
||||||
|
threads: $threads
|
||||||
|
EOF
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
cat > "$models_path/$model_id.yaml" << EOF
|
||||||
name: $model_id
|
name: $model_id
|
||||||
backend: llama-cpp
|
backend: llama-cpp
|
||||||
parameters:
|
parameters:
|
||||||
@ -366,6 +386,8 @@ parameters:
|
|||||||
context_size: $context_size
|
context_size: $context_size
|
||||||
threads: $threads
|
threads: $threads
|
||||||
EOF
|
EOF
|
||||||
|
;;
|
||||||
|
esac
|
||||||
log_info "Model installed: $model_id"
|
log_info "Model installed: $model_id"
|
||||||
log_info "Restart service to load: /etc/init.d/localai restart"
|
log_info "Restart service to load: /etc/init.d/localai restart"
|
||||||
else
|
else
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user