fix(localai): Add gte-small preset, fix RPC expect unwrapping and chat JSON escaping

- Add gte-small embedding model preset to localaictl with proper YAML config
  (embeddings: true, context_size: 512)
- Fix RPC expect declarations across api.js, dashboard.js, models.js to use
  empty expect objects, preserving full response including error fields
- Replace fragile sed/awk JSON escaping in RPCD chat and completion handlers
  with file I/O streaming through awk for robust handling of special
  characters in LLM responses
- Switch RPCD chat handler from curl to wget to avoid missing output file on
  timeout (curl doesn't create -o file on exit code 28)
- Bypass RPCD 30s script timeout for chat by calling LocalAI API directly
  from the browser via fetch()
- Add embeddings flag to models RPC and filter embedding models from chat
  view model selector

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 19:22:30 +01:00 · 2026-02-03 19:22:30 +01:00 · daa4c48375
commit daa4c48375
parent 28acb7e70f
6 changed files with 148 additions and 80 deletions
--- a/package/secubox/luci-app-localai/htdocs/luci-static/resources/localai/api.js
+++ b/package/secubox/luci-app-localai/htdocs/luci-static/resources/localai/api.js
@ -31,7 +31,7 @@ var callConfig = rpc.declare({
 var callHealth = rpc.declare({
 	object: 'luci.localai',
 	method: 'health',
-	expect: { healthy: false }
+	expect: { }
 });
 var callMetrics = rpc.declare({
@ -43,47 +43,47 @@ var callMetrics = rpc.declare({
 var callStart = rpc.declare({
 	object: 'luci.localai',
 	method: 'start',
-	expect: { success: false }
+	expect: { }
 });
 var callStop = rpc.declare({
 	object: 'luci.localai',
 	method: 'stop',
-	expect: { success: false }
+	expect: { }
 });
 var callRestart = rpc.declare({
 	object: 'luci.localai',
 	method: 'restart',
-	expect: { success: false }
+	expect: { }
 });
 var callModelInstall = rpc.declare({
 	object: 'luci.localai',
 	method: 'model_install',
 	params: ['name'],
-	expect: { success: false }
+	expect: { }
 });
 var callModelRemove = rpc.declare({
 	object: 'luci.localai',
 	method: 'model_remove',
 	params: ['name'],
-	expect: { success: false }
+	expect: { }
 });
 var callChat = rpc.declare({
 	object: 'luci.localai',
 	method: 'chat',
 	params: ['model', 'messages'],
-	expect: { response: '' }
+	expect: { }
 });
 var callComplete = rpc.declare({
 	object: 'luci.localai',
 	method: 'complete',
 	params: ['model', 'prompt'],
-	expect: { text: '' }
+	expect: { }
 });
 function formatBytes(bytes) {
--- a/package/secubox/luci-app-localai/htdocs/luci-static/resources/view/localai/chat.js
+++ b/package/secubox/luci-app-localai/htdocs/luci-static/resources/view/localai/chat.js
@ -9,51 +9,46 @@ var callModels = rpc.declare({
 	expect: { models: [] }
 });
-// Custom chat function with longer timeout (LLMs can be slow)
+var callStatus = rpc.declare({
-function callChatWithTimeout(model, messages, timeoutMs) {
+	object: 'luci.localai',
 	method: 'status',
 	expect: { }
 });
 // Call LocalAI API directly (bypasses RPCD 30s script timeout)
 function callChatDirect(apiPort, model, messages, timeoutMs) {
 	return new Promise(function(resolve, reject) {
-		var timeout = timeoutMs || 120000; // 2 minutes default
+		var timeout = timeoutMs || 180000; // 3 minutes default for LLM
 		var controller = new AbortController();
 		var timeoutId = setTimeout(function() {
 			controller.abort();
-			reject(new Error('Request timed out - model may need more time'));
+			reject(new Error('Request timed out after ' + (timeout/1000) + 's - model may need more time'));
 		}, timeout);
-		// Use ubus RPC endpoint
+		var apiUrl = window.location.protocol + '//' + window.location.hostname + ':' + apiPort + '/v1/chat/completions';
 		var ubusUrl = L.url('admin/ubus');
 		var payload = JSON.stringify({
 			jsonrpc: '2.0',
 			id: Date.now(),
 			method: 'call',
 			params: [
 				rpc.getSessionID() || '00000000000000000000000000000000',
 				'luci.localai',
 				'chat',
 				{ model: model, messages: messages }
 			]
 		});
-		fetch(ubusUrl, {
+		fetch(apiUrl, {
 			method: 'POST',
 			credentials: 'same-origin',
 			headers: { 'Content-Type': 'application/json' },
-			body: payload,
+			body: JSON.stringify({ model: model, messages: messages }),
 			signal: controller.signal
 		})
 		.then(function(response) {
 			clearTimeout(timeoutId);
 			if (!response.ok) {
-				throw new Error('HTTP ' + response.status);
+				return response.text().then(function(t) {
 					throw new Error('API error HTTP ' + response.status + ': ' + t.substring(0, 200));
 				});
 			}
 			return response.json();
 		})
 		.then(function(data) {
-			clearTimeout(timeoutId);
+			if (data.error) {
-			if (data.result && Array.isArray(data.result) && data.result[1]) {
+				resolve({ response: '', error: data.error.message || JSON.stringify(data.error) });
-				resolve(data.result[1]);
+			} else if (data.choices && data.choices[0] && data.choices[0].message) {
-			} else if (data.error) {
+				resolve({ response: data.choices[0].message.content || '' });
 				reject(new Error(data.error.message || 'RPC error'));
 			} else {
-				resolve({ response: '', error: 'Unexpected response format' });
+				resolve({ response: '', error: 'Unexpected API response format' });
 			}
 		})
 		.catch(function(err) {
@ -73,13 +68,19 @@ return view.extend({
 	selectedModel: null,
 	load: function() {
-		return callModels();
+		return Promise.all([callModels(), callStatus()]);
 	},
 	render: function(data) {
 		var self = this;
 		var modelsData = data[0];
 		var statusData = data[1] || {};
 		this.apiPort = statusData.api_port || 8080;
 		// RPC with expect returns array directly
-		var models = Array.isArray(data) ? data : (data && data.models ? data.models : []);
+		var allModels = Array.isArray(modelsData) ? modelsData : (modelsData && modelsData.models ? modelsData.models : []);
 		// Filter out embedding models - they can't do chat completions
 		var models = allModels.filter(function(m) { return !m.embeddings; });
 		var container = E('div', { 'class': 'localai-chat' }, [
 			E('style', {}, this.getCSS()),
@ -101,7 +102,7 @@ return view.extend({
 							var displayName = m.loaded ? modelId + ' ✓' : modelId;
 							return E('option', { 'value': modelId }, displayName);
 						}) :
-						[E('option', { 'value': '' }, _('No models available'))]
+						[E('option', { 'value': '' }, _('No chat models available'))]
 					)
 				])
 			]),
@ -188,9 +189,8 @@ return view.extend({
 		// Build messages array
 		this.messages.push({ role: 'user', content: message });
-		// Send to API (120s timeout for slow models)
+		// Call LocalAI API directly (bypasses RPCD 30s script timeout)
-		// Pass messages as array - RPCD will handle JSON serialization
+		callChatDirect(this.apiPort, this.selectedModel, this.messages, 180000)
 		callChatWithTimeout(this.selectedModel, this.messages, 120000)
 			.then(function(result) {
 				var loading = document.getElementById('loading-msg');
 				if (loading) loading.remove();
--- a/package/secubox/luci-app-localai/htdocs/luci-static/resources/view/localai/dashboard.js
+++ b/package/secubox/luci-app-localai/htdocs/luci-static/resources/view/localai/dashboard.js
@ -18,7 +18,7 @@ var callModels = rpc.declare({
 var callHealth = rpc.declare({
 	object: 'luci.localai',
 	method: 'health',
-	expect: { healthy: false }
+	expect: { }
 });
 var callMetrics = rpc.declare({
@ -30,19 +30,19 @@ var callMetrics = rpc.declare({
 var callStart = rpc.declare({
 	object: 'luci.localai',
 	method: 'start',
-	expect: { success: false }
+	expect: { }
 });
 var callStop = rpc.declare({
 	object: 'luci.localai',
 	method: 'stop',
-	expect: { success: false }
+	expect: { }
 });
 var callRestart = rpc.declare({
 	object: 'luci.localai',
 	method: 'restart',
-	expect: { success: false }
+	expect: { }
 });
 function formatBytes(bytes) {
--- a/package/secubox/luci-app-localai/htdocs/luci-static/resources/view/localai/models.js
+++ b/package/secubox/luci-app-localai/htdocs/luci-static/resources/view/localai/models.js
@ -13,14 +13,14 @@ var callModelInstall = rpc.declare({
 	object: 'luci.localai',
 	method: 'model_install',
 	params: ['name'],
-	expect: { success: false }
+	expect: { }
 });
 var callModelRemove = rpc.declare({
 	object: 'luci.localai',
 	method: 'model_remove',
 	params: ['name'],
-	expect: { success: false }
+	expect: { }
 });
 function formatBytes(bytes) {
--- a/package/secubox/luci-app-localai/root/usr/libexec/rpcd/luci.localai
+++ b/package/secubox/luci-app-localai/root/usr/libexec/rpcd/luci.localai
@ -94,13 +94,22 @@ get_models() {
 				[ $first -eq 0 ] && echo ","
 				first=0
 				seen="$seen $model_id"
 				# Check YAML config for embeddings flag
 				local is_embedding="false"
 				for yext in yaml yml; do
 					if [ -f "$MODELS_PATH/$model_id.$yext" ]; then
 						grep -q "^embeddings:.*true" "$MODELS_PATH/$model_id.$yext" 2>/dev/null && is_embedding="true"
 						break
 					fi
 				done
 				cat <<EOF
 {
 	"id": "$model_id",
 	"name": "$model_id",
 	"size": 0,
 	"type": "loaded",
-	"loaded": true
+	"loaded": true,
 	"embeddings": $is_embedding
 }
 EOF
 				i=$((i + 1))
@ -350,41 +359,54 @@ do_chat() {
 	# Log for debugging
 	logger -t localai-chat "Request to model: $model"
-	# Call LocalAI API using curl if available, otherwise wget
+	# Call LocalAI API - prefer wget (OpenWrt standard, creates output file reliably)
 	local tmpfile="/tmp/localai_chat_$$"
 	local tmpfile_err="/tmp/localai_chat_err_$$"
 	# Use longer timeout for LLM responses (120 seconds)
-	if command -v curl >/dev/null 2>&1; then
+	wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
-		curl -s -m 120 -X POST "http://127.0.0.1:$API_PORT/v1/chat/completions" \
+		--header="Content-Type: application/json" \
-			-H "Content-Type: application/json" \
+		"http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"
 			-d "$request_body" \
 			-o "$tmpfile" 2>"$tmpfile_err"
 	else
 		wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
 			--header="Content-Type: application/json" \
 			"http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"
 	fi
 	if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
 		# Log raw response for debugging
 		logger -t localai-chat "Raw response: $(head -c 200 "$tmpfile")"
-		# Extract message content using jsonfilter
+		# Check for API error first
 		local content=$(jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null)
 		local error=$(jsonfilter -i "$tmpfile" -e '@.error.message' 2>/dev/null)
 		if [ -n "$error" ]; then
-			# Escape quotes and newlines in error
+			error=$(printf '%s' "$error" | tr '\n' ' ' | sed 's/"/\\"/g')
 			error=$(echo "$error" | sed 's/"/\\"/g' | tr '\n' ' ')
 			echo "{\"response\":\"\",\"error\":\"$error\"}"
 		elif [ -n "$content" ]; then
 			# Properly escape the content for JSON output
 			# Handle quotes, backslashes, and newlines
 			content=$(printf '%s' "$content" | sed 's/\\/\\\\/g; s/"/\\"/g' | awk '{printf "%s\\n", $0}' | sed 's/\\n$//')
 			echo "{\"response\":\"$content\"}"
 		else
-			echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
+			# Extract content and build JSON via file I/O to handle
 			# large responses and special characters safely
 			local outfile="/tmp/localai_out_$$"
 			local has_content=0
 			jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null | {
 				printf '{"response":"'
 				awk '
 				BEGIN { ORS="" }
 				{
 					gsub(/\\/, "\\\\")
 					gsub(/"/, "\\\"")
 					gsub(/\t/, "\\t")
 					gsub(/\r/, "")
 					if (NR > 1) printf "\\n"
 					printf "%s", $0
 				}'
 				printf '"}'
 			} > "$outfile"
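 			# Check if we got actual content. NOTE(review): the empty envelope {"response":""} is 15 bytes, so the "> 17" threshold below also reports 1-2 byte replies as empty - confirm whether 15 was intended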
 			local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
 			if [ "$outsize" -gt 17 ]; then
 				cat "$outfile"
 			else
 				echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
 			fi
 			rm -f "$outfile"
 		fi
 		rm -f "$tmpfile" "$tmpfile_err" 2>/dev/null
 	else
@ -411,14 +433,38 @@ do_complete() {
 		return
 	fi
-	local response=$(wget -q -O - --post-data "{\"model\":\"$model\",\"prompt\":\"$prompt\"}" \
+	local tmpfile="/tmp/localai_comp_$$"
 		--header="Content-Type: application/json" \
 		"http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null)
-	if [ -n "$response" ]; then
+	wget -q -T 120 -O "$tmpfile" --post-data "{\"model\":\"$model\",\"prompt\":\"$prompt\"}" \
-		local text=$(echo "$response" | jsonfilter -e '@.choices[0].text' 2>/dev/null)
+		--header="Content-Type: application/json" \
-		echo "{\"text\":\"$(echo "$text" | sed 's/"/\\"/g')\"}"
+		"http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null
 	if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
 		local outfile="/tmp/localai_compout_$$"
 		jsonfilter -i "$tmpfile" -e '@.choices[0].text' 2>/dev/null | {
 			printf '{"text":"'
 			awk '
 			BEGIN { ORS="" }
 			{
 				gsub(/\\/, "\\\\")
 				gsub(/"/, "\\\"")
 				gsub(/\t/, "\\t")
 				gsub(/\r/, "")
 				if (NR > 1) printf "\\n"
 				printf "%s", $0
 			}'
 			printf '"}'
 		} > "$outfile"
 		local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
 		if [ "$outsize" -gt 11 ]; then
 			cat "$outfile"
 		else
 			echo '{"text":"","error":"Empty response from API"}'
 		fi
 		rm -f "$outfile" "$tmpfile"
 	else
 		rm -f "$tmpfile"
 		echo '{"text":"","error":"API request failed"}'
 	fi
 }
--- a/package/secubox/secubox-app-localai/files/usr/sbin/localaictl
+++ b/package/secubox/secubox-app-localai/files/usr/sbin/localaictl
@ -309,9 +309,10 @@ cmd_models() {
 	echo ""
 	echo "=== Available Presets ==="
-	echo "  tinyllama    - 669MB  - TinyLlama 1.1B"
+	echo "  tinyllama    - 669MB  - TinyLlama 1.1B (chat)"
-	echo "  phi2         - 1.6GB  - Microsoft Phi-2"
+	echo "  phi2         - 1.6GB  - Microsoft Phi-2 (chat)"
-	echo "  mistral      - 4.1GB  - Mistral 7B Instruct"
+	echo "  mistral      - 4.1GB  - Mistral 7B Instruct (chat)"
 	echo "  gte-small    -  67MB  - GTE Small (embeddings)"
 	echo ""
 	echo "Install: localaictl model-install <name>"
 }
@ -340,13 +341,17 @@ cmd_model_install() {
 			url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
 			filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
 			;;
 		gte-small)
 			url="https://huggingface.co/ggml-org/gte-small-Q8_0-GGUF/resolve/main/gte-small-q8_0.gguf"
 			filename="gte-small-q8_0.gguf"
 			;;
 		http*)
 			url="$model_name"
 			filename=$(basename "$url")
 			;;
 		*)
 			log_error "Unknown model: $model_name"
-			log_error "Use preset name (tinyllama, phi2, mistral) or full URL"
+			log_error "Use preset name (tinyllama, phi2, mistral, gte-small) or full URL"
 			return 1
 			;;
 	esac
@ -358,7 +363,22 @@ cmd_model_install() {
 	if wget -L --show-progress -O "$models_path/$filename" "$url"; then
 		# Create YAML config for the model
 		local model_id="${filename%.*}"
-		cat > "$models_path/$model_id.yaml" << EOF
+
 		# Embedding models need different config
 		case "$model_name" in
 			gte-small)
 				cat > "$models_path/$model_id.yaml" << EOF
 name: $model_id
 backend: llama-cpp
 embeddings: true
 parameters:
  model: $filename
 context_size: 512
 threads: $threads
 EOF
 				;;
 			*)
 				cat > "$models_path/$model_id.yaml" << EOF
 name: $model_id
 backend: llama-cpp
 parameters:
@ -366,6 +386,8 @@ parameters:
 context_size: $context_size
 threads: $threads
 EOF
 				;;
 		esac
 		log_info "Model installed: $model_id"
 		log_info "Restart service to load: /etc/init.d/localai restart"
 	else