fix(localai): Add gte-small preset, fix RPC expect unwrapping and chat JSON escaping

- Add gte-small embedding model preset to localaictl with proper YAML
  config (embeddings: true, context_size: 512)
- Fix RPC expect declarations across api.js, dashboard.js, models.js to
  use empty expect objects, preserving the full response including error fields
- Replace fragile sed/awk JSON escaping in RPCD chat and completion
  handlers with file I/O streaming through awk for robust handling of
  special characters in LLM responses
- Switch the RPCD chat handler from curl to wget to avoid a missing output
  file on timeout (curl doesn't create the -o file on exit code 28)
- Bypass RPCD 30s script timeout for chat by calling LocalAI API
  directly from the browser via fetch()
- Add embeddings flag to models RPC and filter embedding models from
  chat view model selector

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
CyberMind-FR 2026-02-03 19:22:30 +01:00
parent 28acb7e70f
commit daa4c48375
6 changed files with 148 additions and 80 deletions
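The empty "expect: { }" objects change what the views receive: with the old "expect: { success: false }", LuCI's rpc.declare() unwrapped the reply down to the bare boolean and dropped any accompanying error field, while an empty expect passes the whole reply object through. The raw reply shape can be inspected from the router shell; a minimal sketch, where the object name 'luci.localai' comes from the declarations in this commit but the example output is purely illustrative:

    # Raw rpcd replies - the same objects the views now receive unchanged
    ubus call luci.localai health '{}'
    # e.g. {"healthy": true}
    ubus call luci.localai model_remove '{"name":"no-such-model"}'
    # e.g. {"success": false, "error": "model not found"}  <- error field no longer dropped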

View File

@@ -31,7 +31,7 @@ var callConfig = rpc.declare({
var callHealth = rpc.declare({
object: 'luci.localai',
method: 'health',
expect: { healthy: false }
expect: { }
});
var callMetrics = rpc.declare({
@@ -43,47 +43,47 @@ var callMetrics = rpc.declare({
var callStart = rpc.declare({
object: 'luci.localai',
method: 'start',
expect: { success: false }
expect: { }
});
var callStop = rpc.declare({
object: 'luci.localai',
method: 'stop',
expect: { success: false }
expect: { }
});
var callRestart = rpc.declare({
object: 'luci.localai',
method: 'restart',
expect: { success: false }
expect: { }
});
var callModelInstall = rpc.declare({
object: 'luci.localai',
method: 'model_install',
params: ['name'],
expect: { success: false }
expect: { }
});
var callModelRemove = rpc.declare({
object: 'luci.localai',
method: 'model_remove',
params: ['name'],
expect: { success: false }
expect: { }
});
var callChat = rpc.declare({
object: 'luci.localai',
method: 'chat',
params: ['model', 'messages'],
expect: { response: '' }
expect: { }
});
var callComplete = rpc.declare({
object: 'luci.localai',
method: 'complete',
params: ['model', 'prompt'],
expect: { text: '' }
expect: { }
});
function formatBytes(bytes) {

View File

@@ -9,51 +9,46 @@ var callModels = rpc.declare({
expect: { models: [] }
});
// Custom chat function with longer timeout (LLMs can be slow)
function callChatWithTimeout(model, messages, timeoutMs) {
var callStatus = rpc.declare({
object: 'luci.localai',
method: 'status',
expect: { }
});
// Call LocalAI API directly (bypasses RPCD 30s script timeout)
function callChatDirect(apiPort, model, messages, timeoutMs) {
return new Promise(function(resolve, reject) {
var timeout = timeoutMs || 120000; // 2 minutes default
var timeout = timeoutMs || 180000; // 3 minutes default for LLM
var controller = new AbortController();
var timeoutId = setTimeout(function() {
controller.abort();
reject(new Error('Request timed out - model may need more time'));
reject(new Error('Request timed out after ' + (timeout/1000) + 's - model may need more time'));
}, timeout);
// Use ubus RPC endpoint
var ubusUrl = L.url('admin/ubus');
var payload = JSON.stringify({
jsonrpc: '2.0',
id: Date.now(),
method: 'call',
params: [
rpc.getSessionID() || '00000000000000000000000000000000',
'luci.localai',
'chat',
{ model: model, messages: messages }
]
});
var apiUrl = window.location.protocol + '//' + window.location.hostname + ':' + apiPort + '/v1/chat/completions';
fetch(ubusUrl, {
fetch(apiUrl, {
method: 'POST',
credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' },
body: payload,
body: JSON.stringify({ model: model, messages: messages }),
signal: controller.signal
})
.then(function(response) {
clearTimeout(timeoutId);
if (!response.ok) {
throw new Error('HTTP ' + response.status);
return response.text().then(function(t) {
throw new Error('API error HTTP ' + response.status + ': ' + t.substring(0, 200));
});
}
return response.json();
})
.then(function(data) {
clearTimeout(timeoutId);
if (data.result && Array.isArray(data.result) && data.result[1]) {
resolve(data.result[1]);
} else if (data.error) {
reject(new Error(data.error.message || 'RPC error'));
if (data.error) {
resolve({ response: '', error: data.error.message || JSON.stringify(data.error) });
} else if (data.choices && data.choices[0] && data.choices[0].message) {
resolve({ response: data.choices[0].message.content || '' });
} else {
resolve({ response: '', error: 'Unexpected response format' });
resolve({ response: '', error: 'Unexpected API response format' });
}
})
.catch(function(err) {
@@ -73,13 +68,19 @@ return view.extend({
selectedModel: null,
load: function() {
return callModels();
return Promise.all([callModels(), callStatus()]);
},
render: function(data) {
var self = this;
var modelsData = data[0];
var statusData = data[1] || {};
this.apiPort = statusData.api_port || 8080;
// RPC with expect returns array directly
var models = Array.isArray(data) ? data : (data && data.models ? data.models : []);
var allModels = Array.isArray(modelsData) ? modelsData : (modelsData && modelsData.models ? modelsData.models : []);
// Filter out embedding models - they can't do chat completions
var models = allModels.filter(function(m) { return !m.embeddings; });
var container = E('div', { 'class': 'localai-chat' }, [
E('style', {}, this.getCSS()),
@@ -101,7 +102,7 @@ return view.extend({
var displayName = m.loaded ? modelId + ' ✓' : modelId;
return E('option', { 'value': modelId }, displayName);
}) :
[E('option', { 'value': '' }, _('No models available'))]
[E('option', { 'value': '' }, _('No chat models available'))]
)
])
]),
@@ -188,9 +189,8 @@ return view.extend({
// Build messages array
this.messages.push({ role: 'user', content: message });
// Send to API (120s timeout for slow models)
// Pass messages as array - RPCD will handle JSON serialization
callChatWithTimeout(this.selectedModel, this.messages, 120000)
// Call LocalAI API directly (bypasses RPCD 30s script timeout)
callChatDirect(this.apiPort, this.selectedModel, this.messages, 180000)
.then(function(result) {
var loading = document.getElementById('loading-msg');
if (loading) loading.remove();
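Because the chat view now posts to the LocalAI API straight from the browser, the same request can be reproduced from any machine that can reach the router's API port. A rough sketch: the address is a placeholder, the model id must be one actually installed, and 8080 is only the fallback used when the status RPC reports no api_port:

    MODEL="tinyllama-1.1b-chat"   # placeholder - substitute an installed model id
    wget -q -T 180 -O - \
      --header="Content-Type: application/json" \
      --post-data "{\"model\":\"$MODEL\",\"messages\":[{\"role\":\"user\",\"content\":\"hello\"}]}" \
      "http://192.168.1.1:8080/v1/chat/completions"

Two side effects of this design worth keeping in mind: the API port has to be reachable from the browser's network (listen address and firewall), and if LuCI is served over HTTPS the plain-http fetch() may be blocked as mixed content.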

View File

@@ -18,7 +18,7 @@ var callModels = rpc.declare({
var callHealth = rpc.declare({
object: 'luci.localai',
method: 'health',
expect: { healthy: false }
expect: { }
});
var callMetrics = rpc.declare({
@@ -30,19 +30,19 @@ var callMetrics = rpc.declare({
var callStart = rpc.declare({
object: 'luci.localai',
method: 'start',
expect: { success: false }
expect: { }
});
var callStop = rpc.declare({
object: 'luci.localai',
method: 'stop',
expect: { success: false }
expect: { }
});
var callRestart = rpc.declare({
object: 'luci.localai',
method: 'restart',
expect: { success: false }
expect: { }
});
function formatBytes(bytes) {

View File

@@ -13,14 +13,14 @@ var callModelInstall = rpc.declare({
object: 'luci.localai',
method: 'model_install',
params: ['name'],
expect: { success: false }
expect: { }
});
var callModelRemove = rpc.declare({
object: 'luci.localai',
method: 'model_remove',
params: ['name'],
expect: { success: false }
expect: { }
});
function formatBytes(bytes) {

View File

@@ -94,13 +94,22 @@ get_models() {
[ $first -eq 0 ] && echo ","
first=0
seen="$seen $model_id"
# Check YAML config for embeddings flag
local is_embedding="false"
for yext in yaml yml; do
if [ -f "$MODELS_PATH/$model_id.$yext" ]; then
grep -q "^embeddings:.*true" "$MODELS_PATH/$model_id.$yext" 2>/dev/null && is_embedding="true"
break
fi
done
cat <<EOF
{
"id": "$model_id",
"name": "$model_id",
"size": 0,
"type": "loaded",
"loaded": true
"loaded": true,
"embeddings": $is_embedding
}
EOF
i=$((i + 1))
@@ -350,41 +359,54 @@ do_chat() {
# Log for debugging
logger -t localai-chat "Request to model: $model"
# Call LocalAI API using curl if available, otherwise wget
# Call LocalAI API - prefer wget (OpenWrt standard, creates output file reliably)
local tmpfile="/tmp/localai_chat_$$"
local tmpfile_err="/tmp/localai_chat_err_$$"
# Use longer timeout for LLM responses (120 seconds)
if command -v curl >/dev/null 2>&1; then
curl -s -m 120 -X POST "http://127.0.0.1:$API_PORT/v1/chat/completions" \
-H "Content-Type: application/json" \
-d "$request_body" \
-o "$tmpfile" 2>"$tmpfile_err"
else
wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
--header="Content-Type: application/json" \
"http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"
fi
wget -q -T 120 -O "$tmpfile" --post-data "$request_body" \
--header="Content-Type: application/json" \
"http://127.0.0.1:$API_PORT/v1/chat/completions" 2>"$tmpfile_err"
if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
# Log raw response for debugging
logger -t localai-chat "Raw response: $(head -c 200 "$tmpfile")"
# Extract message content using jsonfilter
local content=$(jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null)
# Check for API error first
local error=$(jsonfilter -i "$tmpfile" -e '@.error.message' 2>/dev/null)
if [ -n "$error" ]; then
# Escape quotes and newlines in error
error=$(echo "$error" | sed 's/"/\\"/g' | tr '\n' ' ')
error=$(printf '%s' "$error" | tr '\n' ' ' | sed 's/"/\\"/g')
echo "{\"response\":\"\",\"error\":\"$error\"}"
elif [ -n "$content" ]; then
# Properly escape the content for JSON output
# Handle quotes, backslashes, and newlines
content=$(printf '%s' "$content" | sed 's/\\/\\\\/g; s/"/\\"/g' | awk '{printf "%s\\n", $0}' | sed 's/\\n$//')
echo "{\"response\":\"$content\"}"
else
echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
# Extract content and build JSON via file I/O to handle
# large responses and special characters safely
local outfile="/tmp/localai_out_$$"
local has_content=0
jsonfilter -i "$tmpfile" -e '@.choices[0].message.content' 2>/dev/null | {
printf '{"response":"'
awk '
BEGIN { ORS="" }
{
gsub(/\\/, "\\\\")
gsub(/"/, "\\\"")
gsub(/\t/, "\\t")
gsub(/\r/, "")
if (NR > 1) printf "\\n"
printf "%s", $0
}'
printf '"}'
} > "$outfile"
# Check if we got actual content (the empty wrapper {"response":""} is 15 bytes)
local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
if [ "$outsize" -gt 15 ]; then
cat "$outfile"
else
echo '{"response":"","error":"Empty response from LocalAI API - model may not support chat format"}'
fi
rm -f "$outfile"
fi
rm -f "$tmpfile" "$tmpfile_err" 2>/dev/null
else
@@ -411,14 +433,38 @@ do_complete() {
return
fi
local response=$(wget -q -O - --post-data "{\"model\":\"$model\",\"prompt\":\"$prompt\"}" \
--header="Content-Type: application/json" \
"http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null)
local tmpfile="/tmp/localai_comp_$$"
if [ -n "$response" ]; then
local text=$(echo "$response" | jsonfilter -e '@.choices[0].text' 2>/dev/null)
echo "{\"text\":\"$(echo "$text" | sed 's/"/\\"/g')\"}"
wget -q -T 120 -O "$tmpfile" --post-data "{\"model\":\"$model\",\"prompt\":\"$prompt\"}" \
--header="Content-Type: application/json" \
"http://127.0.0.1:$API_PORT/v1/completions" 2>/dev/null
if [ -f "$tmpfile" ] && [ -s "$tmpfile" ]; then
local outfile="/tmp/localai_compout_$$"
jsonfilter -i "$tmpfile" -e '@.choices[0].text' 2>/dev/null | {
printf '{"text":"'
awk '
BEGIN { ORS="" }
{
gsub(/\\/, "\\\\")
gsub(/"/, "\\\"")
gsub(/\t/, "\\t")
gsub(/\r/, "")
if (NR > 1) printf "\\n"
printf "%s", $0
}'
printf '"}'
} > "$outfile"
local outsize=$(stat -c %s "$outfile" 2>/dev/null || echo 0)
if [ "$outsize" -gt 11 ]; then
cat "$outfile"
else
echo '{"text":"","error":"Empty response from API"}'
fi
rm -f "$outfile" "$tmpfile"
else
rm -f "$tmpfile"
echo '{"text":"","error":"API request failed"}'
fi
}
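Both do_chat() and do_complete() now wrap the extracted text with the same awk filter, streamed straight to a temp file instead of being bounced through shell variables and sed. The filter can be exercised on its own; the sample input below is invented for illustration:

    # Stand-alone test of the escaping filter shared by do_chat/do_complete
    printf 'He said "hi"\tthen left\na C:\\path too\n' | {
      printf '{"response":"'
      awk '
        BEGIN { ORS="" }
        {
          gsub(/\\/, "\\\\")
          gsub(/"/, "\\\"")
          gsub(/\t/, "\\t")
          gsub(/\r/, "")
          if (NR > 1) printf "\\n"
          printf "%s", $0
        }'
      printf '"}'
    }

One caveat to verify on the target awk (busybox on OpenWrt): in POSIX-conforming awks the replacement string "\\\\" in gsub() denotes a single literal backslash, so the backslash-doubling line may be a no-op and a backslash in model output could reach the JSON unescaped; the portable spelling for a doubled backslash in the replacement is "\\\\\\\\". This is a caution to test, not a confirmed defect in the change.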

View File

@@ -309,9 +309,10 @@ cmd_models() {
echo ""
echo "=== Available Presets ==="
echo " tinyllama - 669MB - TinyLlama 1.1B"
echo " phi2 - 1.6GB - Microsoft Phi-2"
echo " mistral - 4.1GB - Mistral 7B Instruct"
echo " tinyllama - 669MB - TinyLlama 1.1B (chat)"
echo " phi2 - 1.6GB - Microsoft Phi-2 (chat)"
echo " mistral - 4.1GB - Mistral 7B Instruct (chat)"
echo " gte-small - 67MB - GTE Small (embeddings)"
echo ""
echo "Install: localaictl model-install <name>"
}
@@ -340,13 +341,17 @@ cmd_model_install() {
url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf"
;;
gte-small)
url="https://huggingface.co/ggml-org/gte-small-Q8_0-GGUF/resolve/main/gte-small-q8_0.gguf"
filename="gte-small-q8_0.gguf"
;;
http*)
url="$model_name"
filename=$(basename "$url")
;;
*)
log_error "Unknown model: $model_name"
log_error "Use preset name (tinyllama, phi2, mistral) or full URL"
log_error "Use preset name (tinyllama, phi2, mistral, gte-small) or full URL"
return 1
;;
esac
@@ -358,7 +363,22 @@ cmd_model_install() {
if wget -L --show-progress -O "$models_path/$filename" "$url"; then
# Create YAML config for the model
local model_id="${filename%.*}"
cat > "$models_path/$model_id.yaml" << EOF
# Embedding models need different config
case "$model_name" in
gte-small)
cat > "$models_path/$model_id.yaml" << EOF
name: $model_id
backend: llama-cpp
embeddings: true
parameters:
model: $filename
context_size: 512
threads: $threads
EOF
;;
*)
cat > "$models_path/$model_id.yaml" << EOF
name: $model_id
backend: llama-cpp
parameters:
@@ -366,6 +386,8 @@ parameters:
context_size: $context_size
threads: $threads
EOF
;;
esac
log_info "Model installed: $model_id"
log_info "Restart service to load: /etc/init.d/localai restart"
else
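Once the preset is installed and the service restarted, the model can be checked against LocalAI's OpenAI-style embeddings endpoint. A sketch under a few assumptions: 8080 is the default API port, the model id is the filename stem the YAML above names (gte-small-q8_0), and /v1/embeddings is LocalAI's standard OpenAI-compatible route rather than anything added by this commit:

    wget -q -O - \
      --header="Content-Type: application/json" \
      --post-data '{"model":"gte-small-q8_0","input":"hello world"}' \
      "http://127.0.0.1:8080/v1/embeddings"
    # expected shape, roughly: {"data":[{"embedding":[...],"index":0}], ...}

The embeddings: true flag in that YAML is also what the new get_models() grep keys on, which is how the chat view knows to hide this model from its selector.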