fix: Improve LAPI repair with retries and better error handling

- Stop CrowdSec before repair for clean state
- Create all required directories with proper permissions
- Regenerate local_api_credentials.yaml if missing
- Wait for LAPI port 8080 with retries before machine registration
- Use 30s timeout for repair operations
- Add retry logic for final LAPI verification
- Better error reporting with detailed steps

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
CyberMind-FR 2026-01-09 18:05:48 +01:00
parent 252341e045
commit 7975b22ca3

View File

@@ -881,68 +881,108 @@ repair_lapi() {
json_init json_init
local steps_done="" local steps_done=""
local errors="" local errors=""
local REPAIR_TIMEOUT=30
secubox_log "Starting LAPI repair..." secubox_log "Starting LAPI repair..."
# Step 1: Create data directory # Step 1: Stop CrowdSec first for clean repair
if [ ! -d "/srv/crowdsec/data" ]; then /etc/init.d/crowdsec stop >/dev/null 2>&1
mkdir -p /srv/crowdsec/data 2>/dev/null sleep 1
if [ -d "/srv/crowdsec/data" ]; then steps_done="${steps_done}Stopped CrowdSec; "
steps_done="${steps_done}Created /srv/crowdsec/data; "
else
errors="${errors}Failed to create data directory; "
fi
fi
# Step 2: Fix config.yaml - ensure data_dir and db_path are set # Step 2: Create required directories
mkdir -p /srv/crowdsec/data 2>/dev/null
mkdir -p /etc/crowdsec/hub 2>/dev/null
mkdir -p /var/log 2>/dev/null
chown -R root:root /srv/crowdsec 2>/dev/null
chmod 755 /srv/crowdsec/data 2>/dev/null
steps_done="${steps_done}Created directories; "
# Step 3: Fix config.yaml - ensure data_dir and db_path are set
local config_file="/etc/crowdsec/config.yaml" local config_file="/etc/crowdsec/config.yaml"
if [ -f "$config_file" ]; then if [ -f "$config_file" ]; then
# Check if data_dir is empty or missing # Backup original
local current_data_dir=$(grep "^ data_dir:" "$config_file" | awk '{print $2}') cp "$config_file" "${config_file}.bak" 2>/dev/null
if [ -z "$current_data_dir" ] || [ "$current_data_dir" = "" ]; then
sed -i 's|^ data_dir:.*| data_dir: /srv/crowdsec/data/|' "$config_file" # Fix data_dir if empty or wrong
steps_done="${steps_done}Fixed data_dir in config; " if ! grep -q "data_dir: /srv/crowdsec/data" "$config_file"; then
sed -i 's|^ data_dir:.*| data_dir: /srv/crowdsec/data/|' "$config_file" 2>/dev/null
steps_done="${steps_done}Fixed data_dir; "
fi fi
# Check if db_path is empty or missing # Fix db_path if empty or wrong
local current_db_path=$(grep "^ db_path:" "$config_file" | awk '{print $2}') if ! grep -q "db_path: /srv/crowdsec/data/crowdsec.db" "$config_file"; then
if [ -z "$current_db_path" ] || [ "$current_db_path" = "" ]; then sed -i 's|^ db_path:.*| db_path: /srv/crowdsec/data/crowdsec.db|' "$config_file" 2>/dev/null
sed -i 's|^ db_path:.*| db_path: /srv/crowdsec/data/crowdsec.db|' "$config_file" steps_done="${steps_done}Fixed db_path; "
steps_done="${steps_done}Fixed db_path in config; "
fi fi
else else
errors="${errors}Config file not found; " errors="${errors}Config file not found; "
fi fi
# Step 3: Restart CrowdSec to apply config changes # Step 4: Check/regenerate local_api_credentials.yaml
if /etc/init.d/crowdsec restart >/dev/null 2>&1; then local creds_file="/etc/crowdsec/local_api_credentials.yaml"
steps_done="${steps_done}Restarted CrowdSec; " if [ ! -f "$creds_file" ] || [ ! -s "$creds_file" ]; then
sleep 2 # Generate new credentials
else cat > "$creds_file" << 'CREDS'
errors="${errors}Failed to restart CrowdSec; " url: http://127.0.0.1:8080/
login: localhost
password:
CREDS
steps_done="${steps_done}Created credentials file; "
fi fi
# Step 4: Re-register local machine if needed # Step 5: Start CrowdSec
if [ -x "$CSCLI" ]; then if /etc/init.d/crowdsec start >/dev/null 2>&1; then
# Check if machine is registered and working steps_done="${steps_done}Started CrowdSec; "
if ! run_cscli machines list >/dev/null 2>&1; then else
# Force re-register errors="${errors}Failed to start CrowdSec; "
if run_cscli machines add localhost --auto --force >/dev/null 2>&1; then fi
steps_done="${steps_done}Re-registered localhost machine; "
# Restart again to apply new credentials # Step 6: Wait for LAPI to become available (with retries)
/etc/init.d/crowdsec restart >/dev/null 2>&1 local retries=10
sleep 2 local lapi_ready=0
else while [ $retries -gt 0 ]; do
errors="${errors}Failed to register machine; " sleep 2
fi # Check if port 8080 is listening
if netstat -tln 2>/dev/null | grep -q ":8080 " || ss -tln 2>/dev/null | grep -q ":8080 "; then
lapi_ready=1
break
fi
retries=$((retries - 1))
done
if [ "$lapi_ready" = "0" ]; then
errors="${errors}LAPI not listening on port 8080; "
else
steps_done="${steps_done}LAPI port listening; "
fi
# Step 7: Re-register local machine with longer timeout
if [ -x "$CSCLI" ] && [ "$lapi_ready" = "1" ]; then
# Force re-register machine
if timeout "$REPAIR_TIMEOUT" "$CSCLI" machines add localhost --auto --force >/dev/null 2>&1; then
steps_done="${steps_done}Registered localhost; "
else
# Try without --force
timeout "$REPAIR_TIMEOUT" "$CSCLI" machines add localhost --auto >/dev/null 2>&1
steps_done="${steps_done}Attempted machine registration; "
fi fi
fi fi
# Step 5: Verify LAPI is now working # Step 8: Final LAPI verification with retries
local lapi_ok=0 local lapi_ok=0
if [ -x "$CSCLI" ]; then if [ -x "$CSCLI" ]; then
if run_cscli lapi status >/dev/null 2>&1; then retries=5
lapi_ok=1 while [ $retries -gt 0 ]; do
if timeout "$REPAIR_TIMEOUT" "$CSCLI" lapi status >/dev/null 2>&1; then
lapi_ok=1
break
fi
sleep 2
retries=$((retries - 1))
done
if [ "$lapi_ok" = "1" ]; then
steps_done="${steps_done}LAPI verified working" steps_done="${steps_done}LAPI verified working"
else else
errors="${errors}LAPI still not responding" errors="${errors}LAPI still not responding"