fix: Improve LAPI repair with retries and better error handling
- Stop CrowdSec before repair for clean state
- Create all required directories with proper permissions
- Regenerate local_api_credentials.yaml if missing
- Wait for LAPI port 8080 with retries before machine registration
- Use 30s timeout for repair operations
- Add retry logic for final LAPI verification
- Better error reporting with detailed steps

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
252341e045
commit
7975b22ca3
@@ -881,68 +881,108 @@ repair_lapi() {
|
|||||||
json_init
|
json_init
|
||||||
local steps_done=""
|
local steps_done=""
|
||||||
local errors=""
|
local errors=""
|
||||||
|
local REPAIR_TIMEOUT=30
|
||||||
|
|
||||||
secubox_log "Starting LAPI repair..."
|
secubox_log "Starting LAPI repair..."
|
||||||
|
|
||||||
# Step 1: Create data directory
|
# Step 1: Stop CrowdSec first for clean repair
|
||||||
if [ ! -d "/srv/crowdsec/data" ]; then
|
/etc/init.d/crowdsec stop >/dev/null 2>&1
|
||||||
mkdir -p /srv/crowdsec/data 2>/dev/null
|
sleep 1
|
||||||
if [ -d "/srv/crowdsec/data" ]; then
|
steps_done="${steps_done}Stopped CrowdSec; "
|
||||||
steps_done="${steps_done}Created /srv/crowdsec/data; "
|
|
||||||
else
|
|
||||||
errors="${errors}Failed to create data directory; "
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Step 2: Fix config.yaml - ensure data_dir and db_path are set
|
# Step 2: Create required directories
|
||||||
|
mkdir -p /srv/crowdsec/data 2>/dev/null
|
||||||
|
mkdir -p /etc/crowdsec/hub 2>/dev/null
|
||||||
|
mkdir -p /var/log 2>/dev/null
|
||||||
|
chown -R root:root /srv/crowdsec 2>/dev/null
|
||||||
|
chmod 755 /srv/crowdsec/data 2>/dev/null
|
||||||
|
steps_done="${steps_done}Created directories; "
|
||||||
|
|
||||||
|
# Step 3: Fix config.yaml - ensure data_dir and db_path are set
|
||||||
local config_file="/etc/crowdsec/config.yaml"
|
local config_file="/etc/crowdsec/config.yaml"
|
||||||
if [ -f "$config_file" ]; then
|
if [ -f "$config_file" ]; then
|
||||||
# Check if data_dir is empty or missing
|
# Backup original
|
||||||
local current_data_dir=$(grep "^ data_dir:" "$config_file" | awk '{print $2}')
|
cp "$config_file" "${config_file}.bak" 2>/dev/null
|
||||||
if [ -z "$current_data_dir" ] || [ "$current_data_dir" = "" ]; then
|
|
||||||
sed -i 's|^ data_dir:.*| data_dir: /srv/crowdsec/data/|' "$config_file"
|
# Fix data_dir if empty or wrong
|
||||||
steps_done="${steps_done}Fixed data_dir in config; "
|
if ! grep -q "data_dir: /srv/crowdsec/data" "$config_file"; then
|
||||||
|
sed -i 's|^ data_dir:.*| data_dir: /srv/crowdsec/data/|' "$config_file" 2>/dev/null
|
||||||
|
steps_done="${steps_done}Fixed data_dir; "
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check if db_path is empty or missing
|
# Fix db_path if empty or wrong
|
||||||
local current_db_path=$(grep "^ db_path:" "$config_file" | awk '{print $2}')
|
if ! grep -q "db_path: /srv/crowdsec/data/crowdsec.db" "$config_file"; then
|
||||||
if [ -z "$current_db_path" ] || [ "$current_db_path" = "" ]; then
|
sed -i 's|^ db_path:.*| db_path: /srv/crowdsec/data/crowdsec.db|' "$config_file" 2>/dev/null
|
||||||
sed -i 's|^ db_path:.*| db_path: /srv/crowdsec/data/crowdsec.db|' "$config_file"
|
steps_done="${steps_done}Fixed db_path; "
|
||||||
steps_done="${steps_done}Fixed db_path in config; "
|
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
errors="${errors}Config file not found; "
|
errors="${errors}Config file not found; "
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 3: Restart CrowdSec to apply config changes
|
# Step 4: Check/regenerate local_api_credentials.yaml
|
||||||
if /etc/init.d/crowdsec restart >/dev/null 2>&1; then
|
local creds_file="/etc/crowdsec/local_api_credentials.yaml"
|
||||||
steps_done="${steps_done}Restarted CrowdSec; "
|
if [ ! -f "$creds_file" ] || [ ! -s "$creds_file" ]; then
|
||||||
sleep 2
|
# Generate new credentials
|
||||||
else
|
cat > "$creds_file" << 'CREDS'
|
||||||
errors="${errors}Failed to restart CrowdSec; "
|
url: http://127.0.0.1:8080/
|
||||||
|
login: localhost
|
||||||
|
password:
|
||||||
|
CREDS
|
||||||
|
steps_done="${steps_done}Created credentials file; "
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 4: Re-register local machine if needed
|
# Step 5: Start CrowdSec
|
||||||
if [ -x "$CSCLI" ]; then
|
if /etc/init.d/crowdsec start >/dev/null 2>&1; then
|
||||||
# Check if machine is registered and working
|
steps_done="${steps_done}Started CrowdSec; "
|
||||||
if ! run_cscli machines list >/dev/null 2>&1; then
|
else
|
||||||
# Force re-register
|
errors="${errors}Failed to start CrowdSec; "
|
||||||
if run_cscli machines add localhost --auto --force >/dev/null 2>&1; then
|
fi
|
||||||
steps_done="${steps_done}Re-registered localhost machine; "
|
|
||||||
# Restart again to apply new credentials
|
# Step 6: Wait for LAPI to become available (with retries)
|
||||||
/etc/init.d/crowdsec restart >/dev/null 2>&1
|
local retries=10
|
||||||
sleep 2
|
local lapi_ready=0
|
||||||
else
|
while [ $retries -gt 0 ]; do
|
||||||
errors="${errors}Failed to register machine; "
|
sleep 2
|
||||||
fi
|
# Check if port 8080 is listening
|
||||||
|
if netstat -tln 2>/dev/null | grep -q ":8080 " || ss -tln 2>/dev/null | grep -q ":8080 "; then
|
||||||
|
lapi_ready=1
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
retries=$((retries - 1))
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ "$lapi_ready" = "0" ]; then
|
||||||
|
errors="${errors}LAPI not listening on port 8080; "
|
||||||
|
else
|
||||||
|
steps_done="${steps_done}LAPI port listening; "
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Step 7: Re-register local machine with longer timeout
|
||||||
|
if [ -x "$CSCLI" ] && [ "$lapi_ready" = "1" ]; then
|
||||||
|
# Force re-register machine
|
||||||
|
if timeout "$REPAIR_TIMEOUT" "$CSCLI" machines add localhost --auto --force >/dev/null 2>&1; then
|
||||||
|
steps_done="${steps_done}Registered localhost; "
|
||||||
|
else
|
||||||
|
# Try without --force
|
||||||
|
timeout "$REPAIR_TIMEOUT" "$CSCLI" machines add localhost --auto >/dev/null 2>&1
|
||||||
|
steps_done="${steps_done}Attempted machine registration; "
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Step 5: Verify LAPI is now working
|
# Step 8: Final LAPI verification with retries
|
||||||
local lapi_ok=0
|
local lapi_ok=0
|
||||||
if [ -x "$CSCLI" ]; then
|
if [ -x "$CSCLI" ]; then
|
||||||
if run_cscli lapi status >/dev/null 2>&1; then
|
retries=5
|
||||||
lapi_ok=1
|
while [ $retries -gt 0 ]; do
|
||||||
|
if timeout "$REPAIR_TIMEOUT" "$CSCLI" lapi status >/dev/null 2>&1; then
|
||||||
|
lapi_ok=1
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
sleep 2
|
||||||
|
retries=$((retries - 1))
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ "$lapi_ok" = "1" ]; then
|
||||||
steps_done="${steps_done}LAPI verified working"
|
steps_done="${steps_done}LAPI verified working"
|
||||||
else
|
else
|
||||||
errors="${errors}LAPI still not responding"
|
errors="${errors}LAPI still not responding"
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user