#!/bin/bash
#
# Service Recovery Script
# Purpose: Quick recovery for common service failures
#
# Usage:
#   ./scripts/service-recovery.sh status    # Check service status
#   ./scripts/service-recovery.sh restart   # Restart services
#   ./scripts/service-recovery.sh recover   # Full recovery procedure
#

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

PRODUCTION_SERVER="94.16.110.151"
STACK_NAME="framework"
# Public base URL of the application; used for the health check and the
# final summary message.
APP_URL="https://michaelschiemer.de"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Print an error message ($1) with a red [ERROR] tag to stderr.
log_error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
}

# Print a warning message ($1) with a yellow [WARN] tag to stdout.
log_warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}

# Print an informational message ($1) with a green [INFO] tag to stdout.
log_info() {
    echo -e "${GREEN}[INFO]${NC} $1"
}

# Print a step heading ($1) with a blue [STEP] tag to stdout.
log_step() {
    echo -e "${BLUE}[STEP]${NC} $1"
}

# SSH helper: run the given command line on the production server as the
# "deploy" user with the ~/.ssh/production key. All arguments are handed to
# ssh, which joins them into the remote command line.
ssh_exec() {
    ssh -i ~/.ssh/production deploy@"${PRODUCTION_SERVER}" "$@"
}

# Check service status: list the stack's services, show the task lists of the
# web and queue-worker services, and print the last 50 web log lines.
# Any failing remote command aborts the script (set -e).
check_status() {
    log_step "Checking service status..."

    echo ""
    log_info "Docker Swarm Services:"
    ssh_exec "docker service ls --filter 'name=${STACK_NAME}'"

    echo ""
    log_info "Web Service Details:"
    ssh_exec "docker service ps ${STACK_NAME}_web --no-trunc"

    echo ""
    log_info "Queue Worker Details:"
    ssh_exec "docker service ps ${STACK_NAME}_queue-worker --no-trunc"

    echo ""
    log_info "Service Logs (last 50 lines):"
    ssh_exec "docker service logs ${STACK_NAME}_web --tail 50"
}

# Restart services: after an interactive "yes" confirmation, force-update the
# web and queue-worker services, wait 30 seconds and print the status.
# Any other answer (including end-of-input) cancels and exits with status 0.
restart_services() {
    log_step "Restarting services..."

    echo ""
    log_warn "This will restart all framework services"

    # read returns non-zero at end-of-input; treat that as "no" instead of
    # letting set -e abort the script without any message.
    local answer=""
    read -r -p "Continue? (yes/no): " answer || true

    if [[ ! "$answer" =~ ^[Yy][Ee][Ss]$ ]]; then
        log_info "Restart cancelled"
        exit 0
    fi

    # Restart web service
    log_info "Restarting web service..."
    ssh_exec "docker service update --force ${STACK_NAME}_web"

    # Restart worker service
    log_info "Restarting queue worker..."
    ssh_exec "docker service update --force ${STACK_NAME}_queue-worker"

    # Wait for services to stabilize
    log_info "Waiting for services to stabilize (30 seconds)..."
    sleep 30

    # Check status
    check_status
}

# Full recovery procedure: status check, Swarm check (with a best-effort
# re-init), overlay network check, forced service restart, then an HTTP
# health check against ${APP_URL}/health.
# Exits with status 1 when the health check fails.
full_recovery() {
    log_step "Running full recovery procedure..."

    echo ""
    log_warn "╔════════════════════════════════════════════════════════╗"
    log_warn "║ FULL SERVICE RECOVERY PROCEDURE ║"
    log_warn "╚════════════════════════════════════════════════════════╝"
    echo ""

    # Step 1: Check current status
    log_info "Step 1/5: Check current status"
    check_status

    # Step 2: Check Docker Swarm health
    log_info "Step 2/5: Check Docker Swarm health"
    local swarm_status
    swarm_status=$(ssh_exec "docker info | grep 'Swarm: active' || echo 'inactive'")

    if [[ "$swarm_status" == "inactive" ]]; then
        log_error "Docker Swarm is not active!"
        log_info "Attempting to reinitialize Swarm..."
        # Best effort: a failed init must not abort the remaining steps,
        # but it should be visible in the output.
        ssh_exec "docker swarm init --advertise-addr ${PRODUCTION_SERVER}" \
            || log_warn "Swarm reinitialization failed, continuing"
    else
        log_info "Docker Swarm is active"
    fi

    # Step 3: Verify network and volumes
    log_info "Step 3/5: Verify Docker resources"
    ssh_exec "docker network ls | grep ${STACK_NAME} || docker network create --driver overlay ${STACK_NAME}_network"

    # Step 4: Restart services
    log_info "Step 4/5: Restart services"
    ssh_exec "docker service update --force ${STACK_NAME}_web"
    ssh_exec "docker service update --force ${STACK_NAME}_queue-worker"

    log_info "Waiting for services to stabilize (45 seconds)..."
    sleep 45

    # Step 5: Health check
    log_info "Step 5/5: Run health checks"
    # Decide on curl's exit status and discard the response body. Capturing
    # curl's stdout and comparing it with "OK" would fail for every
    # non-empty body. --max-time keeps a hung endpoint from blocking forever.
    # -k (skip TLS verification) is kept from the original command.
    if curl -fsS -k --max-time 30 -o /dev/null "${APP_URL}/health"; then
        log_info "✅ Health check passed"
    else
        log_error "❌ Health check failed"
        log_warn "Manual intervention may be required"
        log_warn "Check logs: ssh deploy@${PRODUCTION_SERVER} 'docker service logs ${STACK_NAME}_web --tail 100'"
        exit 1
    fi

    echo ""
    log_warn "╔════════════════════════════════════════════════════════╗"
    log_warn "║ RECOVERY PROCEDURE COMPLETED ║"
    log_warn "╚════════════════════════════════════════════════════════╝"
    echo ""
    log_info "Application: ${APP_URL}"
    log_info "Services recovered successfully"
    echo ""
}

# Clear caches: flush Redis and remove the file cache inside the first
# matching redis / web container. Both steps are best effort: a failure is
# reported as a warning and the function still returns 0.
clear_caches() {
    log_step "Clearing application caches..."

    local failures=0

    # Clear Redis cache
    # "| head -1" picks a single container ID, matching the web lookup below.
    log_info "Clearing Redis cache..."
    if ! ssh_exec "docker exec \$(docker ps -q -f name=${STACK_NAME}_redis | head -1) redis-cli FLUSHALL"; then
        log_warn "Redis cache clear failed"
        failures=1
    fi

    # Clear file caches
    # The glob has to be expanded inside the container, so the rm runs via
    # "sh -c" with the pattern single-quoted for the host shell.
    log_info "Clearing file caches..."
    if ! ssh_exec "docker exec \$(docker ps -q -f name=${STACK_NAME}_web | head -1) sh -c 'rm -rf /var/www/html/storage/cache/*'"; then
        log_warn "File cache clear failed"
        failures=1
    fi

    if [[ "$failures" -eq 0 ]]; then
        log_info "Caches cleared"
    else
        log_warn "Cache clearing finished with errors"
    fi
}

# NOTE(review): the show_help definition is cut off at this point in the
# reviewed text. The fragment is preserved verbatim on the next line and is
# still inert; restore the full function from the original script.
# Show help show_help() { cat <