#!/bin/bash
#
# Service Recovery Script
# Purpose: Quick recovery for common service failures
#
# Usage:
#   ./scripts/service-recovery.sh status       # Check service status
#   ./scripts/service-recovery.sh restart      # Restart services
#   ./scripts/service-recovery.sh recover      # Full recovery procedure
#   ./scripts/service-recovery.sh clear-cache  # Clear application caches
#   ./scripts/service-recovery.sh help         # Show help
#

# Strict mode: abort on unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -euo pipefail

# Directory containing this script, and the directory two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
readonly SCRIPT_DIR PROJECT_ROOT

# Target host for all remote commands, and the prefix of the Docker service
# names (<STACK_NAME>_web, <STACK_NAME>_queue-worker, ...).
readonly PRODUCTION_SERVER="94.16.110.151"
readonly STACK_NAME="framework"

# Colors (ANSI escape sequences stored as literal backslash text; the log
# helpers are responsible for interpreting them when printing)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Print an error message to stderr, prefixed with a red "[ERROR]" tag.
# Globals:   RED, NC (read)
# Arguments: $1 - message text; printed verbatim (backslashes are not
#                 interpreted as escape sequences)
log_error() {
  # %b expands the escape sequences stored in the color variables; %s keeps
  # the message itself untouched.
  printf '%b[ERROR]%b %s\n' "${RED:-}" "${NC:-}" "${1-}" >&2
}

# Print a warning message to stdout, prefixed with a yellow "[WARN]" tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text; printed verbatim (backslashes are not
#                 interpreted as escape sequences)
log_warn() {
  # %b expands the escape sequences stored in the color variables; %s keeps
  # the message itself untouched.
  printf '%b[WARN]%b %s\n' "${YELLOW:-}" "${NC:-}" "${1-}"
}

# Print an informational message to stdout, prefixed with a green "[INFO]" tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text; printed verbatim (backslashes are not
#                 interpreted as escape sequences)
log_info() {
  # %b expands the escape sequences stored in the color variables; %s keeps
  # the message itself untouched.
  printf '%b[INFO]%b %s\n' "${GREEN:-}" "${NC:-}" "${1-}"
}

# Print a step heading to stdout, prefixed with a blue "[STEP]" tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text; printed verbatim (backslashes are not
#                 interpreted as escape sequences)
log_step() {
  # %b expands the escape sequences stored in the color variables; %s keeps
  # the message itself untouched.
  printf '%b[STEP]%b %s\n' "${BLUE:-}" "${NC:-}" "${1-}"
}

# SSH helper
# Run a command on the production server as user "deploy", authenticating
# with the key at ~/.ssh/production.
# Globals:   PRODUCTION_SERVER (read)
# Arguments: $@ - passed to ssh after the destination (the remote command)
# Returns:   the exit status of ssh
ssh_exec() {
  ssh -i ~/.ssh/production "deploy@${PRODUCTION_SERVER}" "$@"
}

# Check service status
# Print the stack's service list, the task details of the web and
# queue-worker services, and the last 50 log lines of the web service.
#
# Every query is attempted even if an earlier one fails, so that a partially
# broken stack still yields as much diagnostic output as possible.
# Globals:   STACK_NAME (read)
# Returns:   0 if all queries succeeded, 1 if at least one failed
check_status() {
  local failures=0

  log_step "Checking service status..."

  echo ""
  log_info "Docker Swarm Services:"
  # "cmd || ..." keeps a failing query from aborting the script under set -e.
  ssh_exec "docker service ls --filter 'name=${STACK_NAME}'" \
    || failures=$((failures + 1))

  echo ""
  log_info "Web Service Details:"
  ssh_exec "docker service ps ${STACK_NAME}_web --no-trunc" \
    || failures=$((failures + 1))

  echo ""
  log_info "Queue Worker Details:"
  ssh_exec "docker service ps ${STACK_NAME}_queue-worker --no-trunc" \
    || failures=$((failures + 1))

  echo ""
  log_info "Service Logs (last 50 lines):"
  ssh_exec "docker service logs ${STACK_NAME}_web --tail 50" \
    || failures=$((failures + 1))

  if ((failures > 0)); then
    log_warn "${failures} status query(ies) failed"
    return 1
  fi
}

# Restart services
# Ask for confirmation, then force-update the web and queue-worker services,
# wait 30 seconds and print the resulting status.
#
# Only an answer of "yes" (any letter case) proceeds; anything else,
# including end-of-input, cancels without touching the services.
# Globals:   STACK_NAME (read)
# Returns:   0 when cancelled; otherwise the status of check_status
restart_services() {
  local reply=""

  log_step "Restarting services..."

  echo ""
  log_warn "This will restart all framework services"
  # read returns non-zero at end-of-input; without the guard, set -e would
  # terminate the script silently instead of reporting the cancellation.
  read -r -p "Continue? (yes/no): " reply || true

  if [[ ! "$reply" =~ ^[Yy][Ee][Ss]$ ]]; then
    log_info "Restart cancelled"
    return 0
  fi

  # Restart web service
  log_info "Restarting web service..."
  ssh_exec "docker service update --force ${STACK_NAME}_web"

  # Restart worker service
  log_info "Restarting queue worker..."
  ssh_exec "docker service update --force ${STACK_NAME}_queue-worker"

  # Wait for services to stabilize
  log_info "Waiting for services to stabilize (30 seconds)..."
  sleep 30

  # Check status
  check_status
}

# Full recovery procedure
# Run the five recovery steps in order:
#   1. print the current service status (informational only),
#   2. check that Docker Swarm is active and try to initialize it if not,
#   3. make sure a Docker network matching the stack name exists,
#   4. force-update the web and queue-worker services and wait 45 seconds,
#   5. request the application's /health endpoint.
# Globals:   PRODUCTION_SERVER, STACK_NAME (read)
# Returns:   0 if the health check succeeded, 1 if it failed
full_recovery() {
  local swarm_status

  log_step "Running full recovery procedure..."

  echo ""
  log_warn "╔════════════════════════════════════════════════════════╗"
  log_warn "║            FULL SERVICE RECOVERY PROCEDURE             ║"
  log_warn "╚════════════════════════════════════════════════════════╝"
  echo ""

  # Step 1: Check current status
  log_info "Step 1/5: Check current status"
  # The status output is purely diagnostic. Services are expected to be
  # unhealthy at this point, so a failing query must not abort the recovery
  # (which it would under set -e without the "||").
  check_status || log_warn "Status check reported errors; continuing with recovery"

  # Step 2: Check Docker Swarm health
  log_info "Step 2/5: Check Docker Swarm health"
  # The remote pipeline prints the matching "Swarm: active" line, or the
  # single word "inactive" when grep finds no match.
  swarm_status=$(ssh_exec "docker info | grep 'Swarm: active' || echo 'inactive'")

  if [[ "$swarm_status" == "inactive" ]]; then
    log_error "Docker Swarm is not active!"
    log_info "Attempting to reinitialize Swarm..."
    # Best effort: a failed init is tolerated here and surfaces in later steps.
    ssh_exec "docker swarm init --advertise-addr ${PRODUCTION_SERVER}" || true
  else
    log_info "Docker Swarm is active"
  fi

  # Step 3: Verify network
  log_info "Step 3/5: Verify Docker resources"
  ssh_exec "docker network ls | grep ${STACK_NAME} || docker network create --driver overlay ${STACK_NAME}_network"

  # Step 4: Restart services
  log_info "Step 4/5: Restart services"
  ssh_exec "docker service update --force ${STACK_NAME}_web"
  ssh_exec "docker service update --force ${STACK_NAME}_queue-worker"

  log_info "Waiting for services to stabilize (45 seconds)..."
  sleep 45

  # Step 5: Health check
  log_info "Step 5/5: Run health checks"

  # Decide on curl's exit status only (-f makes HTTP errors fail). The
  # response body is discarded so that its content cannot influence the
  # verdict, and --max-time bounds the wait if the endpoint hangs.
  # -k skips TLS certificate verification, as in the original check.
  if curl -f -k -s -o /dev/null --max-time 30 \
    https://michaelschiemer.de/health 2>/dev/null; then
    log_info "✅ Health check passed"
  else
    log_error "❌ Health check failed"
    log_warn "Manual intervention may be required"
    log_warn "Check logs: ssh deploy@${PRODUCTION_SERVER} 'docker service logs ${STACK_NAME}_web --tail 100'"
    return 1
  fi

  echo ""
  log_warn "╔════════════════════════════════════════════════════════╗"
  log_warn "║              RECOVERY PROCEDURE COMPLETED              ║"
  log_warn "╚════════════════════════════════════════════════════════╝"
  echo ""
  log_info "Application: https://michaelschiemer.de"
  log_info "Services recovered successfully"
  echo ""
}

# Clear caches
# Flush Redis and delete the application's file cache inside the running
# containers. Both steps are best effort: a failure is logged as a warning
# and does not stop the script.
# NOTE(review): FLUSHALL empties every Redis database, not only cache keys —
# confirm nothing else (queues, sessions) lives in that Redis instance.
# Globals:   STACK_NAME (read)
# Returns:   0
clear_caches() {
  log_step "Clearing application caches..."

  # Clear Redis cache
  log_info "Clearing Redis cache..."
  # "\$(" defers the container lookup to the remote shell; "head -1" picks a
  # single container ID, matching the web lookup below.
  ssh_exec "docker exec \$(docker ps -q -f name=${STACK_NAME}_redis | head -1) redis-cli FLUSHALL" \
    || log_warn "Redis cache clear failed"

  # Clear file caches
  log_info "Clearing file caches..."
  # The glob has to be expanded INSIDE the container. Left unquoted it would
  # be expanded by the host's shell against the host filesystem, and rm in
  # the container would receive a literal "*" and remove nothing. Running the
  # command through "sh -c '...'" hands the pattern to the container's shell.
  # (Assumes the web image provides "sh".)
  ssh_exec "docker exec \$(docker ps -q -f name=${STACK_NAME}_web | head -1) sh -c 'rm -rf /var/www/html/storage/cache/*'" \
    || log_warn "File cache clear failed"

  log_info "Caches cleared"
}

# Show help
# Print the usage text to stdout. The here-document delimiter is unquoted, so
# $0, PRODUCTION_SERVER and STACK_NAME are expanded into the text.
# Globals:   PRODUCTION_SERVER, STACK_NAME (read)
show_help() {
  cat <<EOF
Service Recovery Script

Usage: $0 [command]

Commands:
  status       Check service status and logs
  restart      Restart all services
  recover      Run full recovery procedure (recommended)
  clear-cache  Clear application caches
  help         Show this help

Examples:
  $0 status       # Quick status check
  $0 recover      # Full automated recovery
  $0 restart      # Just restart services
  $0 clear-cache  # Clear caches only

Emergency Recovery:
  1. Check status: $0 status
  2. Run recovery: $0 recover
  3. If still failing, check logs manually:
     ssh deploy@${PRODUCTION_SERVER} 'docker service logs ${STACK_NAME}_web --tail 200'

EOF
}

# Main
# Dispatch the first command-line argument to the matching action. With no
# argument the help text is shown.
# Arguments: $1 - status | restart | recover | clear-cache | help | --help | -h
# Exits:     1 (after printing the help text) for an unknown command
main() {
  local command="${1:-help}"

  case "$command" in
    status)
      check_status
      ;;
    restart)
      restart_services
      ;;
    recover)
      full_recovery
      ;;
    clear-cache)
      clear_caches
      ;;
    help | --help | -h)
      show_help
      ;;
    *)
      log_error "Unknown command: $command"
      show_help
      exit 1
      ;;
  esac
}

# Entry point: forward all command-line arguments to main.
main "$@"