feat: CI/CD pipeline setup complete - Ansible playbooks updated, secrets configured, workflow ready
This commit is contained in:
230
.deployment-archive-20251030-111806/scripts/service-recovery.sh
Executable file
230
.deployment-archive-20251030-111806/scripts/service-recovery.sh
Executable file
@@ -0,0 +1,230 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Service Recovery Script
|
||||
# Purpose: Quick recovery for common service failures
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/service-recovery.sh status # Check service status
|
||||
# ./scripts/service-recovery.sh restart # Restart services
|
||||
# ./scripts/service-recovery.sh recover # Full recovery procedure
# ./scripts/service-recovery.sh clear-cache # Clear application caches
|
||||
#
|
||||
|
||||
set -euo pipefail

# Absolute directory that contains this script.
# CDPATH is blanked for the lookup: with CDPATH set, `cd` on a relative path
# may print the target directory (polluting the captured value) or even
# resolve to a different directory. `--` protects against paths starting
# with a dash.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# Directory two levels above the script directory.
PROJECT_ROOT="$(cd -- "${SCRIPT_DIR}/../.." && pwd)"
# Assigned first, marked read-only afterwards, so a failing cd is not masked
# by the exit status of `readonly`.
readonly SCRIPT_DIR PROJECT_ROOT

# Host that ssh_exec connects to, and the name prefix of the Docker services.
readonly PRODUCTION_SERVER="94.16.110.151"
readonly STACK_NAME="framework"

# Colors: ANSI escape sequences stored in backslash form. They are expanded
# by the log_* helpers at print time, not here.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # reset / "no color"
|
||||
|
||||
#######################################
# Print an error line, tagged with a red "[ERROR]", to stderr.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stderr
#######################################
log_error() {
  # Only the color variables go through %b (which expands their \033
  # escapes). The message goes through %s, so backslashes in it (paths,
  # regexes, remote command text) are printed literally instead of being
  # turned into control characters.
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "${1-}" >&2
}
|
||||
|
||||
#######################################
# Print a warning line, tagged with a yellow "[WARN]", to stdout.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
#######################################
log_warn() {
  # %b expands the escapes stored in the color variables; %s keeps any
  # backslashes in the message itself literal.
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "${1-}"
}
|
||||
|
||||
#######################################
# Print an informational line, tagged with a green "[INFO]", to stdout.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
#######################################
log_info() {
  # %b expands the escapes stored in the color variables; %s keeps any
  # backslashes in the message itself literal.
  printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "${1-}"
}
|
||||
|
||||
#######################################
# Print a step heading, tagged with a blue "[STEP]", to stdout.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stdout
#######################################
log_step() {
  # %b expands the escapes stored in the color variables; %s keeps any
  # backslashes in the message itself literal.
  printf '%b[STEP]%b %s\n' "${BLUE}" "${NC}" "${1-}"
}
|
||||
|
||||
# SSH helper
|
||||
#######################################
# Run a command on the production server over SSH.
# Globals:   PRODUCTION_SERVER (read)
# Arguments: "$@" - passed through to ssh as the remote command
# Outputs:   whatever the remote command writes
# Returns:   exit status of ssh
#######################################
ssh_exec() {
  local identity_file
  identity_file=~/.ssh/production
  local target="deploy@${PRODUCTION_SERVER}"

  ssh -i "${identity_file}" "${target}" "$@"
}
|
||||
|
||||
# Check service status
|
||||
#######################################
# Print an overview of the stack on the production server: the service
# list, the task lists of the web and queue-worker services, and the last
# 50 log lines of the web service.
# Globals:   STACK_NAME (read)
# Arguments: none
# Outputs:   headings plus the output of the remote docker commands
# Returns:   status of the last remote command. Under `set -e` a failing
#            query ends the script, unless the caller runs this function in
#            a tested context such as `check_status || ...`.
#######################################
check_status() {
  local web_service="${STACK_NAME}_web"
  local worker_service="${STACK_NAME}_queue-worker"

  log_step "Checking service status..."

  printf '\n'
  log_info "Docker Swarm Services:"
  ssh_exec "docker service ls --filter 'name=${STACK_NAME}'"

  printf '\n'
  log_info "Web Service Details:"
  ssh_exec "docker service ps ${web_service} --no-trunc"

  printf '\n'
  log_info "Queue Worker Details:"
  ssh_exec "docker service ps ${worker_service} --no-trunc"

  printf '\n'
  log_info "Service Logs (last 50 lines):"
  ssh_exec "docker service logs ${web_service} --tail 50"
}
|
||||
|
||||
# Restart services
|
||||
#######################################
# Ask for confirmation, then force-restart the web and queue-worker
# services, wait 30 seconds and print the resulting status.
# Globals:   STACK_NAME (read)
# Arguments: none
# Inputs:    one line from stdin; only "yes" (any letter case) confirms
# Outputs:   progress messages and the output of check_status
# Exits:     0 when the user declines; 1 when no answer can be read
#######################################
restart_services() {
  local answer=""

  log_step "Restarting services..."

  echo ""
  log_warn "This will restart all framework services"

  # A failing `read` (stdin closed, e.g. when run from CI or cron) would
  # otherwise end the script silently under `set -e`; report it instead.
  # Input that ends without a trailing newline still counts as an answer.
  # IFS= keeps surrounding whitespace, so only an exact "yes" confirms.
  if ! IFS= read -r -p "Continue? (yes/no): " answer && [[ -z "${answer}" ]]; then
    log_error "No confirmation received (stdin closed); restart aborted"
    exit 1
  fi

  if [[ ! "${answer}" =~ ^[Yy][Ee][Ss]$ ]]; then
    log_info "Restart cancelled"
    exit 0
  fi

  # Restart web service
  log_info "Restarting web service..."
  ssh_exec "docker service update --force ${STACK_NAME}_web"

  # Restart worker service
  log_info "Restarting queue worker..."
  ssh_exec "docker service update --force ${STACK_NAME}_queue-worker"

  # Fixed pause before the status is read back.
  log_info "Waiting for services to stabilize (30 seconds)..."
  sleep 30

  check_status
}
|
||||
|
||||
# Full recovery procedure
|
||||
#######################################
# Full recovery procedure in five steps: show status, make sure Docker
# Swarm is active, make sure a stack network exists, force-restart the
# web and queue-worker services, then probe the public health endpoint.
# Globals:   PRODUCTION_SERVER, STACK_NAME (read)
# Arguments: none
# Outputs:   progress messages and the output of the remote commands
# Exits:     1 when the final health check fails
#######################################
full_recovery() {
  local health_url="https://michaelschiemer.de/health"
  local swarm_status

  log_step "Running full recovery procedure..."

  echo ""
  log_warn "╔════════════════════════════════════════════════════════╗"
  log_warn "║ FULL SERVICE RECOVERY PROCEDURE ║"
  log_warn "╚════════════════════════════════════════════════════════╝"
  echo ""

  # Step 1: Check current status
  # Informational only: a failing status query must not stop the recovery
  # it is meant to precede. Running it on the left of `||` also suspends
  # errexit inside check_status, so all of its queries are attempted.
  log_info "Step 1/5: Check current status"
  check_status || log_warn "Status check failed; continuing with recovery"

  # Step 2: Check Docker Swarm health
  # The remote pipeline prints the matching "Swarm: active" line, or the
  # single word "inactive" when grep finds no match.
  log_info "Step 2/5: Check Docker Swarm health"
  swarm_status=$(ssh_exec "docker info | grep 'Swarm: active' || echo 'inactive'")

  if [[ "${swarm_status}" == "inactive" ]]; then
    log_error "Docker Swarm is not active!"
    log_info "Attempting to reinitialize Swarm..."
    # Best effort: a failed init is tolerated here; the later steps and the
    # final health check will surface the problem.
    ssh_exec "docker swarm init --advertise-addr ${PRODUCTION_SERVER}" || true
  else
    log_info "Docker Swarm is active"
  fi

  # Step 3: Verify network and volumes
  # Creates the overlay network only if no existing network name contains
  # the stack name.
  log_info "Step 3/5: Verify Docker resources"
  ssh_exec "docker network ls | grep ${STACK_NAME} || docker network create --driver overlay ${STACK_NAME}_network"

  # Step 4: Restart services
  log_info "Step 4/5: Restart services"
  ssh_exec "docker service update --force ${STACK_NAME}_web"
  ssh_exec "docker service update --force ${STACK_NAME}_queue-worker"

  log_info "Waiting for services to stabilize (45 seconds)..."
  sleep 45

  # Step 5: Health check
  # The verdict comes from curl's exit status only. The response body is
  # discarded, because capturing it together with an "OK" marker makes any
  # non-empty body look like a failure. --max-time keeps an unresponsive
  # endpoint from hanging the script.
  # NOTE(review): --insecure skips TLS certificate verification, as the
  # original -k did; confirm this is still wanted.
  log_info "Step 5/5: Run health checks"

  if curl --fail --silent --insecure --output /dev/null --max-time 30 "${health_url}"; then
    log_info "✅ Health check passed"
  else
    log_error "❌ Health check failed"
    log_warn "Manual intervention may be required"
    log_warn "Check logs: ssh deploy@${PRODUCTION_SERVER} 'docker service logs ${STACK_NAME}_web --tail 100'"
    exit 1
  fi

  echo ""
  log_warn "╔════════════════════════════════════════════════════════╗"
  log_warn "║ RECOVERY PROCEDURE COMPLETED ║"
  log_warn "╚════════════════════════════════════════════════════════╝"
  echo ""
  log_info "Application: https://michaelschiemer.de"
  log_info "Services recovered successfully"
  echo ""
}
|
||||
|
||||
# Clear caches
|
||||
#######################################
# Clear the application caches on the production server: flush Redis and
# empty the file cache directory inside the web container.
# Globals:   STACK_NAME (read)
# Arguments: none
# Outputs:   progress messages; a warning for every step that failed
# Returns:   0 (best effort: failures are reported, not fatal)
#######################################
clear_caches() {
  local failed=0

  log_step "Clearing application caches..."

  # Clear Redis cache
  # `head -1` picks a single container ID, as the web lookup below does;
  # with several matches docker exec would treat the extra IDs as the
  # command to run.
  # NOTE(review): FLUSHALL empties every Redis database, not only cache
  # keys. Confirm nothing else (sessions, queues) lives in this instance.
  log_info "Clearing Redis cache..."
  if ! ssh_exec "docker exec \$(docker ps -q -f name=${STACK_NAME}_redis | head -1) redis-cli FLUSHALL"; then
    log_warn "Redis cache clear failed"
    failed=1
  fi

  # Clear file caches
  # The glob has to be expanded inside the container. Left unquoted it is
  # expanded by the shell on the remote host, and rm in the container only
  # receives a literal '*', deletes nothing and still succeeds.
  # Assumes the web image provides `sh`.
  log_info "Clearing file caches..."
  if ! ssh_exec "docker exec \$(docker ps -q -f name=${STACK_NAME}_web | head -1) sh -c 'rm -rf /var/www/html/storage/cache/*'"; then
    log_warn "File cache clear failed"
    failed=1
  fi

  # Report success only when every step succeeded.
  if (( failed )); then
    log_warn "Cache clearing finished with errors"
  else
    log_info "Caches cleared"
  fi
  return 0
}
|
||||
|
||||
# Show help
|
||||
#######################################
# Print the usage text: available commands, example invocations and the
# emergency recovery steps.
# Globals:   PRODUCTION_SERVER, STACK_NAME (read)
# Arguments: none
# Outputs:   help text on stdout
#######################################
show_help() {
  local self="$0"
  local logs_hint="ssh deploy@${PRODUCTION_SERVER} 'docker service logs ${STACK_NAME}_web --tail 200'"

  # One argument per output line; empty arguments produce the blank lines.
  printf '%s\n' \
    "Service Recovery Script" \
    "" \
    "Usage: ${self} [command]" \
    "" \
    "Commands:" \
    "status Check service status and logs" \
    "restart Restart all services" \
    "recover Run full recovery procedure (recommended)" \
    "clear-cache Clear application caches" \
    "help Show this help" \
    "" \
    "Examples:" \
    "${self} status # Quick status check" \
    "${self} recover # Full automated recovery" \
    "${self} restart # Just restart services" \
    "${self} clear-cache # Clear caches only" \
    "" \
    "Emergency Recovery:" \
    "1. Check status: ${self} status" \
    "2. Run recovery: ${self} recover" \
    "3. If still failing, check logs manually:" \
    "${logs_hint}" \
    ""
}
|
||||
|
||||
# Main
|
||||
#######################################
# Entry point: dispatch the first command-line argument to the matching
# action. A missing or empty argument shows the help text.
# Arguments: $1 - status | restart | recover | clear-cache | help | --help | -h
# Exits:     1 for an unknown command, after printing the help text
#######################################
main() {
  local action="${1:-help}"

  case "${action}" in
    status)      check_status ;;
    restart)     restart_services ;;
    recover)     full_recovery ;;
    clear-cache) clear_caches ;;
    help | --help | -h)
      show_help
      ;;
    *)
      log_error "Unknown command: ${action}"
      show_help
      exit 1
      ;;
  esac
}

main "$@"
|
||||
Reference in New Issue
Block a user