#!/bin/bash
#
# Deployment Diagnostics Script
# Purpose: Comprehensive diagnostics for troubleshooting deployment issues
#
# Usage:
#   ./scripts/deployment-diagnostics.sh            # Run all diagnostics
#   ./scripts/deployment-diagnostics.sh --quick    # Quick checks only
#   ./scripts/deployment-diagnostics.sh --verbose  # Verbose output
#   ./scripts/deployment-diagnostics.sh --help     # Show usage
#
# Exit status:
#   0  all checks ran and no critical check (SSH, Swarm, services) failed
#   1  SSH was unreachable, or the Swarm / services check failed
#

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC2034  # not referenced by this script; kept as-is
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

readonly PRODUCTION_SERVER="94.16.110.151"
readonly REGISTRY="git.michaelschiemer.de:5000"
readonly STACK_NAME="framework"
readonly IMAGE="framework"
readonly SSH_USER="deploy"
readonly SSH_KEY=~/.ssh/production
readonly HEALTH_BASE_URL="https://michaelschiemer.de"

QUICK_MODE=false
VERBOSE=false

# Colors
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'

# --- Logging helpers --------------------------------------------------------
# Each helper prints one line to stdout. The message is passed through '%s'
# so that backslashes in remote output are shown verbatim.

log_error() {
  printf '%b✗%b %s\n' "${RED}" "${NC}" "$1"
}

log_success() {
  printf '%b✓%b %s\n' "${GREEN}" "${NC}" "$1"
}

log_warn() {
  printf '%b⚠%b %s\n' "${YELLOW}" "${NC}" "$1"
}

log_info() {
  printf '%bℹ%b %s\n' "${BLUE}" "${NC}" "$1"
}

log_section() {
  printf '\n%b═══ %s ═══%b\n' "${CYAN}" "$1" "${NC}"
}

# Logs every non-empty line of $1 via log_info.
log_info_lines() {
  local line
  while IFS= read -r line; do
    if [[ -n "${line}" ]]; then
      log_info "${line}"
    fi
  done <<< "$1"
}

# Prints a boxed, centred title.
print_banner() {
  local title=$1
  local -r width=56
  local rule left right

  printf -v rule '%*s' "${width}" ''
  rule=${rule// /═}
  left=$(( (width - ${#title}) / 2 ))
  right=$(( width - ${#title} - left ))

  printf '\n'
  printf '%b╔%s╗%b\n' "${CYAN}" "${rule}" "${NC}"
  printf '%b║%*s%s%*s║%b\n' "${CYAN}" "${left}" '' "${title}" "${right}" '' "${NC}"
  printf '%b╚%s╝%b\n' "${CYAN}" "${rule}" "${NC}"
  printf '\n'
}

usage() {
  cat <<EOF
Usage: ${0##*/} [--quick] [--verbose] [--help]

  --quick    Quick checks only (skips networks, volumes, secrets, runner,
             resources and logs)
  --verbose  Additionally print recent web service logs
  --help     Show this help
EOF
}

# --- Remote / HTTP helpers --------------------------------------------------

#######################################
# Runs a command on the production server.
# Arguments: the remote command line
# Outputs:   remote stdout; stderr is discarded on purpose so that SSH
#            banners and docker warnings do not pollute captured values
# Returns:   ssh's exit status (255 on connection failure, otherwise the
#            remote command's status). Nothing is printed on failure, so
#            callers can tell "no output" from "real output".
#######################################
ssh_exec() {
  ssh -i "${SSH_KEY}" -o ConnectTimeout=10 \
    "${SSH_USER}@${PRODUCTION_SERVER}" "$@" 2>/dev/null
}

#######################################
# Prints the HTTP status code for a URL ("000" if the request failed).
# Arguments: $1 - URL
# Notes:     -k skips TLS verification, as this script always has.
#            curl itself writes 000 through -w when the transfer fails, so
#            the fallback only applies when nothing was printed at all
#            (for example when curl is not installed).
#######################################
http_status() {
  local code
  code=$(curl -k -s -o /dev/null \
    --connect-timeout 10 --max-time 30 \
    -w '%{http_code}' "$1") || true
  printf '%s\n' "${code:-000}"
}

# --- Checks -----------------------------------------------------------------

# Check local prerequisites: git state, docker, ansible and the SSH key.
check_local() {
  log_section "Local Environment"

  local branch docker_version ansible_version

  # Git status
  if git status &> /dev/null; then
    log_success "Git repository detected"
    # rev-parse fails in a repository without commits; do not abort on it.
    branch=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) || branch="unknown"
    log_info "Current branch: ${branch}"

    if [[ -n "$(git status --porcelain)" ]]; then
      log_warn "Working directory has uncommitted changes"
    else
      log_success "Working directory is clean"
    fi
  else
    log_error "Not in a git repository"
  fi

  # Docker
  if command -v docker &> /dev/null; then
    log_success "Docker installed"
    docker_version=$(docker --version 2>/dev/null | cut -d' ' -f3 | tr -d ',') \
      || docker_version="unknown"
    log_info "Version: ${docker_version}"
  else
    log_error "Docker not found"
  fi

  # Ansible
  if command -v ansible-playbook &> /dev/null; then
    log_success "Ansible installed"
    # awk consumes the whole stream (no SIGPIPE under pipefail, unlike
    # head -1) and handles both "ansible-playbook 2.9.6" and
    # "ansible-playbook [core 2.15.0]".
    ansible_version=$(ansible-playbook --version 2>/dev/null \
      | awk 'NR == 1 { sub(/^[^ ]+ /, ""); gsub(/\[|\]/, ""); print }') \
      || ansible_version="unknown"
    log_info "Version: ${ansible_version}"
  else
    log_error "Ansible not found"
  fi

  # SSH key
  if [[ -f "${SSH_KEY}" ]]; then
    log_success "Production SSH key found"
  else
    log_error "Production SSH key not found at ${SSH_KEY}"
  fi
}

# Check SSH connectivity. Returns 1 when the server cannot be reached.
check_ssh() {
  log_section "SSH Connectivity"

  local result
  result=$(ssh_exec "echo 'OK'") || result=""

  if [[ "${result}" == "OK" ]]; then
    log_success "SSH connection to production server"
  else
    log_error "Cannot connect to production server via SSH"
    log_info "Check: ssh -i ${SSH_KEY} ${SSH_USER}@${PRODUCTION_SERVER}"
    return 1
  fi
}

# Check Docker Swarm. Returns 1 when the swarm is not active.
check_docker_swarm() {
  log_section "Docker Swarm Status"

  local swarm_status managers workers
  swarm_status=$(ssh_exec "docker info --format '{{.Swarm.LocalNodeState}}'") \
    || swarm_status=""

  if [[ "${swarm_status}" != "active" ]]; then
    log_error "Docker Swarm is not active"
    return 1
  fi
  log_success "Docker Swarm is active"

  # Manager nodes (one hostname per line remotely; shown comma-separated)
  managers=$(ssh_exec "docker node ls --filter role=manager --format '{{.Hostname}}'") \
    || managers="unknown"
  log_info "Manager nodes: ${managers//$'\n'/, }"

  # Worker nodes
  workers=$(ssh_exec "docker node ls --filter role=worker --format '{{.Hostname}}' | wc -l") \
    || workers="unknown"
  log_info "Worker nodes: ${workers}"
}

#######################################
# Reports whether a swarm service has a running task.
# Arguments: $1 - service name, $2 - human-readable label
#######################################
report_service_state() {
  local service=$1 label=$2 state
  state=$(ssh_exec "docker service ps ${service} --filter 'desired-state=running' --format '{{.CurrentState}}' | head -1") \
    || state=""

  if [[ "${state}" =~ Running ]]; then
    log_success "${label} is running"
  else
    log_error "${label} is not running: ${state:-unknown}"
  fi
}

# Check services. Returns 1 when no stack services are listed.
check_services() {
  log_section "Framework Services"

  local services
  services=$(ssh_exec "docker service ls --filter 'name=${STACK_NAME}' --format '{{.Name}}: {{.Replicas}}'") \
    || services=""

  if [[ -z "${services}" ]]; then
    log_error "No framework services found"
    return 1
  fi
  log_success "Framework services found"
  log_info_lines "${services}"

  report_service_state "${STACK_NAME}_web" "Web service"
  report_service_state "${STACK_NAME}_queue-worker" "Queue worker"
}

# Check Docker images
check_images() {
  log_section "Docker Images"

  # Current running image
  local current_image
  current_image=$(ssh_exec "docker service inspect ${STACK_NAME}_web --format '{{.Spec.TaskTemplate.ContainerSpec.Image}}'") \
    || current_image=""

  if [[ -n "${current_image}" ]]; then
    log_success "Current image: ${current_image}"
  else
    log_error "Cannot determine current image"
  fi

  # Available images (last 5)
  log_info "Available images (last 5):"
  ssh_exec "docker images ${REGISTRY}/${IMAGE} --format ' {{.Tag}} ({{.CreatedAt}})' | grep -v buildcache | head -5" \
    || log_warn "Could not list images for ${REGISTRY}/${IMAGE}"
}

# Check networks
check_networks() {
  log_section "Docker Networks"

  local networks
  networks=$(ssh_exec "docker network ls --filter 'name=${STACK_NAME}' --format '{{.Name}}: {{.Driver}}'") \
    || networks=""

  if [[ -n "${networks}" ]]; then
    log_success "Framework networks found"
    log_info_lines "${networks}"
  else
    log_warn "No framework-specific networks found"
  fi
}

# Check volumes
check_volumes() {
  log_section "Docker Volumes"

  local volumes
  volumes=$(ssh_exec "docker volume ls --filter 'name=${STACK_NAME}' --format '{{.Name}}'") \
    || volumes=""

  if [[ -n "${volumes}" ]]; then
    log_success "Framework volumes found"
    log_info_lines "${volumes}"
  else
    log_warn "No framework-specific volumes found"
  fi
}

# Check application health via the public health endpoints.
check_app_health() {
  log_section "Application Health"

  local http_code db_code redis_code

  # Main health endpoint
  http_code=$(http_status "${HEALTH_BASE_URL}/health")
  if [[ "${http_code}" == "200" || "${http_code}" == "302" ]]; then
    log_success "Application health endpoint: ${http_code}"
  else
    log_error "Application health endpoint failed: ${http_code}"
  fi

  # Database health
  db_code=$(http_status "${HEALTH_BASE_URL}/health/database")
  if [[ "${db_code}" == "200" ]]; then
    log_success "Database connectivity: OK"
  else
    log_warn "Database connectivity: ${db_code}"
  fi

  # Redis health
  redis_code=$(http_status "${HEALTH_BASE_URL}/health/redis")
  if [[ "${redis_code}" == "200" ]]; then
    log_success "Redis connectivity: OK"
  else
    log_warn "Redis connectivity: ${redis_code}"
  fi
}

# Check Docker secrets
check_secrets() {
  log_section "Docker Secrets"

  local count
  count=$(ssh_exec "docker secret ls --format '{{.Name}}' | wc -l") || count=""
  count=${count//[[:space:]]/}

  # Validate before comparing: a non-numeric value in an arithmetic test
  # would be treated as a variable name and abort under 'set -u'.
  if [[ ! "${count}" =~ ^[0-9]+$ ]]; then
    log_warn "Could not determine Docker secrets"
  elif (( count > 0 )); then
    log_success "Docker secrets configured: ${count} secrets"
  else
    log_warn "No Docker secrets found"
  fi
}

# Check recent logs
check_logs() {
  log_section "Recent Logs"

  log_info "Last 20 lines from web service:"
  # 2>&1 on the remote side: ssh_exec drops stderr locally, which would
  # otherwise hide everything the containers wrote to stderr.
  ssh_exec "docker service logs ${STACK_NAME}_web --tail 20 2>&1" \
    || log_warn "Could not retrieve logs for ${STACK_NAME}_web"
}

# Check Gitea runner
check_gitea_runner() {
  log_section "Gitea Actions Runner"

  # 'systemctl is-active' prints a state (e.g. "inactive") even for units
  # that do not exist, so existence is probed separately with
  # 'systemctl cat' to keep the "not-found" case distinguishable.
  local runner_status
  runner_status=$(ssh_exec "if systemctl cat gitea-runner >/dev/null 2>&1; then systemctl is-active gitea-runner 2>/dev/null || true; else echo not-found; fi") \
    || runner_status=""

  case "${runner_status}" in
    active)
      log_success "Gitea runner service is active"
      ;;
    not-found)
      log_warn "Gitea runner service not found (may not be installed yet)"
      ;;
    *)
      log_error "Gitea runner service is ${runner_status:-unknown}"
      ;;
  esac
}

# Resource usage
check_resources() {
  log_section "Resource Usage"

  local disk_usage memory_usage

  # Disk usage
  disk_usage=$(ssh_exec "df -h / | tail -1 | awk '{print \$5}'") \
    || disk_usage="unknown"
  log_info "Disk usage: ${disk_usage}"

  # Memory usage
  memory_usage=$(ssh_exec "free -h | grep Mem | awk '{print \$3\"/\"\$2}'") \
    || memory_usage="unknown"
  log_info "Memory usage: ${memory_usage}"

  # Docker disk usage
  log_info "Docker disk usage:"
  ssh_exec "docker system df" || log_warn "Could not query Docker disk usage"
}

# --- Entry point ------------------------------------------------------------

# Parse arguments. Unknown arguments are reported but do not stop the run.
parse_args() {
  local arg
  for arg in "$@"; do
    case "${arg}" in
      --quick)
        QUICK_MODE=true
        ;;
      --verbose)
        VERBOSE=true
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        log_warn "Ignoring unknown argument: ${arg}"
        ;;
    esac
  done
}

# Main diagnostics
main() {
  # A failed Swarm or services check is remembered rather than aborting,
  # so the remaining diagnostics still run; it is reflected in the exit code.
  local failed=0

  parse_args "$@"

  print_banner "DEPLOYMENT DIAGNOSTICS REPORT"

  check_local
  check_ssh || {
    log_error "SSH connectivity failed - cannot continue"
    exit 1
  }
  check_docker_swarm || failed=1
  check_services || failed=1
  check_images
  check_app_health

  if [[ "${QUICK_MODE}" == false ]]; then
    check_networks
    check_volumes
    check_secrets
    check_gitea_runner
    check_resources

    if [[ "${VERBOSE}" == true ]]; then
      check_logs
    fi
  fi

  print_banner "DIAGNOSTICS COMPLETED"

  log_info "For detailed logs: ./scripts/deployment-diagnostics.sh --verbose"
  log_info "For service recovery: ./scripts/service-recovery.sh recover"
  echo ""

  return "${failed}"
}

main "$@"