#!/bin/bash
#
# Deployment Diagnostics Script
# Purpose: Comprehensive diagnostics for troubleshooting deployment issues
#
# Usage:
#   ./scripts/deployment-diagnostics.sh            # Run all diagnostics
#   ./scripts/deployment-diagnostics.sh --quick    # Quick checks only
#   ./scripts/deployment-diagnostics.sh --verbose  # Verbose output
#
# Strict mode: abort on unhandled errors, unset variables and failed pipelines.
set -euo pipefail

# Directory containing this script, and the directory two levels above it.
# Assignment and `readonly` are kept separate so a failing `cd` is not masked.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
readonly SCRIPT_DIR PROJECT_ROOT

# Deployment target (fixed for the lifetime of the script).
readonly PRODUCTION_SERVER="94.16.110.151"
readonly REGISTRY="git.michaelschiemer.de:5000"
readonly STACK_NAME="framework"
readonly IMAGE="framework"

# Run-mode flags; these stay writable because argument parsing updates them.
QUICK_MODE=false
VERBOSE=false

# Colors (ANSI escape sequences, expanded when printed)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'
|
||
|
||
# Print an error line prefixed with a red cross mark.
# Globals:   RED, NC (read)
# Arguments: $1 - message text
# Outputs:   the formatted line on stdout
log_error() {
  # %b expands the escapes in the colour codes only; the message goes through
  # %s so backslashes in it (e.g. from remote command output) stay literal.
  printf '%b✗%b %s\n' "${RED}" "${NC}" "${1-}"
}
|
||
|
||
# Print a success line prefixed with a green check mark.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text
# Outputs:   the formatted line on stdout
log_success() {
  # Colour codes are expanded via %b; the message itself is printed verbatim.
  printf '%b✓%b %s\n' "${GREEN}" "${NC}" "${1-}"
}
|
||
|
||
# Print a warning line prefixed with a yellow warning sign.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text
# Outputs:   the formatted line on stdout
log_warn() {
  # Colour codes are expanded via %b; the message itself is printed verbatim.
  printf '%b⚠%b %s\n' "${YELLOW}" "${NC}" "${1-}"
}
|
||
|
||
# Print an informational line prefixed with a blue info sign.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text
# Outputs:   the formatted line on stdout
log_info() {
  # Callers pass raw remote output here, so the message must not be
  # escape-interpreted: only the colour codes go through %b.
  printf '%bℹ%b %s\n' "${BLUE}" "${NC}" "${1-}"
}
|
||
|
||
# Print a section heading: an empty line followed by the cyan title banner.
# Globals:   CYAN, NC (read)
# Arguments: $1 - section title
# Outputs:   two lines on stdout
log_section() {
  # The title is printed verbatim (%s); only the colour codes are expanded.
  printf '\n%b═══ %s ═══%b\n' "${CYAN}" "${1-}" "${NC}"
}
|
||
|
||
# SSH helper: run a command on the production server as user "deploy".
# Globals:   PRODUCTION_SERVER (read)
# Arguments: $@ - command (and arguments) to execute remotely
# Outputs:   the remote command's stdout; stderr is discarded. If ssh exits
#            non-zero (connection problem or failing remote command) the
#            sentinel line "SSH_FAILED" is printed as well.
# Returns:   always 0, so callers running under `set -e` are not aborted
ssh_exec() {
  # ConnectTimeout keeps the diagnostics from hanging on an unreachable host.
  ssh -i ~/.ssh/production \
    -o ConnectTimeout=10 \
    deploy@"${PRODUCTION_SERVER}" "$@" 2> /dev/null \
    || echo "SSH_FAILED"
}
|
||
|
||
# Check local prerequisites: git working tree, Docker, Ansible and the
# production SSH key. Purely informational; every finding is logged and no
# probe failure aborts the run.
# Arguments: none
# Outputs:   log lines on stdout
check_local() {
  local branch docker_version ansible_out ansible_version

  log_section "Local Environment"

  # Git status
  if git status &> /dev/null; then
    log_success "Git repository detected"
    # rev-parse fails e.g. in a repository without commits; under `set -e`
    # that must not terminate the diagnostics.
    branch=$(git rev-parse --abbrev-ref HEAD 2> /dev/null) || branch="unknown"
    log_info "Current branch: ${branch}"

    if [[ -n "$(git status --porcelain)" ]]; then
      log_warn "Working directory has uncommitted changes"
    else
      log_success "Working directory is clean"
    fi
  else
    log_error "Not in a git repository"
  fi

  # Docker
  if command -v docker &> /dev/null; then
    log_success "Docker installed"
    # Third word of "Docker version X.Y.Z, build ...", without the comma.
    docker_version=$(docker --version 2> /dev/null | cut -d' ' -f3 | tr -d ',') \
      || docker_version=""
    log_info "Version: ${docker_version:-unknown}"
  else
    log_error "Docker not found"
  fi

  # Ansible
  if command -v ansible-playbook &> /dev/null; then
    log_success "Ansible installed"
    # Take the first line via parameter expansion instead of `| head -1`:
    # with pipefail, head closing the pipe early can fail the pipeline.
    ansible_out=$(ansible-playbook --version 2> /dev/null) || ansible_out=""
    ansible_version=$(cut -d' ' -f2 <<< "${ansible_out%%$'\n'*}")
    log_info "Version: ${ansible_version:-unknown}"
  else
    log_error "Ansible not found"
  fi

  # SSH key
  if [[ -f ~/.ssh/production ]]; then
    log_success "Production SSH key found"
  else
    log_error "Production SSH key not found at ~/.ssh/production"
  fi
}
|
||
|
||
# Check SSH connectivity by running a trivial echo on the production server.
# Globals:   PRODUCTION_SERVER (read)
# Arguments: none
# Outputs:   log lines on stdout
# Returns:   0 if the remote echo came back as exactly "OK", 1 otherwise
check_ssh() {
  local result

  log_section "SSH Connectivity"

  result=$(ssh_exec "echo 'OK'")

  if [[ "$result" != "OK" ]]; then
    log_error "Cannot connect to production server via SSH"
    # Print the equivalent manual command so the operator can see the real error.
    log_info "Check: ssh -i ~/.ssh/production deploy@${PRODUCTION_SERVER}"
    return 1
  fi

  log_success "SSH connection to production server"
}
|
||
|
||
# Check Docker Swarm: verify the remote node reports an active swarm and list
# its manager hostnames and worker count.
# Arguments: none
# Outputs:   log lines on stdout
# Returns:   0 if the swarm state is "active", 1 otherwise
check_docker_swarm() {
  local swarm_status managers workers

  log_section "Docker Swarm Status"

  swarm_status=$(ssh_exec "docker info | grep 'Swarm:' | awk '{print \$2}'")

  if [[ "$swarm_status" != "active" ]]; then
    log_error "Docker Swarm is not active"
    return 1
  fi

  log_success "Docker Swarm is active"

  # Manager nodes (one hostname per line; joined so the log stays on one line)
  managers=$(ssh_exec "docker node ls --filter role=manager --format '{{.Hostname}}'")
  if [[ "$managers" == *SSH_FAILED* ]]; then
    # ssh_exec signals failure through this sentinel; do not show it as a node.
    log_warn "Could not list manager nodes"
  else
    log_info "Manager nodes: ${managers//$'\n'/, }"
  fi

  # Worker nodes (count only)
  workers=$(ssh_exec "docker node ls --filter role=worker --format '{{.Hostname}}' | wc -l")
  if [[ "$workers" == *SSH_FAILED* ]]; then
    log_warn "Could not count worker nodes"
  else
    log_info "Worker nodes: ${workers}"
  fi
}
|
||
|
||
# Check services: list the stack's swarm services with their replica counts
# and verify that the web and queue-worker services have a running task.
# Globals:   STACK_NAME (read)
# Arguments: none
# Outputs:   log lines on stdout
# Returns:   1 if the service list cannot be fetched or is empty, 0 otherwise
check_services() {
  local services web_status worker_status line

  log_section "Framework Services"

  # List services
  services=$(ssh_exec "docker service ls --filter 'name=${STACK_NAME}' --format '{{.Name}}: {{.Replicas}}'")

  # ssh_exec reports failure via a non-empty sentinel, which must not be
  # mistaken for a service listing.
  if [[ "$services" == *SSH_FAILED* ]]; then
    log_error "Cannot query framework services"
    return 1
  fi

  if [[ -z "$services" ]]; then
    log_error "No framework services found"
    return 1
  fi

  log_success "Framework services found"
  while IFS= read -r line; do
    log_info "$line"
  done <<< "$services"

  # Check web service
  web_status=$(ssh_exec "docker service ps ${STACK_NAME}_web --filter 'desired-state=running' --format '{{.CurrentState}}' | head -1")

  if [[ "$web_status" =~ Running ]]; then
    log_success "Web service is running"
  else
    log_error "Web service is not running: ${web_status}"
  fi

  # Check worker service
  worker_status=$(ssh_exec "docker service ps ${STACK_NAME}_queue-worker --filter 'desired-state=running' --format '{{.CurrentState}}' | head -1")

  if [[ "$worker_status" =~ Running ]]; then
    log_success "Queue worker is running"
  else
    log_error "Queue worker is not running: ${worker_status}"
  fi
}
|
||
|
||
# Check Docker images: report the image the web service is configured with
# and list up to five image tags present on the server for this project.
# Globals:   STACK_NAME, REGISTRY, IMAGE (read)
# Arguments: none
# Outputs:   log lines and the raw image listing on stdout
check_images() {
  local current_image images

  log_section "Docker Images"

  # Current running image
  current_image=$(ssh_exec "docker service inspect ${STACK_NAME}_web --format '{{.Spec.TaskTemplate.ContainerSpec.Image}}'")

  # A failed inspect yields the non-empty SSH_FAILED sentinel; treat it like
  # an empty answer instead of reporting it as the image name.
  if [[ -n "$current_image" && "$current_image" != *SSH_FAILED* ]]; then
    log_success "Current image: ${current_image}"
  else
    log_error "Cannot determine current image"
  fi

  # Available images (last 5)
  log_info "Available images (last 5):"
  images=$(ssh_exec "docker images ${REGISTRY}/${IMAGE} --format ' {{.Tag}} ({{.CreatedAt}})' | grep -v buildcache | head -5")
  if [[ "$images" == *SSH_FAILED* ]]; then
    log_warn "Could not list available images"
  elif [[ -n "$images" ]]; then
    printf '%s\n' "$images"
  fi
}
|
||
|
||
# Check networks: list Docker networks whose name matches the stack name.
# Globals:   STACK_NAME (read)
# Arguments: none
# Outputs:   log lines on stdout
check_networks() {
  local networks line

  log_section "Docker Networks"

  networks=$(ssh_exec "docker network ls --filter 'name=${STACK_NAME}' --format '{{.Name}}: {{.Driver}}'")

  if [[ "$networks" == *SSH_FAILED* ]]; then
    # ssh_exec's failure sentinel is non-empty; do not report it as a network.
    log_error "Cannot query Docker networks"
  elif [[ -n "$networks" ]]; then
    log_success "Framework networks found"
    while IFS= read -r line; do
      log_info "$line"
    done <<< "$networks"
  else
    log_warn "No framework-specific networks found"
  fi
}
|
||
|
||
# Check volumes: list Docker volumes whose name matches the stack name.
# Globals:   STACK_NAME (read)
# Arguments: none
# Outputs:   log lines on stdout
check_volumes() {
  local volumes line

  log_section "Docker Volumes"

  volumes=$(ssh_exec "docker volume ls --filter 'name=${STACK_NAME}' --format '{{.Name}}'")

  if [[ "$volumes" == *SSH_FAILED* ]]; then
    # ssh_exec's failure sentinel is non-empty; do not report it as a volume.
    log_error "Cannot query Docker volumes"
  elif [[ -n "$volumes" ]]; then
    log_success "Framework volumes found"
    while IFS= read -r line; do
      log_info "$line"
    done <<< "$volumes"
  else
    log_warn "No framework-specific volumes found"
  fi
}
|
||
|
||
# Fetch the HTTP status code of a URL.
# Arguments: $1 - URL to request
# Outputs:   the three-digit status code on stdout; "000" when no HTTP
#            response was obtained
_http_status() {
  local code
  # -k: certificate verification is skipped, as in the original checks.
  # On failure curl itself already prints 000 via -w, so the fallback is only
  # applied when nothing was printed at all (avoids a doubled "000000").
  code=$(curl -k -s -o /dev/null \
    --connect-timeout 5 --max-time 15 \
    -w '%{http_code}' "$1") || true
  printf '%s\n' "${code:-000}"
}

# Check application health via the public health endpoints (application,
# database, Redis).
# Arguments: none
# Outputs:   log lines on stdout
check_app_health() {
  local base_url="https://michaelschiemer.de"
  local http_code db_code redis_code

  log_section "Application Health"

  # Main health endpoint (a 302 redirect is accepted as healthy too)
  http_code=$(_http_status "${base_url}/health")

  if [[ "$http_code" == "200" || "$http_code" == "302" ]]; then
    log_success "Application health endpoint: ${http_code}"
  else
    log_error "Application health endpoint failed: ${http_code}"
  fi

  # Database health
  db_code=$(_http_status "${base_url}/health/database")

  if [[ "$db_code" == "200" ]]; then
    log_success "Database connectivity: OK"
  else
    log_warn "Database connectivity: ${db_code}"
  fi

  # Redis health
  redis_code=$(_http_status "${base_url}/health/redis")

  if [[ "$redis_code" == "200" ]]; then
    log_success "Redis connectivity: OK"
  else
    log_warn "Redis connectivity: ${redis_code}"
  fi
}
|
||
|
||
# Check Docker secrets: report how many secrets the remote Docker knows about.
# Arguments: none
# Outputs:   log lines on stdout
check_secrets() {
  local secret_count

  log_section "Docker Secrets"

  secret_count=$(ssh_exec "docker secret ls --format '{{.Name}}' | wc -l")

  # Validate before comparing numerically: a non-numeric answer (such as the
  # SSH_FAILED sentinel) would otherwise be evaluated as a variable name and
  # abort the script under `set -u`.
  if [[ ! "$secret_count" =~ ^[0-9]+$ ]]; then
    log_error "Cannot determine Docker secret count"
  elif ((10#$secret_count > 0)); then
    log_success "Docker secrets configured: ${secret_count} secrets"
  else
    log_warn "No Docker secrets found"
  fi
}
|
||
|
||
# Check recent logs: print the last 20 log lines of the web service.
# Globals:   STACK_NAME (read)
# Arguments: none
# Outputs:   a heading plus the raw remote log output on stdout
check_logs() {
  log_section "Recent Logs"

  log_info "Last 20 lines from web service:"
  # ssh_exec throws away stderr, so merge the remote stderr into stdout;
  # otherwise log lines the containers wrote to stderr would be dropped.
  ssh_exec "docker service logs ${STACK_NAME}_web --tail 20 2>&1"
}
|
||
|
||
# Check Gitea runner: report the state of the remote "gitea-runner" systemd
# unit.
# Arguments: none
# Outputs:   log lines on stdout
check_gitea_runner() {
  local runner_status

  log_section "Gitea Actions Runner"

  # `systemctl is-active` prints a state AND exits non-zero for anything but
  # "active", so an `|| echo not-found` fallback would append a second line.
  # Ask for the unit's load state first and print exactly one word.
  runner_status=$(ssh_exec 'if systemctl show -p LoadState gitea-runner 2>/dev/null | grep -q "LoadState=not-found"; then echo not-found; else systemctl is-active gitea-runner 2>/dev/null || true; fi')

  case "$runner_status" in
    active)
      log_success "Gitea runner service is active"
      ;;
    not-found | "")
      # Empty output means systemctl produced nothing at all (e.g. it is not
      # available on the host); handled like a missing unit.
      log_warn "Gitea runner service not found (may not be installed yet)"
      ;;
    *SSH_FAILED*)
      log_error "Cannot query Gitea runner status"
      ;;
    *)
      log_error "Gitea runner service is ${runner_status}"
      ;;
  esac
}
|
||
|
||
# Resource usage: report root filesystem usage, memory usage and Docker's
# disk usage on the production server.
# Arguments: none
# Outputs:   log lines and the raw `docker system df` table on stdout
check_resources() {
  local disk_usage memory_usage

  log_section "Resource Usage"

  # Disk usage: fifth column of the last `df -h /` line
  disk_usage=$(ssh_exec "df -h / | tail -1 | awk '{print \$5}'")
  log_info "Disk usage: ${disk_usage}"

  # Memory usage: "<third column>/<second column>" of the Mem line of `free -h`
  memory_usage=$(ssh_exec "free -h | grep Mem | awk '{print \$3\"/\"\$2}'")
  log_info "Memory usage: ${memory_usage}"

  # Docker disk usage
  log_info "Docker disk usage:"
  ssh_exec "docker system df"
}
|
||
|
||
# Parse arguments: translate command-line flags into the run-mode globals.
# Globals:   QUICK_MODE, VERBOSE (written)
# Arguments: $@ - the script's command-line arguments
# Outputs:   a warning on stderr for every unrecognised argument
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --quick)
        QUICK_MODE=true
        ;;
      --verbose)
        VERBOSE=true
        ;;
      *)
        # Unknown options are still ignored (as before), but no longer
        # silently, so typos such as "--verbos" are noticed.
        printf 'Warning: ignoring unknown option: %s\n' "$arg" >&2
        ;;
    esac
  done
}

parse_args "$@"
|
||
|
||
# Main diagnostics: print the report banner, run the checks selected by the
# run-mode flags and finish with a summary.
# Globals:   QUICK_MODE, VERBOSE (read)
# Arguments: none used
# Outputs:   the complete diagnostics report on stdout
# Returns:   0 if every check succeeded, 1 if at least one check reported a
#            failure; exits 1 immediately when SSH connectivity is unavailable
main() {
  local failures=0
  local check

  echo ""
  printf '%b\n' "${CYAN}╔════════════════════════════════════════════════════════╗${NC}"
  printf '%b\n' "${CYAN}║ DEPLOYMENT DIAGNOSTICS REPORT ║${NC}"
  printf '%b\n' "${CYAN}╚════════════════════════════════════════════════════════╝${NC}"
  echo ""

  # Each check is invoked as `check || ...` on purpose: under `set -e` a bare
  # call returning non-zero would terminate the script on the spot and hide
  # all remaining diagnostics. Failures are counted and reported at the end.
  check_local || failures=$((failures + 1))

  # Every following check needs the remote host, so this one stays fatal.
  check_ssh || {
    log_error "SSH connectivity failed - cannot continue"
    exit 1
  }

  for check in check_docker_swarm check_services check_images check_app_health; do
    "$check" || failures=$((failures + 1))
  done

  if [[ "$QUICK_MODE" == false ]]; then
    for check in check_networks check_volumes check_secrets check_gitea_runner check_resources; do
      "$check" || failures=$((failures + 1))
    done

    # Logs are only shown for a full (non-quick) run with --verbose.
    if [[ "$VERBOSE" == true ]]; then
      check_logs || failures=$((failures + 1))
    fi
  fi

  echo ""
  printf '%b\n' "${CYAN}╔════════════════════════════════════════════════════════╗${NC}"
  printf '%b\n' "${CYAN}║ DIAGNOSTICS COMPLETED ║${NC}"
  printf '%b\n' "${CYAN}╚════════════════════════════════════════════════════════╝${NC}"
  echo ""
  log_info "For detailed logs: ./scripts/deployment-diagnostics.sh --verbose"
  log_info "For service recovery: ./scripts/service-recovery.sh recover"
  echo ""

  if ((failures > 0)); then
    log_warn "${failures} check(s) reported failures"
    return 1
  fi
}
|
||
|
||
# Entry point: run the diagnostics, forwarding the script's arguments.
main "$@"
|