---
# Monitoring Role Default Variables
#
# Lowest-precedence defaults for the monitoring role. Any value here can be
# overridden from inventory, group/host vars, play vars or extra vars.

# General Configuration
# These toggles are plain booleans on purpose: a default written as
# "{{ monitoring_enabled | default(true) }}" refers to itself and makes
# Ansible abort with "recursive loop detected in template string" whenever
# the variable is not overridden elsewhere.
monitoring_enabled: true
health_checks_enabled: true
monitoring_user: monitoring
monitoring_group: monitoring
monitoring_home: /opt/monitoring

# Node Exporter Configuration
node_exporter_enabled: true
node_exporter_version: "1.6.1"
node_exporter_port: 9100
node_exporter_bind_address: "127.0.0.1"
node_exporter_user: node_exporter
node_exporter_group: node_exporter

# Prometheus Configuration (basic)
prometheus_enabled: false  # Can be enabled for advanced monitoring
prometheus_version: "2.45.0"
prometheus_port: 9090
prometheus_bind_address: "127.0.0.1"
prometheus_retention_time: "15d"
prometheus_retention_size: "10GB"

# Health Check Configuration
# NOTE(review): interval/timeout values here and in the check lists below are
# presumably seconds - confirm against the tasks/templates that consume them.
health_check_interval: 30
health_check_timeout: 10
health_check_retries: 3

# Service Health Checks
# Each entry names a service and the command used to probe it.
service_checks:
  - name: nginx
    command: "systemctl is-active nginx"
    interval: 30
    timeout: 5
    retries: 2
  - name: docker
    command: "docker version"
    interval: 60
    timeout: 10
    retries: 3
  - name: php-fpm
    command: "docker exec php php-fpm -t"
    interval: 60
    timeout: 15
    retries: 2
  - name: mysql
    command: "docker exec mysql mysqladmin ping -h localhost"
    interval: 60
    timeout: 10
    retries: 3

# Application Health Checks
# `domain_name` is not defined in this file; it has to be supplied by the
# inventory or play using the role.
app_health_checks:
  - name: framework-health
    url: "https://{{ domain_name }}/health"
    method: GET
    expected_status: 200
    timeout: 10
    interval: 30
  - name: api-health
    url: "https://{{ domain_name }}/api/health"
    method: GET
    expected_status: 200
    timeout: 5
    interval: 60

# System Monitoring Thresholds
monitoring_thresholds:
  cpu_usage_warning: 70
  cpu_usage_critical: 90
  memory_usage_warning: 80
  memory_usage_critical: 95
  disk_usage_warning: 80
  disk_usage_critical: 90
  load_average_warning: 2.0
  load_average_critical: 4.0

# Log Monitoring
log_monitoring_enabled: true
log_files_to_monitor:
  - path: /var/log/nginx/error.log
    patterns:
      - "error"
      - "warn"
      - "crit"
    alert_threshold: 10  # alerts per minute
  # NOTE(review): if the consumer applies these as unanchored regexes they
  # match any three-digit run starting with 5 or 4 (e.g. byte counts), not
  # only the HTTP status field - confirm how the patterns are used.
  - path: /var/log/nginx/access.log
    patterns:
      - "5[0-9][0-9]"  # 5xx errors
      - "4[0-9][0-9]"  # 4xx errors
    alert_threshold: 20
  - path: /var/log/auth.log
    patterns:
      - "Failed password"
      - "authentication failure"
    alert_threshold: 5

# Alerting Configuration
# `ssl_email` is not defined in this file; it has to be supplied by the
# inventory or play using the role.
alerting_enabled: true
alert_email: "{{ ssl_email }}"
alert_methods:
  - email
  - log

# Backup Monitoring
# Follows `backup_enabled` when that variable is set elsewhere, else false.
backup_monitoring_enabled: "{{ backup_enabled | default(false) }}"
backup_check_command: "/usr/local/bin/check-backups.sh"
backup_alert_threshold: 24  # hours

# Performance Monitoring
performance_monitoring_enabled: true
performance_check_interval: 300  # 5 minutes
performance_metrics:
  - response_time
  - throughput
  - error_rate
  - resource_usage

# Container Monitoring
docker_monitoring_enabled: true
docker_stats_interval: 60
# The {{.Names}}-style placeholders are Go template syntax meant for
# `docker ps --format`, not Jinja2. The !unsafe tag stops Ansible from trying
# to template the string, which would otherwise fail with a syntax error.
docker_health_check_command: !unsafe "docker ps --format 'table {{.Names}}\\t{{.Status}}\\t{{.Ports}}'"

# Custom Framework Monitoring
framework_monitoring:
  console_health_check: "php console.php framework:health-check"
  mcp_server_check: "php console.php mcp:server --test"
  queue_monitoring: "php console.php queue:status"
  cache_monitoring: "php console.php cache:status"

# Monitoring Scripts Location
monitoring_scripts_dir: "{{ monitoring_home }}/scripts"
monitoring_logs_dir: "/var/log/monitoring"
monitoring_config_dir: "{{ monitoring_home }}/config"

# Cleanup Configuration
log_retention_days: 30
metrics_retention_days: 7
cleanup_schedule: "0 2 * * *"  # Daily at 2 AM