Resolved multiple critical discovery system issues:

## Discovery System Fixes
- Fixed console commands not being discovered on first run
- Implemented fallback discovery for empty caches
- Added context-aware caching with separate cache keys
- Fixed object serialization to prevent __PHP_Incomplete_Class errors

## Cache System Improvements
- Smart caching that only caches meaningful results
- Separate caches for different execution contexts (console, web, test)
- Proper array serialization/deserialization for cache compatibility
- Cache hit logging for debugging and monitoring

## Object Serialization Fixes
- Fixed DiscoveredAttribute serialization with proper string conversion
- Sanitized additional data to prevent object reference issues
- Added fallback for corrupted cache entries

## Performance & Reliability
- All 69 console commands properly discovered and cached
- 534 total discovery items successfully cached and restored
- No more __PHP_Incomplete_Class cache corruption
- Improved error handling and graceful fallbacks

## Testing & Quality
- Fixed code style issues across discovery components
- Enhanced logging for better debugging capabilities
- Improved cache validation and error recovery

Ready for production deployment with stable discovery system.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
148 lines
3.6 KiB
YAML
148 lines
3.6 KiB
YAML
---
|
|
# Monitoring Role Default Variables
|
|
|
|
# General Configuration
# The two master switches are plain booleans. A default must not be written
# as a template that reads its own name: when nothing overrides it, Ansible
# resolves the variable through this very default and aborts with a
# recursive-template error. Override these from inventory or group_vars.
monitoring_enabled: true
health_checks_enabled: true

# Account and base directory for the monitoring tooling. monitoring_home is
# the prefix of monitoring_scripts_dir and monitoring_config_dir further down.
monitoring_user: monitoring
monitoring_group: monitoring
monitoring_home: /opt/monitoring
|
|
|
|
# Node Exporter Configuration
node_exporter_enabled: true
node_exporter_version: '1.6.1'  # quoted so the version stays a string
node_exporter_port: 9100
node_exporter_bind_address: '127.0.0.1'  # loopback address
node_exporter_user: node_exporter
node_exporter_group: node_exporter
|
|
|
|
# Prometheus Configuration (basic)
# Off by default; can be enabled for advanced monitoring.
prometheus_enabled: false
prometheus_version: '2.45.0'  # quoted so the version stays a string
prometheus_port: 9090
prometheus_bind_address: '127.0.0.1'  # loopback address
prometheus_retention_time: '15d'
prometheus_retention_size: '10GB'
|
|
|
|
# Health Check Configuration
# Top-level interval, timeout and retry count. Units are not stated in this
# file (presumably seconds for interval and timeout - confirm against the
# tasks/templates that consume these values).
health_check_interval: 30
health_check_timeout: 10
health_check_retries: 3
|
|
|
|
# Service Health Checks
# One entry per service: a name, the command used to probe it, and that
# check's own interval, timeout and retry count.
service_checks:
  - name: nginx
    command: 'systemctl is-active nginx'
    interval: 30
    timeout: 5
    retries: 2

  - name: docker
    command: 'docker version'
    interval: 60
    timeout: 10
    retries: 3

  - name: php-fpm
    command: 'docker exec php php-fpm -t'
    interval: 60
    timeout: 15
    retries: 2

  - name: mysql
    command: 'docker exec mysql mysqladmin ping -h localhost'
    interval: 60
    timeout: 10
    retries: 3
|
|
|
|
# Application Health Checks
# HTTP probes against the deployed application. The URLs are built from
# domain_name, which is not defined in this section and has to be supplied
# elsewhere (inventory, group_vars, ...).
app_health_checks:
  - name: framework-health
    url: 'https://{{ domain_name }}/health'
    method: GET
    expected_status: 200
    timeout: 10
    interval: 30

  - name: api-health
    url: 'https://{{ domain_name }}/api/health'
    method: GET
    expected_status: 200
    timeout: 5
    interval: 60
|
|
|
|
# System Monitoring Thresholds
# Warning/critical pairs per resource. Units are not stated in this file
# (cpu/memory/disk look like percentages - confirm with the consumer).
monitoring_thresholds:
  cpu_usage_warning: 70
  cpu_usage_critical: 90

  memory_usage_warning: 80
  memory_usage_critical: 95

  disk_usage_warning: 80
  disk_usage_critical: 90

  load_average_warning: 2.0
  load_average_critical: 4.0
|
|
|
|
# Log Monitoring
log_monitoring_enabled: true

# Per log file: the patterns to look for and the alert threshold.
# NOTE(review): the status-code patterns are unanchored, so they may also
# match other three-digit runs in a line - confirm how the consumer applies them.
log_files_to_monitor:
  - path: /var/log/nginx/error.log
    patterns:
      - 'error'
      - 'warn'
      - 'crit'
    alert_threshold: 10  # alerts per minute

  - path: /var/log/nginx/access.log
    patterns:
      - '5[0-9][0-9]'  # 5xx errors
      - '4[0-9][0-9]'  # 4xx errors
    alert_threshold: 20

  - path: /var/log/auth.log
    patterns:
      - 'Failed password'
      - 'authentication failure'
    alert_threshold: 5
|
|
|
|
# Alerting Configuration
alerting_enabled: true

# Recipient is taken from ssl_email, which is not defined in this section and
# has to be supplied elsewhere.
alert_email: '{{ ssl_email }}'

alert_methods:
  - email
  - log
|
|
|
|
# Backup Monitoring
# Follows the backup_enabled variable (not defined in this section) and falls
# back to false when it is undefined. The bool filter normalises string inputs
# such as "yes" or "true" (e.g. from `-e backup_enabled=yes`) into a real
# boolean, so conditionals that read this value behave predictably.
backup_monitoring_enabled: "{{ backup_enabled | default(false) | bool }}"
backup_check_command: "/usr/local/bin/check-backups.sh"
backup_alert_threshold: 24  # hours
|
|
|
|
# Performance Monitoring
performance_monitoring_enabled: true
performance_check_interval: 300  # 5 minutes

# Names of the metrics to collect.
performance_metrics:
  - response_time
  - throughput
  - error_rate
  - resource_usage
|
|
|
|
# Container Monitoring
docker_monitoring_enabled: true
docker_stats_interval: 60  # units not stated here; presumably seconds - confirm

# The --format argument is a Go template meant for docker, not for Jinja2.
# Left bare, Ansible tries to render the double-brace fields itself and fails
# with a template syntax error, so the whole command is wrapped in a raw block
# and reaches docker unchanged. The doubled backslashes are YAML escapes: the
# resulting value contains a literal backslash followed by "t".
docker_health_check_command: "{% raw %}docker ps --format 'table {{.Names}}\\t{{.Status}}\\t{{.Ports}}'{% endraw %}"
|
|
|
|
# Custom Framework Monitoring
# Console commands of the application, keyed by what they check.
framework_monitoring:
  console_health_check: 'php console.php framework:health-check'
  mcp_server_check: 'php console.php mcp:server --test'
  queue_monitoring: 'php console.php queue:status'
  cache_monitoring: 'php console.php cache:status'
|
|
|
|
# Monitoring Scripts Location
# Scripts and config live below monitoring_home; logs use a fixed path.
monitoring_scripts_dir: '{{ monitoring_home }}/scripts'
monitoring_logs_dir: '/var/log/monitoring'
monitoring_config_dir: '{{ monitoring_home }}/config'
|
|
|
|
# Cleanup Configuration
log_retention_days: 30
metrics_retention_days: 7
cleanup_schedule: '0 2 * * *'  # Daily at 2 AM