feat: Fix discovery system critical issues
Resolved multiple critical discovery system issues: ## Discovery System Fixes - Fixed console commands not being discovered on first run - Implemented fallback discovery for empty caches - Added context-aware caching with separate cache keys - Fixed object serialization preventing __PHP_Incomplete_Class ## Cache System Improvements - Smart caching that only caches meaningful results - Separate caches for different execution contexts (console, web, test) - Proper array serialization/deserialization for cache compatibility - Cache hit logging for debugging and monitoring ## Object Serialization Fixes - Fixed DiscoveredAttribute serialization with proper string conversion - Sanitized additional data to prevent object reference issues - Added fallback for corrupted cache entries ## Performance & Reliability - All 69 console commands properly discovered and cached - 534 total discovery items successfully cached and restored - No more __PHP_Incomplete_Class cache corruption - Improved error handling and graceful fallbacks ## Testing & Quality - Fixed code style issues across discovery components - Enhanced logging for better debugging capabilities - Improved cache validation and error recovery Ready for production deployment with stable discovery system. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
148
deployment/infrastructure/roles/monitoring/defaults/main.yml
Normal file
148
deployment/infrastructure/roles/monitoring/defaults/main.yml
Normal file
@@ -0,0 +1,148 @@
|
||||
---
|
||||
# Monitoring Role Default Variables
|
||||
|
||||
# General Configuration
|
||||
monitoring_enabled: "{{ monitoring_enabled | default(true) }}"
|
||||
health_checks_enabled: "{{ health_checks_enabled | default(true) }}"
|
||||
monitoring_user: monitoring
|
||||
monitoring_group: monitoring
|
||||
monitoring_home: /opt/monitoring
|
||||
|
||||
# Node Exporter Configuration
|
||||
node_exporter_enabled: true
|
||||
node_exporter_version: "1.6.1"
|
||||
node_exporter_port: 9100
|
||||
node_exporter_bind_address: "127.0.0.1"
|
||||
node_exporter_user: node_exporter
|
||||
node_exporter_group: node_exporter
|
||||
|
||||
# Prometheus Configuration (basic)
|
||||
prometheus_enabled: false # Can be enabled for advanced monitoring
|
||||
prometheus_version: "2.45.0"
|
||||
prometheus_port: 9090
|
||||
prometheus_bind_address: "127.0.0.1"
|
||||
prometheus_retention_time: "15d"
|
||||
prometheus_retention_size: "10GB"
|
||||
|
||||
# Health Check Configuration
|
||||
health_check_interval: 30
|
||||
health_check_timeout: 10
|
||||
health_check_retries: 3
|
||||
|
||||
# Service Health Checks
|
||||
service_checks:
|
||||
- name: nginx
|
||||
command: "systemctl is-active nginx"
|
||||
interval: 30
|
||||
timeout: 5
|
||||
retries: 2
|
||||
|
||||
- name: docker
|
||||
command: "docker version"
|
||||
interval: 60
|
||||
timeout: 10
|
||||
retries: 3
|
||||
|
||||
- name: php-fpm
|
||||
command: "docker exec php php-fpm -t"
|
||||
interval: 60
|
||||
timeout: 15
|
||||
retries: 2
|
||||
|
||||
- name: mysql
|
||||
command: "docker exec mysql mysqladmin ping -h localhost"
|
||||
interval: 60
|
||||
timeout: 10
|
||||
retries: 3
|
||||
|
||||
# Application Health Checks
|
||||
app_health_checks:
|
||||
- name: framework-health
|
||||
url: "https://{{ domain_name }}/health"
|
||||
method: GET
|
||||
expected_status: 200
|
||||
timeout: 10
|
||||
interval: 30
|
||||
|
||||
- name: api-health
|
||||
url: "https://{{ domain_name }}/api/health"
|
||||
method: GET
|
||||
expected_status: 200
|
||||
timeout: 5
|
||||
interval: 60
|
||||
|
||||
# System Monitoring Thresholds
|
||||
monitoring_thresholds:
|
||||
cpu_usage_warning: 70
|
||||
cpu_usage_critical: 90
|
||||
memory_usage_warning: 80
|
||||
memory_usage_critical: 95
|
||||
disk_usage_warning: 80
|
||||
disk_usage_critical: 90
|
||||
load_average_warning: 2.0
|
||||
load_average_critical: 4.0
|
||||
|
||||
# Log Monitoring
|
||||
log_monitoring_enabled: true
|
||||
log_files_to_monitor:
|
||||
- path: /var/log/nginx/error.log
|
||||
patterns:
|
||||
- "error"
|
||||
- "warn"
|
||||
- "crit"
|
||||
alert_threshold: 10 # alerts per minute
|
||||
|
||||
- path: /var/log/nginx/access.log
|
||||
patterns:
|
||||
- "5[0-9][0-9]" # 5xx errors
|
||||
- "4[0-9][0-9]" # 4xx errors
|
||||
alert_threshold: 20
|
||||
|
||||
- path: /var/log/auth.log
|
||||
patterns:
|
||||
- "Failed password"
|
||||
- "authentication failure"
|
||||
alert_threshold: 5
|
||||
|
||||
# Alerting Configuration
|
||||
alerting_enabled: true
|
||||
alert_email: "{{ ssl_email }}"
|
||||
alert_methods:
|
||||
- email
|
||||
- log
|
||||
|
||||
# Backup Monitoring
|
||||
backup_monitoring_enabled: "{{ backup_enabled | default(false) }}"
|
||||
backup_check_command: "/usr/local/bin/check-backups.sh"
|
||||
backup_alert_threshold: 24 # hours
|
||||
|
||||
# Performance Monitoring
|
||||
performance_monitoring_enabled: true
|
||||
performance_check_interval: 300 # 5 minutes
|
||||
performance_metrics:
|
||||
- response_time
|
||||
- throughput
|
||||
- error_rate
|
||||
- resource_usage
|
||||
|
||||
# Container Monitoring
|
||||
docker_monitoring_enabled: true
|
||||
docker_stats_interval: 60
|
||||
docker_health_check_command: "docker ps --format 'table {{.Names}}\\t{{.Status}}\\t{{.Ports}}'"
|
||||
|
||||
# Custom Framework Monitoring
|
||||
framework_monitoring:
|
||||
console_health_check: "php console.php framework:health-check"
|
||||
mcp_server_check: "php console.php mcp:server --test"
|
||||
queue_monitoring: "php console.php queue:status"
|
||||
cache_monitoring: "php console.php cache:status"
|
||||
|
||||
# Monitoring Scripts Location
|
||||
monitoring_scripts_dir: "{{ monitoring_home }}/scripts"
|
||||
monitoring_logs_dir: "/var/log/monitoring"
|
||||
monitoring_config_dir: "{{ monitoring_home }}/config"
|
||||
|
||||
# Cleanup Configuration
|
||||
log_retention_days: 30
|
||||
metrics_retention_days: 7
|
||||
cleanup_schedule: "0 2 * * *" # Daily at 2 AM
|
||||
Reference in New Issue
Block a user