Files
michaelschiemer/src/Framework/Monitoring/AdvancedMonitoring.php
Michael Schiemer 55a330b223 Enable Discovery debug logging for production troubleshooting
- Add DISCOVERY_LOG_LEVEL=debug
- Add DISCOVERY_SHOW_PROGRESS=true
- Temporary changes for debugging InitializerProcessor fixes on production
2025-08-11 20:13:26 +02:00

535 lines
17 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Framework\Monitoring;
use App\Framework\Core\ValueObjects\Duration;
use App\Framework\Core\ValueObjects\Percentage;
use App\Framework\Core\ValueObjects\Timestamp;
use App\Framework\DateTime\Clock;
use App\Framework\Logging\Logger;
use App\Framework\StateManagement\StateManager;
/**
* Advanced monitoring system for circuit breakers and error boundaries
*/
final readonly class AdvancedMonitoring
{
/**
 * Wires the monitoring service to its collaborators via promoted constructor properties.
 *
 * @param StateManager $stateManager Store used to read and write per-sample and aggregated metric state.
 * @param Clock $clock Source of the timestamps used for keys, dashboards, summaries and alerts.
 * @param Logger|null $logger Optional logger; no method shown in this class currently reads it.
 * @param MonitoringConfig $config Retention and threshold settings; a default-constructed
 *                                 MonitoringConfig is used when omitted.
 */
public function __construct(
private StateManager $stateManager,
private Clock $clock,
private ?Logger $logger = null,
private MonitoringConfig $config = new MonitoringConfig(),
) {
}
/**
 * Store one circuit breaker sample, fold it into the running aggregate and run alert checks.
 *
 * The raw sample is written under "monitoring:circuit:{name}:{timestamp}" with the
 * configured metrics retention.
 *
 * @param string $circuitName Name of the circuit the sample belongs to.
 * @param CircuitBreakerMetrics $metrics Sample to record.
 */
public function recordCircuitBreakerMetrics(string $circuitName, CircuitBreakerMetrics $metrics): void
{
    $recordedAt = $this->clock->time();
    $sampleKey = 'monitoring:circuit:' . $circuitName . ':' . $recordedAt->toTimestamp();

    $this->stateManager->setState($sampleKey, $metrics->toArray(), $this->config->metricsRetention);

    // Keep the per-circuit aggregate in step with the new sample.
    $this->updateAggregatedMetrics($circuitName, $metrics, $recordedAt);

    // Give the alert hook a chance to react to this sample.
    $this->checkCircuitBreakerAlerts($circuitName, $metrics);
}
/**
 * Store one error boundary sample, fold it into the running aggregate and run alert checks.
 *
 * The raw sample is written under "monitoring:boundary:{name}:{timestamp}" with the
 * configured metrics retention.
 *
 * @param string $boundaryName Name of the boundary the sample belongs to.
 * @param ErrorBoundaryMetrics $metrics Sample to record.
 */
public function recordErrorBoundaryMetrics(string $boundaryName, ErrorBoundaryMetrics $metrics): void
{
    $recordedAt = $this->clock->time();
    $sampleKey = 'monitoring:boundary:' . $boundaryName . ':' . $recordedAt->toTimestamp();

    $this->stateManager->setState($sampleKey, $metrics->toArray(), $this->config->metricsRetention);

    // Keep the per-boundary aggregate in step with the new sample.
    $this->updateBoundaryAggregatedMetrics($boundaryName, $metrics, $recordedAt);

    // Give the alert hook a chance to react to this sample.
    $this->checkErrorBoundaryAlerts($boundaryName, $metrics);
}
/**
 * Build the circuit breaker health dashboard.
 *
 * The result contains:
 *  - 'timestamp': ISO string taken from the clock,
 *  - 'total_circuits': number of active circuits,
 *  - 'healthy_circuits', 'degraded_circuits', 'failed_circuits', 'unknown_circuits':
 *    per-status counters,
 *  - 'circuits': map of circuit name to its health report.
 *
 * @return array<string, mixed>
 */
public function getCircuitBreakerDashboard(): array
{
    $circuits = $this->getActiveCircuits();
    $dashboard = [
        'timestamp' => $this->clock->time()->toIsoString(),
        'total_circuits' => count($circuits),
        'healthy_circuits' => 0,
        'degraded_circuits' => 0,
        'failed_circuits' => 0,
        // Circuits without aggregated state are reported with status 'unknown'.
        'unknown_circuits' => 0,
        'circuits' => [],
    ];

    foreach ($circuits as $circuitName) {
        $health = $this->getCircuitHealth($circuitName);
        $dashboard['circuits'][$circuitName] = $health;

        // getCircuitHealth() may report 'unknown'; a match without a catch-all arm
        // would throw UnhandledMatchError for such circuits.
        $counterKey = match ($health['status']) {
            'healthy' => 'healthy_circuits',
            'degraded' => 'degraded_circuits',
            'failed' => 'failed_circuits',
            default => 'unknown_circuits',
        };
        $dashboard[$counterKey]++;
    }

    return $dashboard;
}
/**
 * Build the error boundary health dashboard.
 *
 * The result contains:
 *  - 'timestamp': ISO string taken from the clock,
 *  - 'total_boundaries': number of active boundaries,
 *  - 'healthy_boundaries', 'degraded_boundaries', 'failed_boundaries', 'unknown_boundaries':
 *    per-status counters,
 *  - 'boundaries': map of boundary name to its health report.
 *
 * @return array<string, mixed>
 */
public function getErrorBoundaryDashboard(): array
{
    $boundaries = $this->getActiveBoundaries();
    $dashboard = [
        'timestamp' => $this->clock->time()->toIsoString(),
        'total_boundaries' => count($boundaries),
        'healthy_boundaries' => 0,
        'degraded_boundaries' => 0,
        'failed_boundaries' => 0,
        // Boundaries without aggregated state are reported with status 'unknown'.
        'unknown_boundaries' => 0,
        'boundaries' => [],
    ];

    foreach ($boundaries as $boundaryName) {
        $health = $this->getBoundaryHealth($boundaryName);
        $dashboard['boundaries'][$boundaryName] = $health;

        // getBoundaryHealth() may report 'unknown'; a match without a catch-all arm
        // would throw UnhandledMatchError for such boundaries.
        $counterKey = match ($health['status']) {
            'healthy' => 'healthy_boundaries',
            'degraded' => 'degraded_boundaries',
            'failed' => 'failed_boundaries',
            default => 'unknown_boundaries',
        };
        $dashboard[$counterKey]++;
    }

    return $dashboard;
}
/**
 * Summarise the health of all aggregated circuit breakers and error boundaries.
 *
 * Every component is classified as healthy, degraded or failed. The overall score is the
 * healthy share of all components, or 1.0 when there are no components at all.
 *
 * @return SystemHealthSummary Snapshot stamped with the current clock time.
 */
public function getSystemHealthSummary(): SystemHealthSummary
{
    $circuitMetrics = $this->getAggregatedCircuitMetrics();
    $boundaryMetrics = $this->getAggregatedBoundaryMetrics();

    // Classify every component first, circuits before boundaries.
    $statuses = [];
    foreach ($circuitMetrics as $circuit) {
        $statuses[] = $this->calculateHealthStatus($circuit);
    }
    foreach ($boundaryMetrics as $boundary) {
        $statuses[] = $this->calculateBoundaryHealthStatus($boundary);
    }

    $tally = ['healthy' => 0, 'degraded' => 0, 'failed' => 0];
    foreach ($statuses as $status) {
        // No default arm: an unexpected status still raises UnhandledMatchError.
        $bucket = match ($status) {
            'healthy', 'degraded', 'failed' => $status,
        };
        ++$tally[$bucket];
    }

    $circuitCount = count($circuitMetrics);
    $boundaryCount = count($boundaryMetrics);
    $componentCount = $circuitCount + $boundaryCount;

    $healthyShare = $componentCount > 0 ? $tally['healthy'] / $componentCount : 1.0;
    $overallHealthScore = Percentage::fromFloat($healthyShare);

    return new SystemHealthSummary(
        timestamp: $this->clock->time(),
        totalComponents: $componentCount,
        healthyComponents: $tally['healthy'],
        degradedComponents: $tally['degraded'],
        failedComponents: $tally['failed'],
        overallHealthScore: $overallHealthScore,
        circuitBreakerCount: $circuitCount,
        errorBoundaryCount: $boundaryCount,
    );
}
/**
 * Analyse the trend of one circuit breaker over a trailing time window.
 *
 * The window ends at the current clock time and starts $timeWindow earlier.
 *
 * @param string $circuitName Circuit whose samples are analysed.
 * @param Duration $timeWindow Length of the trailing window.
 * @return array Result of the trend analysis over the samples in the window.
 */
public function getCircuitBreakerTrends(string $circuitName, Duration $timeWindow): array
{
    $windowEnd = $this->clock->time();
    $windowStart = Timestamp::fromFloat($windowEnd->toFloat() - $timeWindow->toSeconds());

    $samples = $this->getMetricsInTimeRange("monitoring:circuit:{$circuitName}", $windowStart, $windowEnd);

    return $this->analyzeTrends($samples);
}
/**
 * Collect performance analytics for all active circuits and boundaries over a trailing window.
 *
 * The result contains:
 *  - 'time_range': ISO start/end of the window and its human-readable duration,
 *  - 'circuit_breakers': map of circuit name to its performance analysis,
 *  - 'error_boundaries': map of boundary name to its performance analysis,
 *  - 'system_summary': totals across circuits. While no requests were counted it keeps
 *    its defaults (zero totals, null averages/rates); otherwise it carries Duration and
 *    Percentage value objects.
 *
 * @param Duration $timeWindow Length of the window ending at the current clock time.
 * @return array<string, mixed>
 */
public function getPerformanceAnalytics(Duration $timeWindow): array
{
    $endTime = $this->clock->time();
    $startTime = Timestamp::fromFloat($endTime->toFloat() - $timeWindow->toSeconds());
    $circuits = $this->getActiveCircuits();
    $boundaries = $this->getActiveBoundaries();

    $analytics = [
        'time_range' => [
            'start' => $startTime->toIsoString(),
            'end' => $endTime->toIsoString(),
            'duration' => $timeWindow->toHumanReadable(),
        ],
        'circuit_breakers' => [],
        'error_boundaries' => [],
        'system_summary' => [
            'total_requests' => 0,
            'total_failures' => 0,
            'average_response_time' => null,
            'peak_response_time' => null,
            'error_rate' => null,
        ],
    ];

    $totalRequests = 0;
    $totalFailures = 0;
    $totalResponseTime = 0;
    $peakResponseTime = Duration::zero();

    // Analyze circuit breakers
    foreach ($circuits as $circuitName) {
        $metrics = $this->getMetricsInTimeRange(
            "monitoring:circuit:{$circuitName}",
            $startTime,
            $endTime
        );
        $circuitAnalysis = $this->analyzeCircuitPerformance($metrics);
        $analytics['circuit_breakers'][$circuitName] = $circuitAnalysis;

        // The analysis array may omit keys (for example when there were no samples);
        // treat missing figures as zero instead of reading undefined keys.
        $totalRequests += $circuitAnalysis['total_requests'] ?? 0;
        $totalFailures += $circuitAnalysis['total_failures'] ?? 0;
        $totalResponseTime += $circuitAnalysis['total_response_time_ms'] ?? 0;

        $circuitPeak = $circuitAnalysis['peak_response_time'] ?? 0;
        if ($circuitPeak > $peakResponseTime->toMilliseconds()) {
            $peakResponseTime = Duration::fromMilliseconds($circuitPeak);
        }
    }

    // Analyze error boundaries
    foreach ($boundaries as $boundaryName) {
        $metrics = $this->getMetricsInTimeRange(
            "monitoring:boundary:{$boundaryName}",
            $startTime,
            $endTime
        );
        $analytics['error_boundaries'][$boundaryName] = $this->analyzeBoundaryPerformance($metrics);
    }

    // System summary: only meaningful once at least one request was counted,
    // which also protects the divisions below from a zero divisor.
    if ($totalRequests > 0) {
        $analytics['system_summary'] = [
            'total_requests' => $totalRequests,
            'total_failures' => $totalFailures,
            'average_response_time' => Duration::fromMilliseconds($totalResponseTime / $totalRequests),
            'peak_response_time' => $peakResponseTime,
            'error_rate' => Percentage::fromFloat($totalFailures / $totalRequests),
        ];
    }

    return $analytics;
}
/**
 * Produce an alert for every active circuit breaker and error boundary that is not healthy.
 *
 * A 'failed' status yields severity 'critical'; any other non-healthy status yields
 * 'warning'. Circuit breaker alerts come first, followed by error boundary alerts.
 *
 * @return array List of MonitoringAlert instances; empty when everything is healthy.
 */
public function generateAlerts(): array
{
    $alerts = [];

    // Circuit breakers
    foreach ($this->getActiveCircuits() as $circuitName) {
        $health = $this->getCircuitHealth($circuitName);
        if ($health['status'] === 'healthy') {
            continue;
        }

        $severity = $health['status'] === 'failed' ? 'critical' : 'warning';
        $alerts[] = new MonitoringAlert(
            type: 'circuit_breaker',
            severity: $severity,
            component: $circuitName,
            message: "Circuit breaker '{$circuitName}' is in {$health['status']} state",
            timestamp: $this->clock->time(),
            metrics: $health,
        );
    }

    // Error boundaries
    foreach ($this->getActiveBoundaries() as $boundaryName) {
        $health = $this->getBoundaryHealth($boundaryName);
        if ($health['status'] === 'healthy') {
            continue;
        }

        $severity = $health['status'] === 'failed' ? 'critical' : 'warning';
        $alerts[] = new MonitoringAlert(
            type: 'error_boundary',
            severity: $severity,
            component: $boundaryName,
            message: "Error boundary '{$boundaryName}' is in {$health['status']} state",
            timestamp: $this->clock->time(),
            metrics: $health,
        );
    }

    return $alerts;
}
/**
 * Build the health report for one circuit from its aggregated state.
 *
 * @param string $circuitName Circuit to look up.
 * @return array Report with 'status', 'last_updated' and 'metrics'. When no aggregated
 *               state exists the status is 'unknown' and the other two entries are null.
 */
private function getCircuitHealth(string $circuitName): array
{
    $aggregate = $this->stateManager->getState("monitoring:circuit:aggregated:{$circuitName}");

    if ($aggregate !== null) {
        return [
            'status' => $this->calculateHealthStatus($aggregate),
            'last_updated' => $aggregate['last_updated'] ?? null,
            'metrics' => $aggregate,
        ];
    }

    // Nothing has been aggregated for this circuit yet.
    return [
        'status' => 'unknown',
        'last_updated' => null,
        'metrics' => null,
    ];
}
/**
 * Build the health report for one error boundary from its aggregated state.
 *
 * @param string $boundaryName Boundary to look up.
 * @return array Report with 'status', 'last_updated' and 'metrics'. When no aggregated
 *               state exists the status is 'unknown' and the other two entries are null.
 */
private function getBoundaryHealth(string $boundaryName): array
{
    $aggregate = $this->stateManager->getState("monitoring:boundary:aggregated:{$boundaryName}");

    if ($aggregate !== null) {
        return [
            'status' => $this->calculateBoundaryHealthStatus($aggregate),
            'last_updated' => $aggregate['last_updated'] ?? null,
            'metrics' => $aggregate,
        ];
    }

    // Nothing has been aggregated for this boundary yet.
    return [
        'status' => 'unknown',
        'last_updated' => null,
        'metrics' => null,
    ];
}
/**
 * Classify circuit breaker metrics as 'failed', 'degraded' or 'healthy'.
 *
 * Reads 'error_rate' and 'average_response_time' from the metrics (both default to 0.0).
 * An error rate above the critical threshold means 'failed'; an error rate above the
 * warning threshold or a response time above the slow-response threshold (compared in
 * milliseconds) means 'degraded'; anything else is 'healthy'.
 *
 * @param array $metrics Aggregated circuit metrics.
 * @return string One of 'failed', 'degraded', 'healthy'.
 */
private function calculateHealthStatus(array $metrics): string
{
    $errorRate = $metrics['error_rate'] ?? 0.0;
    $responseTime = $metrics['average_response_time'] ?? 0.0;

    // Arms are tested top to bottom; the first condition that holds decides the status.
    return match (true) {
        $errorRate > $this->config->criticalErrorRateThreshold => 'failed',
        $errorRate > $this->config->warningErrorRateThreshold,
        $responseTime > $this->config->slowResponseThreshold->toMilliseconds() => 'degraded',
        default => 'healthy',
    };
}
/**
 * Classify error boundary metrics as 'failed', 'degraded' or 'healthy'.
 *
 * Reads 'fallback_rate' and 'error_rate' from the metrics (both default to 0.0).
 * A fallback rate above the critical threshold means 'failed'; a fallback rate above its
 * warning threshold or an error rate above the warning error threshold means 'degraded';
 * anything else is 'healthy'.
 *
 * @param array $metrics Aggregated boundary metrics.
 * @return string One of 'failed', 'degraded', 'healthy'.
 */
private function calculateBoundaryHealthStatus(array $metrics): string
{
    $fallbackRate = $metrics['fallback_rate'] ?? 0.0;
    $errorRate = $metrics['error_rate'] ?? 0.0;

    // Arms are tested top to bottom; the first condition that holds decides the status.
    return match (true) {
        $fallbackRate > $this->config->criticalFallbackRateThreshold => 'failed',
        $fallbackRate > $this->config->warningFallbackRateThreshold,
        $errorRate > $this->config->warningErrorRateThreshold => 'degraded',
        default => 'healthy',
    };
}
/**
 * Fold a new sample into the stored aggregate of a circuit breaker.
 *
 * The aggregate lives under "monitoring:circuit:aggregated:{name}" and is rewritten
 * through the state manager's update callback.
 *
 * @param string $circuitName Circuit whose aggregate is updated.
 * @param CircuitBreakerMetrics $metrics New sample.
 * @param Timestamp $timestamp Time the sample was recorded.
 */
private function updateAggregatedMetrics(
    string $circuitName,
    CircuitBreakerMetrics $metrics,
    Timestamp $timestamp
): void {
    $this->stateManager->updateState(
        "monitoring:circuit:aggregated:{$circuitName}",
        fn ($existing) => $this->aggregateCircuitMetrics($existing, $metrics, $timestamp)
    );
}
/**
 * Fold a new sample into the stored aggregate of an error boundary.
 *
 * The aggregate lives under "monitoring:boundary:aggregated:{name}" and is rewritten
 * through the state manager's update callback.
 *
 * @param string $boundaryName Boundary whose aggregate is updated.
 * @param ErrorBoundaryMetrics $metrics New sample.
 * @param Timestamp $timestamp Time the sample was recorded.
 */
private function updateBoundaryAggregatedMetrics(
    string $boundaryName,
    ErrorBoundaryMetrics $metrics,
    Timestamp $timestamp
): void {
    $this->stateManager->updateState(
        "monitoring:boundary:aggregated:{$boundaryName}",
        fn ($existing) => $this->aggregateBoundaryMetrics($existing, $metrics, $timestamp)
    );
}
/**
 * Combine the stored circuit aggregate with a new sample.
 *
 * Without an existing aggregate, the sample's array form is used as-is and stamped with
 * 'first_seen' and 'last_updated'. Otherwise request and failure totals are summed, the
 * error rate is recomputed from the totals, and the peak response time is the maximum of
 * the stored and the new value.
 *
 * NOTE: 'average_response_time' is taken from the latest sample only; it is not averaged
 * across samples.
 *
 * @param array|null $existing Previously stored aggregate, or null when none exists.
 * @param CircuitBreakerMetrics $metrics New sample.
 * @param Timestamp $timestamp Time the sample was recorded.
 * @return array Aggregate to store.
 */
private function aggregateCircuitMetrics(?array $existing, CircuitBreakerMetrics $metrics, Timestamp $timestamp): array
{
    if ($existing === null) {
        return array_merge($metrics->toArray(), [
            'first_seen' => $timestamp->toIsoString(),
            'last_updated' => $timestamp->toIsoString(),
        ]);
    }

    // Simple aggregation - in production you'd want more sophisticated algorithms
    $totalRequests = ($existing['total_requests'] ?? 0) + $metrics->totalRequests;
    $totalFailures = ($existing['total_failures'] ?? 0) + $metrics->totalFailures;
    $recordedAt = $timestamp->toIsoString();

    return [
        'total_requests' => $totalRequests,
        'total_failures' => $totalFailures,
        'error_rate' => $totalRequests > 0 ? $totalFailures / $totalRequests : 0.0,
        'average_response_time' => $metrics->averageResponseTime->toMilliseconds(),
        'peak_response_time' => max(
            $existing['peak_response_time'] ?? 0,
            $metrics->peakResponseTime->toMilliseconds()
        ),
        // Guard like every other read of $existing: stored state without 'first_seen'
        // falls back to the current sample time instead of reading an undefined key.
        'first_seen' => $existing['first_seen'] ?? $recordedAt,
        'last_updated' => $recordedAt,
    ];
}
/**
 * Combine the stored error boundary aggregate with a new sample.
 *
 * Without an existing aggregate, the sample's array form is used as-is and stamped with
 * 'first_seen' and 'last_updated'. Otherwise execution, fallback and error totals are
 * summed and the fallback and error rates are recomputed from the totals.
 *
 * NOTE: 'average_execution_time' is taken from the latest sample only; it is not averaged
 * across samples.
 *
 * @param array|null $existing Previously stored aggregate, or null when none exists.
 * @param ErrorBoundaryMetrics $metrics New sample.
 * @param Timestamp $timestamp Time the sample was recorded.
 * @return array Aggregate to store.
 */
private function aggregateBoundaryMetrics(?array $existing, ErrorBoundaryMetrics $metrics, Timestamp $timestamp): array
{
    if ($existing === null) {
        return array_merge($metrics->toArray(), [
            'first_seen' => $timestamp->toIsoString(),
            'last_updated' => $timestamp->toIsoString(),
        ]);
    }

    $totalExecutions = ($existing['total_executions'] ?? 0) + $metrics->totalExecutions;
    $totalFallbacks = ($existing['total_fallbacks'] ?? 0) + $metrics->totalFallbacks;
    $totalErrors = ($existing['total_errors'] ?? 0) + $metrics->totalErrors;
    $recordedAt = $timestamp->toIsoString();

    return [
        'total_executions' => $totalExecutions,
        'total_fallbacks' => $totalFallbacks,
        'total_errors' => $totalErrors,
        'fallback_rate' => $totalExecutions > 0 ? $totalFallbacks / $totalExecutions : 0.0,
        'error_rate' => $totalExecutions > 0 ? $totalErrors / $totalExecutions : 0.0,
        'average_execution_time' => $metrics->averageExecutionTime->toMilliseconds(),
        // Guard like every other read of $existing: stored state without 'first_seen'
        // falls back to the current sample time instead of reading an undefined key.
        'first_seen' => $existing['first_seen'] ?? $recordedAt,
        'last_updated' => $recordedAt,
    ];
}
// Placeholder methods - implement based on your specific needs.
// Until they are implemented, the dashboards, analytics and alert generation
// iterate over empty lists and therefore report no components.

/**
 * Names of the circuits that dashboards, analytics and alerts iterate over.
 *
 * Placeholder: always returns an empty array.
 *
 * @return array
 */
private function getActiveCircuits(): array
{
return [];
}
/**
 * Names of the error boundaries that dashboards, analytics and alerts iterate over.
 *
 * Placeholder: always returns an empty array.
 *
 * @return array
 */
private function getActiveBoundaries(): array
{
return [];
}
/**
 * Aggregated metrics of all circuits, consumed by getSystemHealthSummary(), which
 * passes each element to calculateHealthStatus() (so elements must be arrays).
 *
 * Placeholder: always returns an empty array.
 *
 * @return array
 */
private function getAggregatedCircuitMetrics(): array
{
return [];
}
/**
 * Aggregated metrics of all error boundaries, consumed by getSystemHealthSummary(), which
 * passes each element to calculateBoundaryHealthStatus() (so elements must be arrays).
 *
 * Placeholder: always returns an empty array.
 *
 * @return array
 */
private function getAggregatedBoundaryMetrics(): array
{
return [];
}
/**
 * Samples stored under the given key prefix between $start and $end.
 *
 * Placeholder: ignores all arguments and always returns an empty array.
 *
 * @param string $prefix Key prefix such as "monitoring:circuit:{name}".
 * @param Timestamp $start Start of the time range.
 * @param Timestamp $end End of the time range.
 * @return array
 */
private function getMetricsInTimeRange(string $prefix, Timestamp $start, Timestamp $end): array
{
return [];
}
/**
 * Trend analysis over a list of samples; its result is returned by getCircuitBreakerTrends().
 *
 * Placeholder: ignores its input and always returns an empty array.
 *
 * @param array $metrics Samples to analyse.
 * @return array
 */
private function analyzeTrends(array $metrics): array
{
return [];
}
/**
 * Performance analysis of one circuit's samples.
 *
 * Placeholder: ignores its input and always returns an empty array.
 * TODO: getPerformanceAnalytics() reads the keys 'total_requests', 'total_failures',
 * 'total_response_time_ms' and 'peak_response_time' from the result, so a real
 * implementation needs to provide them.
 *
 * @param array $metrics Samples to analyse.
 * @return array
 */
private function analyzeCircuitPerformance(array $metrics): array
{
return [];
}
/**
 * Performance analysis of one error boundary's samples; stored as-is in the
 * 'error_boundaries' section by getPerformanceAnalytics().
 *
 * Placeholder: ignores its input and always returns an empty array.
 *
 * @param array $metrics Samples to analyse.
 * @return array
 */
private function analyzeBoundaryPerformance(array $metrics): array
{
return [];
}
/**
 * Alert hook called by recordCircuitBreakerMetrics() after each recorded sample.
 *
 * Placeholder: currently does nothing.
 *
 * @param string $name Circuit name.
 * @param CircuitBreakerMetrics $metrics Sample that was just recorded.
 */
private function checkCircuitBreakerAlerts(string $name, CircuitBreakerMetrics $metrics): void
{
}
/**
 * Alert hook called by recordErrorBoundaryMetrics() after each recorded sample.
 *
 * Placeholder: currently does nothing.
 *
 * @param string $name Boundary name.
 * @param ErrorBoundaryMetrics $metrics Sample that was just recorded.
 */
private function checkErrorBoundaryAlerts(string $name, ErrorBoundaryMetrics $metrics): void
{
}
}