- Add DISCOVERY_LOG_LEVEL=debug - Add DISCOVERY_SHOW_PROGRESS=true - Temporary changes for debugging InitializerProcessor fixes on production
535 lines
17 KiB
PHP
535 lines
17 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Framework\Monitoring;
|
|
|
|
use App\Framework\Core\ValueObjects\Duration;
|
|
use App\Framework\Core\ValueObjects\Percentage;
|
|
use App\Framework\Core\ValueObjects\Timestamp;
|
|
use App\Framework\DateTime\Clock;
|
|
use App\Framework\Logging\Logger;
|
|
use App\Framework\StateManagement\StateManager;
|
|
|
|
/**
 * Advanced monitoring system for circuit breakers and error boundaries.
 *
 * Every recorded sample is written to the StateManager under a per-sample key
 * and folded into a per-component aggregate. Dashboards, the system health
 * summary, analytics and alerts are derived from those stored values.
 *
 * NOTE: the private lookup/analysis methods at the bottom of the class
 * (getActiveCircuits(), getMetricsInTimeRange(), analyze*(), check*Alerts(), ...)
 * are placeholders that currently return empty results or do nothing, so the
 * public read methods report empty data until they are implemented.
 */
final readonly class AdvancedMonitoring
{
    public function __construct(
        private StateManager $stateManager,
        private Clock $clock,
        private ?Logger $logger = null,
        private MonitoringConfig $config = new MonitoringConfig(),
    ) {
    }

    /**
     * Record circuit breaker metrics.
     *
     * Stores the raw sample, updates the circuit's aggregate and runs the
     * alert check for the sample.
     */
    public function recordCircuitBreakerMetrics(string $circuitName, CircuitBreakerMetrics $metrics): void
    {
        $timestamp = $this->clock->time();

        // NOTE(review): toTimestamp() presumably has one-second resolution, in which
        // case two samples for the same circuit within a second share a key and the
        // later one overwrites the earlier - confirm against Timestamp.
        $key = "monitoring:circuit:{$circuitName}:{$timestamp->toTimestamp()}";

        $this->stateManager->setState(
            $key,
            $metrics->toArray(),
            $this->config->metricsRetention
        );

        // Update aggregated metrics
        $this->updateAggregatedMetrics($circuitName, $metrics, $timestamp);

        // Check for alerts
        $this->checkCircuitBreakerAlerts($circuitName, $metrics);
    }

    /**
     * Record error boundary metrics.
     *
     * Stores the raw sample, updates the boundary's aggregate and runs the
     * alert check for the sample.
     */
    public function recordErrorBoundaryMetrics(string $boundaryName, ErrorBoundaryMetrics $metrics): void
    {
        $timestamp = $this->clock->time();

        // NOTE(review): same key-resolution caveat as recordCircuitBreakerMetrics().
        $key = "monitoring:boundary:{$boundaryName}:{$timestamp->toTimestamp()}";

        $this->stateManager->setState(
            $key,
            $metrics->toArray(),
            $this->config->metricsRetention
        );

        // Update aggregated metrics
        $this->updateBoundaryAggregatedMetrics($boundaryName, $metrics, $timestamp);

        // Check for alerts
        $this->checkErrorBoundaryAlerts($boundaryName, $metrics);
    }

    /**
     * Get circuit breaker health dashboard.
     *
     * @return array<string, mixed> Keys: timestamp, total_circuits, healthy_circuits,
     *                              degraded_circuits, failed_circuits, unknown_circuits
     *                              and circuits (health entry per circuit name).
     */
    public function getCircuitBreakerDashboard(): array
    {
        $circuits = $this->getActiveCircuits();
        $dashboard = [
            'timestamp' => $this->clock->time()->toIsoString(),
            'total_circuits' => count($circuits),
            'healthy_circuits' => 0,
            'degraded_circuits' => 0,
            'failed_circuits' => 0,
            'unknown_circuits' => 0,
            'circuits' => [],
        ];

        foreach ($circuits as $circuitName) {
            $health = $this->getCircuitHealth($circuitName);
            $dashboard['circuits'][$circuitName] = $health;

            // getCircuitHealth() reports 'unknown' when no aggregate is stored yet;
            // without the default arm that status raised UnhandledMatchError.
            $counterKey = match ($health['status']) {
                'healthy' => 'healthy_circuits',
                'degraded' => 'degraded_circuits',
                'failed' => 'failed_circuits',
                default => 'unknown_circuits',
            };
            $dashboard[$counterKey]++;
        }

        return $dashboard;
    }

    /**
     * Get error boundary dashboard.
     *
     * @return array<string, mixed> Keys: timestamp, total_boundaries, healthy_boundaries,
     *                              degraded_boundaries, failed_boundaries, unknown_boundaries
     *                              and boundaries (health entry per boundary name).
     */
    public function getErrorBoundaryDashboard(): array
    {
        $boundaries = $this->getActiveBoundaries();
        $dashboard = [
            'timestamp' => $this->clock->time()->toIsoString(),
            'total_boundaries' => count($boundaries),
            'healthy_boundaries' => 0,
            'degraded_boundaries' => 0,
            'failed_boundaries' => 0,
            'unknown_boundaries' => 0,
            'boundaries' => [],
        ];

        foreach ($boundaries as $boundaryName) {
            $health = $this->getBoundaryHealth($boundaryName);
            $dashboard['boundaries'][$boundaryName] = $health;

            // getBoundaryHealth() reports 'unknown' when no aggregate is stored yet;
            // without the default arm that status raised UnhandledMatchError.
            $counterKey = match ($health['status']) {
                'healthy' => 'healthy_boundaries',
                'degraded' => 'degraded_boundaries',
                'failed' => 'failed_boundaries',
                default => 'unknown_boundaries',
            };
            $dashboard[$counterKey]++;
        }

        return $dashboard;
    }

    /**
     * Get system-wide health summary.
     *
     * The overall score is the share of healthy components among all circuit
     * breakers and error boundaries; with no components at all it is 1.0.
     */
    public function getSystemHealthSummary(): SystemHealthSummary
    {
        $circuitMetrics = $this->getAggregatedCircuitMetrics();
        $boundaryMetrics = $this->getAggregatedBoundaryMetrics();

        $totalComponents = count($circuitMetrics) + count($boundaryMetrics);

        // Both calculators only ever return one of these three statuses.
        $counts = ['healthy' => 0, 'degraded' => 0, 'failed' => 0];

        foreach ($circuitMetrics as $metrics) {
            $counts[$this->calculateHealthStatus($metrics)]++;
        }

        foreach ($boundaryMetrics as $metrics) {
            $counts[$this->calculateBoundaryHealthStatus($metrics)]++;
        }

        $overallHealthScore = $totalComponents > 0
            ? Percentage::fromFloat($counts['healthy'] / $totalComponents)
            : Percentage::fromFloat(1.0);

        return new SystemHealthSummary(
            timestamp: $this->clock->time(),
            totalComponents: $totalComponents,
            healthyComponents: $counts['healthy'],
            degradedComponents: $counts['degraded'],
            failedComponents: $counts['failed'],
            overallHealthScore: $overallHealthScore,
            circuitBreakerCount: count($circuitMetrics),
            errorBoundaryCount: count($boundaryMetrics),
        );
    }

    /**
     * Get trend analysis for circuit breaker.
     *
     * Analyses the samples stored for $circuitName between now minus
     * $timeWindow and now.
     */
    public function getCircuitBreakerTrends(string $circuitName, Duration $timeWindow): array
    {
        $endTime = $this->clock->time();
        $startTime = $this->windowStart($endTime, $timeWindow);

        $metrics = $this->getMetricsInTimeRange(
            "monitoring:circuit:{$circuitName}",
            $startTime,
            $endTime
        );

        return $this->analyzeTrends($metrics);
    }

    /**
     * Get performance analytics.
     *
     * @return array<string, mixed> Keys: time_range, circuit_breakers, error_boundaries
     *                              and system_summary. The system summary keeps its
     *                              zero/null defaults when no requests were seen.
     */
    public function getPerformanceAnalytics(Duration $timeWindow): array
    {
        $endTime = $this->clock->time();
        $startTime = $this->windowStart($endTime, $timeWindow);

        $circuits = $this->getActiveCircuits();
        $boundaries = $this->getActiveBoundaries();

        $analytics = [
            'time_range' => [
                'start' => $startTime->toIsoString(),
                'end' => $endTime->toIsoString(),
                'duration' => $timeWindow->toHumanReadable(),
            ],
            'circuit_breakers' => [],
            'error_boundaries' => [],
            'system_summary' => [
                'total_requests' => 0,
                'total_failures' => 0,
                'average_response_time' => null,
                'peak_response_time' => null,
                'error_rate' => null,
            ],
        ];

        $totalRequests = 0;
        $totalFailures = 0;
        $totalResponseTime = 0;
        $peakResponseTime = Duration::zero();

        // Analyze circuit breakers
        foreach ($circuits as $circuitName) {
            $metrics = $this->getMetricsInTimeRange(
                "monitoring:circuit:{$circuitName}",
                $startTime,
                $endTime
            );

            $circuitAnalysis = $this->analyzeCircuitPerformance($metrics);
            $analytics['circuit_breakers'][$circuitName] = $circuitAnalysis;

            // The analysis may omit keys (the current placeholder returns an empty
            // array), so every figure defaults to 0 instead of reading an undefined key.
            $totalRequests += $circuitAnalysis['total_requests'] ?? 0;
            $totalFailures += $circuitAnalysis['total_failures'] ?? 0;
            $totalResponseTime += $circuitAnalysis['total_response_time_ms'] ?? 0;

            $circuitPeak = $circuitAnalysis['peak_response_time'] ?? 0;
            if ($circuitPeak > $peakResponseTime->toMilliseconds()) {
                $peakResponseTime = Duration::fromMilliseconds($circuitPeak);
            }
        }

        // Analyze error boundaries
        foreach ($boundaries as $boundaryName) {
            $metrics = $this->getMetricsInTimeRange(
                "monitoring:boundary:{$boundaryName}",
                $startTime,
                $endTime
            );

            $boundaryAnalysis = $this->analyzeBoundaryPerformance($metrics);
            $analytics['error_boundaries'][$boundaryName] = $boundaryAnalysis;
        }

        // System summary
        if ($totalRequests > 0) {
            $analytics['system_summary'] = [
                'total_requests' => $totalRequests,
                'total_failures' => $totalFailures,
                'average_response_time' => Duration::fromMilliseconds($totalResponseTime / $totalRequests),
                'peak_response_time' => $peakResponseTime,
                'error_rate' => Percentage::fromFloat($totalFailures / $totalRequests),
            ];
        }

        return $analytics;
    }

    /**
     * Generate monitoring alerts.
     *
     * Emits one alert per component whose status is not 'healthy': 'failed'
     * maps to severity 'critical', every other non-healthy status (including
     * 'unknown') to 'warning'.
     *
     * @return list<MonitoringAlert>
     */
    public function generateAlerts(): array
    {
        $alerts = [];

        // Check circuit breaker alerts
        foreach ($this->getActiveCircuits() as $circuitName) {
            $health = $this->getCircuitHealth($circuitName);
            if ($health['status'] !== 'healthy') {
                $alerts[] = new MonitoringAlert(
                    type: 'circuit_breaker',
                    severity: $health['status'] === 'failed' ? 'critical' : 'warning',
                    component: $circuitName,
                    message: "Circuit breaker '{$circuitName}' is in {$health['status']} state",
                    timestamp: $this->clock->time(),
                    metrics: $health,
                );
            }
        }

        // Check error boundary alerts
        foreach ($this->getActiveBoundaries() as $boundaryName) {
            $health = $this->getBoundaryHealth($boundaryName);
            if ($health['status'] !== 'healthy') {
                $alerts[] = new MonitoringAlert(
                    type: 'error_boundary',
                    severity: $health['status'] === 'failed' ? 'critical' : 'warning',
                    component: $boundaryName,
                    message: "Error boundary '{$boundaryName}' is in {$health['status']} state",
                    timestamp: $this->clock->time(),
                    metrics: $health,
                );
            }
        }

        return $alerts;
    }

    /**
     * Start of a look-back window that ends at $endTime and spans $timeWindow.
     */
    private function windowStart(Timestamp $endTime, Duration $timeWindow): Timestamp
    {
        return Timestamp::fromFloat($endTime->toFloat() - $timeWindow->toSeconds());
    }

    /**
     * Get circuit health status.
     *
     * @return array{status: string, last_updated: mixed, metrics: mixed}
     *         status is 'unknown' when no aggregate is stored for the circuit.
     */
    private function getCircuitHealth(string $circuitName): array
    {
        $aggregatedKey = "monitoring:circuit:aggregated:{$circuitName}";
        $metrics = $this->stateManager->getState($aggregatedKey);

        if ($metrics === null) {
            return [
                'status' => 'unknown',
                'last_updated' => null,
                'metrics' => null,
            ];
        }

        $status = $this->calculateHealthStatus($metrics);

        return [
            'status' => $status,
            'last_updated' => $metrics['last_updated'] ?? null,
            'metrics' => $metrics,
        ];
    }

    /**
     * Get boundary health status.
     *
     * @return array{status: string, last_updated: mixed, metrics: mixed}
     *         status is 'unknown' when no aggregate is stored for the boundary.
     */
    private function getBoundaryHealth(string $boundaryName): array
    {
        $aggregatedKey = "monitoring:boundary:aggregated:{$boundaryName}";
        $metrics = $this->stateManager->getState($aggregatedKey);

        if ($metrics === null) {
            return [
                'status' => 'unknown',
                'last_updated' => null,
                'metrics' => null,
            ];
        }

        $status = $this->calculateBoundaryHealthStatus($metrics);

        return [
            'status' => $status,
            'last_updated' => $metrics['last_updated'] ?? null,
            'metrics' => $metrics,
        ];
    }

    /**
     * Calculate health status from metrics.
     *
     * 'failed' when the error rate exceeds the critical threshold, 'degraded'
     * when it exceeds the warning threshold or the average response time
     * exceeds the slow-response threshold, otherwise 'healthy'. Missing
     * values count as 0.
     *
     * @return 'healthy'|'degraded'|'failed'
     */
    private function calculateHealthStatus(array $metrics): string
    {
        $errorRate = $metrics['error_rate'] ?? 0.0;
        $responseTime = $metrics['average_response_time'] ?? 0.0;

        if ($errorRate > $this->config->criticalErrorRateThreshold) {
            return 'failed';
        }

        if ($errorRate > $this->config->warningErrorRateThreshold ||
            $responseTime > $this->config->slowResponseThreshold->toMilliseconds()) {
            return 'degraded';
        }

        return 'healthy';
    }

    /**
     * Calculate boundary health status from metrics.
     *
     * 'failed' when the fallback rate exceeds the critical threshold,
     * 'degraded' when the fallback rate or the error rate exceeds its warning
     * threshold, otherwise 'healthy'. Missing values count as 0.
     *
     * @return 'healthy'|'degraded'|'failed'
     */
    private function calculateBoundaryHealthStatus(array $metrics): string
    {
        $fallbackRate = $metrics['fallback_rate'] ?? 0.0;
        $errorRate = $metrics['error_rate'] ?? 0.0;

        if ($fallbackRate > $this->config->criticalFallbackRateThreshold) {
            return 'failed';
        }

        if ($fallbackRate > $this->config->warningFallbackRateThreshold ||
            $errorRate > $this->config->warningErrorRateThreshold) {
            return 'degraded';
        }

        return 'healthy';
    }

    /**
     * Update aggregated metrics for circuit breaker.
     *
     * Delegates the merge to aggregateCircuitMetrics() through the state
     * manager's updateState() callback.
     */
    private function updateAggregatedMetrics(
        string $circuitName,
        CircuitBreakerMetrics $metrics,
        Timestamp $timestamp
    ): void {
        $aggregatedKey = "monitoring:circuit:aggregated:{$circuitName}";

        $this->stateManager->updateState($aggregatedKey, function ($existing) use ($metrics, $timestamp) {
            return $this->aggregateCircuitMetrics($existing, $metrics, $timestamp);
        });
    }

    /**
     * Update aggregated metrics for error boundary.
     *
     * Delegates the merge to aggregateBoundaryMetrics() through the state
     * manager's updateState() callback.
     */
    private function updateBoundaryAggregatedMetrics(
        string $boundaryName,
        ErrorBoundaryMetrics $metrics,
        Timestamp $timestamp
    ): void {
        $aggregatedKey = "monitoring:boundary:aggregated:{$boundaryName}";

        $this->stateManager->updateState($aggregatedKey, function ($existing) use ($metrics, $timestamp) {
            return $this->aggregateBoundaryMetrics($existing, $metrics, $timestamp);
        });
    }

    /**
     * Aggregate circuit breaker metrics.
     *
     * The first sample seeds the aggregate from $metrics->toArray(); later
     * samples add to the request/failure totals, recompute the error rate and
     * keep the highest peak response time.
     *
     * NOTE(review): the seeded aggregate relies on toArray() using the same
     * keys as the merged shape below - confirm against CircuitBreakerMetrics.
     */
    private function aggregateCircuitMetrics(?array $existing, CircuitBreakerMetrics $metrics, Timestamp $timestamp): array
    {
        if ($existing === null) {
            return array_merge($metrics->toArray(), [
                'first_seen' => $timestamp->toIsoString(),
                'last_updated' => $timestamp->toIsoString(),
            ]);
        }

        // Simple aggregation - in production you'd want more sophisticated algorithms
        $totalRequests = ($existing['total_requests'] ?? 0) + $metrics->totalRequests;
        $totalFailures = ($existing['total_failures'] ?? 0) + $metrics->totalFailures;

        return [
            'total_requests' => $totalRequests,
            'total_failures' => $totalFailures,
            'error_rate' => $totalRequests > 0 ? $totalFailures / $totalRequests : 0.0,
            // Latest sample's average, not a running average over all samples.
            'average_response_time' => $metrics->averageResponseTime->toMilliseconds(),
            'peak_response_time' => max(
                $existing['peak_response_time'] ?? 0,
                $metrics->peakResponseTime->toMilliseconds()
            ),
            // Fall back to the current sample if the stored aggregate lacks the key.
            'first_seen' => $existing['first_seen'] ?? $timestamp->toIsoString(),
            'last_updated' => $timestamp->toIsoString(),
        ];
    }

    /**
     * Aggregate boundary metrics.
     *
     * The first sample seeds the aggregate from $metrics->toArray(); later
     * samples add to the execution/fallback/error totals and recompute both
     * rates from the totals.
     *
     * NOTE(review): the seeded aggregate relies on toArray() using the same
     * keys as the merged shape below - confirm against ErrorBoundaryMetrics.
     */
    private function aggregateBoundaryMetrics(?array $existing, ErrorBoundaryMetrics $metrics, Timestamp $timestamp): array
    {
        if ($existing === null) {
            return array_merge($metrics->toArray(), [
                'first_seen' => $timestamp->toIsoString(),
                'last_updated' => $timestamp->toIsoString(),
            ]);
        }

        $totalExecutions = ($existing['total_executions'] ?? 0) + $metrics->totalExecutions;
        $totalFallbacks = ($existing['total_fallbacks'] ?? 0) + $metrics->totalFallbacks;
        $totalErrors = ($existing['total_errors'] ?? 0) + $metrics->totalErrors;

        return [
            'total_executions' => $totalExecutions,
            'total_fallbacks' => $totalFallbacks,
            'total_errors' => $totalErrors,
            'fallback_rate' => $totalExecutions > 0 ? $totalFallbacks / $totalExecutions : 0.0,
            'error_rate' => $totalExecutions > 0 ? $totalErrors / $totalExecutions : 0.0,
            // Latest sample's average, not a running average over all samples.
            'average_execution_time' => $metrics->averageExecutionTime->toMilliseconds(),
            // Fall back to the current sample if the stored aggregate lacks the key.
            'first_seen' => $existing['first_seen'] ?? $timestamp->toIsoString(),
            'last_updated' => $timestamp->toIsoString(),
        ];
    }

    // Placeholder methods - implement based on your specific needs

    /** Placeholder: names of the monitored circuits; currently always empty. */
    private function getActiveCircuits(): array
    {
        return [];
    }

    /** Placeholder: names of the monitored error boundaries; currently always empty. */
    private function getActiveBoundaries(): array
    {
        return [];
    }

    /** Placeholder: aggregated metrics of all circuits; currently always empty. */
    private function getAggregatedCircuitMetrics(): array
    {
        return [];
    }

    /** Placeholder: aggregated metrics of all error boundaries; currently always empty. */
    private function getAggregatedBoundaryMetrics(): array
    {
        return [];
    }

    /** Placeholder: samples stored under $prefix between $start and $end; currently always empty. */
    private function getMetricsInTimeRange(string $prefix, Timestamp $start, Timestamp $end): array
    {
        return [];
    }

    /** Placeholder: trend analysis over a list of samples; currently always empty. */
    private function analyzeTrends(array $metrics): array
    {
        return [];
    }

    /**
     * Placeholder: per-circuit performance analysis; currently always empty.
     *
     * getPerformanceAnalytics() reads the keys total_requests, total_failures,
     * total_response_time_ms and peak_response_time from the result.
     */
    private function analyzeCircuitPerformance(array $metrics): array
    {
        return [];
    }

    /** Placeholder: per-boundary performance analysis; currently always empty. */
    private function analyzeBoundaryPerformance(array $metrics): array
    {
        return [];
    }

    /** Placeholder: per-sample circuit breaker alert check; currently a no-op. */
    private function checkCircuitBreakerAlerts(string $name, CircuitBreakerMetrics $metrics): void
    {
    }

    /** Placeholder: per-sample error boundary alert check; currently a no-op. */
    private function checkErrorBoundaryAlerts(string $name, ErrorBoundaryMetrics $metrics): void
    {
    }
}
|