- Add comprehensive health check system with multiple endpoints - Add Prometheus metrics endpoint - Add production logging configurations (5 strategies) - Add complete deployment documentation suite: * QUICKSTART.md - 30-minute deployment guide * DEPLOYMENT_CHECKLIST.md - Printable verification checklist * DEPLOYMENT_WORKFLOW.md - Complete deployment lifecycle * PRODUCTION_DEPLOYMENT.md - Comprehensive technical reference * production-logging.md - Logging configuration guide * ANSIBLE_DEPLOYMENT.md - Infrastructure as Code automation * README.md - Navigation hub * DEPLOYMENT_SUMMARY.md - Executive summary - Add deployment scripts and automation - Add DEPLOYMENT_PLAN.md - Concrete plan for immediate deployment - Update README with production-ready features All production infrastructure is now complete and ready for deployment.
301 lines
10 KiB
PHP
301 lines
10 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Framework\ErrorAggregation;
|
|
|
|
use App\Framework\Exception\Core\ErrorSeverity;
|
|
use App\Framework\Ulid\Ulid;
|
|
|
|
/**
 * Represents a pattern of similar errors for analysis and alerting.
 *
 * Instances are immutable: every state transition (new occurrence,
 * acknowledgement, resolution) returns a fresh instance and leaves the
 * original untouched.
 */
final readonly class ErrorPattern
{
    /**
     * @param Ulid                    $id                Identifier of this pattern
     * @param string                  $fingerprint       Fingerprint shared by all events grouped into this pattern
     * @param string                  $service           Service the errors were reported for
     * @param string                  $component         Component the errors were reported for
     * @param string                  $operation         Operation the errors were reported for
     * @param string                  $errorCode         Error code value
     * @param string                  $normalizedMessage Message representing the pattern
     * @param ErrorSeverity           $severity          Severity used for alert thresholds and urgency
     * @param int                     $occurrenceCount   Number of events folded into this pattern
     * @param \DateTimeImmutable      $firstOccurrence   Time of the first event
     * @param \DateTimeImmutable      $lastOccurrence    Time of the most recently recorded event
     * @param array                   $affectedUsers     Distinct user identifiers seen on events (presumably scalar IDs; verify against ErrorEvent)
     * @param array                   $affectedIps       Distinct client IPs seen on events (presumably strings; verify against ErrorEvent)
     * @param bool                    $isActive          False once the pattern has been resolved
     * @param bool                    $isAcknowledged    True once acknowledged or resolved
     * @param string|null             $acknowledgedBy    Who acknowledged the pattern, if anyone
     * @param \DateTimeImmutable|null $acknowledgedAt    When the pattern was acknowledged, if ever
     * @param string|null             $resolution        Free-text resolution note
     * @param array                   $metadata          Arbitrary extra data carried along unchanged
     */
    public function __construct(
        public Ulid $id,
        public string $fingerprint,
        public string $service,
        public string $component,
        public string $operation,
        public string $errorCode,
        public string $normalizedMessage,
        public ErrorSeverity $severity,
        public int $occurrenceCount,
        public \DateTimeImmutable $firstOccurrence,
        public \DateTimeImmutable $lastOccurrence,
        public array $affectedUsers = [],
        public array $affectedIps = [],
        public bool $isActive = true,
        public bool $isAcknowledged = false,
        public ?string $acknowledgedBy = null,
        public ?\DateTimeImmutable $acknowledgedAt = null,
        public ?string $resolution = null,
        public array $metadata = [],
    ) {
    }

    /**
     * Creates a new pattern from the first error event of its kind.
     *
     * The pattern starts with an occurrence count of 1, both occurrence
     * timestamps set to the event time, and the event's message used verbatim
     * as the normalized message.
     *
     * @param ErrorEvent                     $event Event that seeds the pattern
     * @param \App\Framework\DateTime\Clock  $clock Clock handed to the Ulid constructor for the new pattern ID
     */
    public static function fromErrorEvent(ErrorEvent $event, \App\Framework\DateTime\Clock $clock): self
    {
        return new self(
            id: new Ulid($clock),
            fingerprint: $event->getFingerprint(),
            service: $event->service,
            component: $event->component,
            operation: $event->operation,
            errorCode: $event->errorCode->value,
            normalizedMessage: $event->errorMessage,
            severity: $event->severity,
            occurrenceCount: 1,
            firstOccurrence: $event->occurredAt,
            lastOccurrence: $event->occurredAt,
            affectedUsers: self::withUniqueEntry([], $event->userId),
            affectedIps: self::withUniqueEntry([], $event->clientIp),
        );
    }

    /**
     * Returns a copy of this pattern updated with one more occurrence.
     *
     * Increments the occurrence count, moves the last-occurrence timestamp to
     * the event time, and records the event's user and client IP if they are
     * set and not already present. All other fields are carried over.
     *
     * @param ErrorEvent $event The newly observed event
     */
    public function withNewOccurrence(ErrorEvent $event): self
    {
        return new self(
            id: $this->id,
            fingerprint: $this->fingerprint,
            service: $this->service,
            component: $this->component,
            operation: $this->operation,
            errorCode: $this->errorCode,
            normalizedMessage: $this->normalizedMessage,
            severity: $this->severity,
            occurrenceCount: $this->occurrenceCount + 1,
            firstOccurrence: $this->firstOccurrence,
            lastOccurrence: $event->occurredAt,
            affectedUsers: self::withUniqueEntry($this->affectedUsers, $event->userId),
            affectedIps: self::withUniqueEntry($this->affectedIps, $event->clientIp),
            isActive: $this->isActive,
            isAcknowledged: $this->isAcknowledged,
            acknowledgedBy: $this->acknowledgedBy,
            acknowledgedAt: $this->acknowledgedAt,
            resolution: $this->resolution,
            metadata: $this->metadata,
        );
    }

    /**
     * Returns a copy of this pattern marked as acknowledged.
     *
     * The acknowledgement time is the current system time. The resolution
     * note is overwritten with the given value, including when it is null.
     *
     * @param string      $acknowledgedBy Who acknowledged the pattern
     * @param string|null $resolution     Optional resolution note
     */
    public function acknowledge(string $acknowledgedBy, ?string $resolution = null): self
    {
        return new self(
            id: $this->id,
            fingerprint: $this->fingerprint,
            service: $this->service,
            component: $this->component,
            operation: $this->operation,
            errorCode: $this->errorCode,
            normalizedMessage: $this->normalizedMessage,
            severity: $this->severity,
            occurrenceCount: $this->occurrenceCount,
            firstOccurrence: $this->firstOccurrence,
            lastOccurrence: $this->lastOccurrence,
            affectedUsers: $this->affectedUsers,
            affectedIps: $this->affectedIps,
            isActive: $this->isActive,
            isAcknowledged: true,
            acknowledgedBy: $acknowledgedBy,
            acknowledgedAt: new \DateTimeImmutable(),
            resolution: $resolution,
            metadata: $this->metadata,
        );
    }

    /**
     * Returns a copy of this pattern marked as resolved (inactive).
     *
     * Resolving also flags the pattern as acknowledged. An existing
     * acknowledgement time is kept; otherwise the current system time is
     * used. The acknowledger is carried over unchanged and may remain null.
     *
     * @param string $resolution Resolution note
     */
    public function resolve(string $resolution): self
    {
        return new self(
            id: $this->id,
            fingerprint: $this->fingerprint,
            service: $this->service,
            component: $this->component,
            operation: $this->operation,
            errorCode: $this->errorCode,
            normalizedMessage: $this->normalizedMessage,
            severity: $this->severity,
            occurrenceCount: $this->occurrenceCount,
            firstOccurrence: $this->firstOccurrence,
            lastOccurrence: $this->lastOccurrence,
            affectedUsers: $this->affectedUsers,
            affectedIps: $this->affectedIps,
            isActive: false,
            isAcknowledged: true,
            acknowledgedBy: $this->acknowledgedBy,
            acknowledgedAt: $this->acknowledgedAt ?? new \DateTimeImmutable(),
            resolution: $resolution,
            metadata: $this->metadata,
        );
    }

    /**
     * Gets the error frequency in errors per minute.
     *
     * Computed as the occurrence count divided by the minutes elapsed between
     * the first and last occurrence (whole-second resolution). When that span
     * is zero or negative the raw occurrence count is returned instead.
     */
    public function getFrequency(): float
    {
        $duration = $this->lastOccurrence->getTimestamp() - $this->firstOccurrence->getTimestamp();

        if ($duration <= 0) {
            return (float) $this->occurrenceCount; // All errors in same second
        }

        return $this->occurrenceCount / ($duration / 60); // errors per minute
    }

    /**
     * Checks whether the pattern indicates a critical issue.
     *
     * True when any of the following holds:
     *  - frequency exceeds 10 errors per minute,
     *  - more than 50 affected users are recorded,
     *  - severity is CRITICAL with at least 3 occurrences,
     *  - severity is ERROR with at least 100 occurrences.
     */
    public function isCriticalPattern(): bool
    {
        // High frequency errors
        if ($this->getFrequency() > 10) { // More than 10 errors per minute
            return true;
        }

        // Many affected users
        if (count($this->affectedUsers) > 50) {
            return true;
        }

        // Critical severity with multiple occurrences
        if ($this->severity === ErrorSeverity::CRITICAL && $this->occurrenceCount >= 3) {
            return true;
        }

        // Error severity with very high occurrence count
        if ($this->severity === ErrorSeverity::ERROR && $this->occurrenceCount >= 100) {
            return true;
        }

        return false;
    }

    /**
     * Gets the occurrence count at which this pattern starts alerting,
     * chosen by severity.
     *
     * NOTE(review): the match has no default arm, so a severity case other
     * than the five listed would raise \UnhandledMatchError — confirm the
     * enum has no further cases.
     */
    public function getAlertThreshold(): int
    {
        return match ($this->severity) {
            ErrorSeverity::CRITICAL => 1, // Alert on first occurrence
            ErrorSeverity::ERROR => 5, // Alert after 5 occurrences
            ErrorSeverity::WARNING => 20, // Alert after 20 occurrences
            ErrorSeverity::INFO => 100, // Alert after 100 occurrences
            ErrorSeverity::DEBUG => 500, // Alert after 500 occurrences
        };
    }

    /**
     * Checks whether the pattern should trigger an alert.
     *
     * Acknowledged or inactive patterns never alert; otherwise the pattern
     * alerts once its occurrence count reaches the severity threshold.
     */
    public function shouldAlert(): bool
    {
        if ($this->isAcknowledged) {
            return false;
        }

        if (! $this->isActive) {
            return false;
        }

        return $this->occurrenceCount >= $this->getAlertThreshold();
    }

    /**
     * Gets the urgency to use when alerting on this pattern.
     *
     * Critical patterns are always URGENT; otherwise urgency follows the
     * severity, with anything below WARNING mapped to LOW.
     */
    public function getAlertUrgency(): AlertUrgency
    {
        if ($this->isCriticalPattern()) {
            return AlertUrgency::URGENT;
        }

        return match ($this->severity) {
            ErrorSeverity::CRITICAL => AlertUrgency::URGENT,
            ErrorSeverity::ERROR => AlertUrgency::HIGH,
            ErrorSeverity::WARNING => AlertUrgency::MEDIUM,
            default => AlertUrgency::LOW,
        };
    }

    /**
     * Converts the pattern to an array for storage/transmission.
     *
     * Timestamps are formatted with format 'c'. The derived values
     * 'frequency' and 'is_critical' are included for consumers but are not
     * read back by fromArray().
     */
    public function toArray(): array
    {
        return [
            'id' => (string) $this->id,
            'fingerprint' => $this->fingerprint,
            'service' => $this->service,
            'component' => $this->component,
            'operation' => $this->operation,
            'error_code' => $this->errorCode,
            'normalized_message' => $this->normalizedMessage,
            'severity' => $this->severity->value,
            'occurrence_count' => $this->occurrenceCount,
            'first_occurrence' => $this->firstOccurrence->format('c'),
            'last_occurrence' => $this->lastOccurrence->format('c'),
            'affected_users' => $this->affectedUsers,
            'affected_ips' => $this->affectedIps,
            'is_active' => $this->isActive,
            'is_acknowledged' => $this->isAcknowledged,
            'acknowledged_by' => $this->acknowledgedBy,
            'acknowledged_at' => $this->acknowledgedAt?->format('c'),
            'resolution' => $this->resolution,
            'metadata' => $this->metadata,
            'frequency' => $this->getFrequency(),
            'is_critical' => $this->isCriticalPattern(),
        ];
    }

    /**
     * Creates a pattern from an array (for deserialization).
     *
     * Required keys: id, fingerprint, service, component, operation,
     * error_code, normalized_message, severity, occurrence_count,
     * first_occurrence, last_occurrence. All remaining keys are optional and
     * fall back to the constructor defaults when absent.
     *
     * @param array $data Array in the shape produced by toArray()
     *
     * @throws \Exception If a timestamp string cannot be parsed by \DateTimeImmutable
     */
    public static function fromArray(array $data): self
    {
        // Optional: may be missing entirely in payloads for unacknowledged patterns.
        $acknowledgedAt = $data['acknowledged_at'] ?? null;

        return new self(
            id: Ulid::fromString($data['id']),
            fingerprint: $data['fingerprint'],
            service: $data['service'],
            component: $data['component'],
            operation: $data['operation'],
            errorCode: $data['error_code'],
            normalizedMessage: $data['normalized_message'],
            severity: ErrorSeverity::from($data['severity']),
            occurrenceCount: $data['occurrence_count'],
            firstOccurrence: new \DateTimeImmutable($data['first_occurrence']),
            lastOccurrence: new \DateTimeImmutable($data['last_occurrence']),
            affectedUsers: $data['affected_users'] ?? [],
            affectedIps: $data['affected_ips'] ?? [],
            isActive: $data['is_active'] ?? true,
            isAcknowledged: $data['is_acknowledged'] ?? false,
            acknowledgedBy: $data['acknowledged_by'] ?? null,
            // Empty/falsy values are treated as "never acknowledged".
            acknowledgedAt: $acknowledgedAt ? new \DateTimeImmutable($acknowledgedAt) : null,
            resolution: $data['resolution'] ?? null,
            metadata: $data['metadata'] ?? [],
        );
    }

    /**
     * Appends $candidate to $entries unless it is falsy or already present.
     *
     * Membership is checked with strict comparison so that distinct
     * numeric-looking strings (e.g. "007" and "7") are not merged.
     *
     * @param array $entries   Existing list of identifiers
     * @param mixed $candidate Identifier to add; falsy values are ignored
     *
     * @return array The list, with $candidate appended when it was new
     */
    private static function withUniqueEntry(array $entries, mixed $candidate): array
    {
        if ($candidate && ! in_array($candidate, $entries, true)) {
            $entries[] = $candidate;
        }

        return $entries;
    }
}
|