- Add comprehensive health check system with multiple endpoints
- Add Prometheus metrics endpoint
- Add production logging configurations (5 strategies)
- Add complete deployment documentation suite:
  * QUICKSTART.md - 30-minute deployment guide
  * DEPLOYMENT_CHECKLIST.md - Printable verification checklist
  * DEPLOYMENT_WORKFLOW.md - Complete deployment lifecycle
  * PRODUCTION_DEPLOYMENT.md - Comprehensive technical reference
  * production-logging.md - Logging configuration guide
  * ANSIBLE_DEPLOYMENT.md - Infrastructure as Code automation
  * README.md - Navigation hub
  * DEPLOYMENT_SUMMARY.md - Executive summary
- Add deployment scripts and automation
- Add DEPLOYMENT_PLAN.md - Concrete plan for immediate deployment
- Update README with production-ready features

All production infrastructure is now complete and ready for deployment.
531 lines
17 KiB
PHP
531 lines
17 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Framework\Waf\MachineLearning;
|
|
|
|
use App\Framework\Core\ValueObjects\Duration;
|
|
use App\Framework\Core\ValueObjects\Percentage;
|
|
use App\Framework\DateTime\Clock;
|
|
use App\Framework\MachineLearning\ValueObjects\Baseline;
|
|
use App\Framework\MachineLearning\ValueObjects\Feature;
|
|
use App\Framework\MachineLearning\ValueObjects\FeatureType;
|
|
|
|
/**
 * Manages behavioral baselines for anomaly detection.
 *
 * Baselines, their raw feature history and their last-update timestamps are
 * held in memory in arrays keyed by "<feature type value>:<feature name>"
 * (see generateBaselineKey()). A baseline can be refreshed in two ways:
 *
 *  - updateBaseline(): batch recalculation from the recorded history,
 *    throttled by $baselineUpdateInterval;
 *  - updateBaselineIncremental(): per-sample exponential-moving-average update.
 */
final class BaselineManager
{
    /** Maximum number of entries kept in the performance-metrics log. */
    private const MAX_PERFORMANCE_METRICS = 1000;

    /** Upper bound (in percent) for the sample-size based confidence. */
    private const MAX_CONFIDENCE = 95.0;

    /** Confidence (in percent) granted once exactly the minimum sample count is reached. */
    private const CONFIDENCE_AT_MIN_SAMPLES = 75.0;

    /** Number of new samples at which the learning-rate sample factor saturates at 1.0. */
    private const SAMPLE_FACTOR_SATURATION = 10.0;

    /** Fraction of the seasonal factor's deviation from 1.0 applied to the statistics. */
    private const SEASONAL_ADJUSTMENT_WEIGHT = 0.1;

    /**
     * @param Clock    $clock                    Time source for timestamps and age checks.
     * @param Duration $baselineUpdateInterval   Minimum time between two batch updates of the same key.
     * @param Duration $baselineMaxAge           Age beyond which a baseline is treated as expired.
     * @param int      $minSamplesForBaseline    History size required before a batch update builds a baseline.
     * @param int      $maxSamplesPerBaseline    Cap on retained history per key and on a baseline's sample size.
     * @param float    $learningRate             Base weight of new data in moving-average updates.
     * @param bool     $enableAdaptiveBaselines  When false, batch updates leave an existing baseline unchanged.
     * @param bool     $enableSeasonalAdjustment When true, initial baselines get a time-of-day/weekday adjustment.
     * @param array<string, Baseline>      $baselines          Pre-seeded baselines, indexed by baseline key.
     * @param array<string, list<Feature>> $featureHistory     Pre-seeded feature history, indexed by baseline key.
     * @param array<string, mixed>         $updateTimestamps   Last update time per key, as returned by Clock::time().
     * @param list<array<string, mixed>>   $performanceMetrics Pre-seeded update log entries.
     */
    public function __construct(
        private readonly Clock $clock,
        private readonly Duration $baselineUpdateInterval,
        private readonly Duration $baselineMaxAge,
        private readonly int $minSamplesForBaseline = 50,
        private readonly int $maxSamplesPerBaseline = 10000,
        private readonly float $learningRate = 0.1,
        private readonly bool $enableAdaptiveBaselines = true,
        private readonly bool $enableSeasonalAdjustment = true,
        private array $baselines = [],
        private array $featureHistory = [],
        private array $updateTimestamps = [],
        private array $performanceMetrics = [],
    ) {
    }

    /**
     * Get baseline for a specific behavior type and feature.
     *
     * An expired baseline is removed from the baseline map as a side effect
     * (its feature history and update timestamp are kept).
     *
     * @return Baseline|null The stored baseline, or null when none exists or it has expired.
     */
    public function getBaseline(FeatureType $behaviorType, string $featureName = 'default'): ?Baseline
    {
        $key = $this->generateBaselineKey($behaviorType, $featureName);
        $baseline = $this->baselines[$key] ?? null;

        if ($baseline === null) {
            return null;
        }

        // Check if baseline is too old
        if ($this->isBaselineExpired($baseline)) {
            unset($this->baselines[$key]);

            return null;
        }

        return $baseline;
    }

    /**
     * Update baseline with new feature data (batch recalculation).
     *
     * The feature is always appended to the history. The baseline itself is
     * only (re)built when the update interval has elapsed for this key and
     * at least $minSamplesForBaseline features have been recorded.
     */
    public function updateBaseline(Feature $feature): void
    {
        $key = $this->generateBaselineKey($feature->type, $feature->name);

        // Record feature in history
        $this->recordFeature($key, $feature);

        // Check if update is needed
        if (! $this->shouldUpdateBaseline($key)) {
            return;
        }

        $history = $this->featureHistory[$key] ?? [];
        $historySize = count($history);

        if ($historySize < $this->minSamplesForBaseline) {
            return;
        }

        // Create or update baseline
        $existingBaseline = $this->baselines[$key] ?? null;
        $this->baselines[$key] = $existingBaseline === null
            ? $this->createInitialBaseline($feature->type, $feature->name, $history)
            : $this->updateExistingBaseline($existingBaseline, $history);

        $this->updateTimestamps[$key] = $this->clock->time();

        // Record performance metrics
        $this->recordBaselineUpdate($key, $historySize);
    }

    /**
     * Update baseline incrementally with a single new feature.
     *
     * Falls back to updateBaseline() while no baseline exists for the key,
     * because an initial baseline needs a minimum amount of history.
     */
    public function updateBaselineIncremental(Feature $feature): void
    {
        $key = $this->generateBaselineKey($feature->type, $feature->name);
        $existingBaseline = $this->baselines[$key] ?? null;

        if ($existingBaseline === null) {
            // Need enough samples for initial baseline
            $this->updateBaseline($feature);

            return;
        }

        // Incremental update using exponential moving average
        $this->baselines[$key] = $this->incrementalUpdate($existingBaseline, $feature);
        $this->updateTimestamps[$key] = $this->clock->time();

        // Record feature for history
        $this->recordFeature($key, $feature);
    }

    /**
     * Get all non-expired baselines for a behavior type.
     *
     * @return array<string, Baseline> Matching baselines, indexed by baseline key.
     */
    public function getBaselinesForFeatureType(FeatureType $behaviorType): array
    {
        return array_filter(
            $this->baselines,
            fn (Baseline $baseline): bool => $baseline->type === $behaviorType
                && ! $this->isBaselineExpired($baseline),
        );
    }

    /**
     * Clean expired baselines together with their history and update timestamps.
     *
     * @return int Number of baselines removed.
     */
    public function cleanExpiredBaselines(): int
    {
        $removedCount = 0;

        foreach ($this->baselines as $key => $baseline) {
            if (! $this->isBaselineExpired($baseline)) {
                continue;
            }

            unset(
                $this->baselines[$key],
                $this->featureHistory[$key],
                $this->updateTimestamps[$key],
            );
            $removedCount++;
        }

        return $removedCount;
    }

    /**
     * Get baseline statistics.
     *
     * Expired baselines that are still stored are included in the totals and
     * averages, and reported separately as 'expired_baselines'.
     *
     * @return array<string, mixed> Aggregate counters and averages over all stored baselines.
     */
    public function getBaselineStats(): array
    {
        $totalBaselines = count($this->baselines);
        $expiredBaselines = 0;
        $avgSampleSize = 0;
        $avgAge = 0;

        $now = $this->clock->time();

        foreach ($this->baselines as $baseline) {
            if ($this->isBaselineExpired($baseline)) {
                $expiredBaselines++;
            }

            $avgSampleSize += $baseline->sampleSize;
            $avgAge += $baseline->lastUpdated->diff($now)->toSeconds();
        }

        // Turn the accumulated sums into averages; skipped when empty to avoid dividing by zero.
        if ($totalBaselines > 0) {
            $avgSampleSize /= $totalBaselines;
            $avgAge /= $totalBaselines;
        }

        return [
            'total_baselines' => $totalBaselines,
            'expired_baselines' => $expiredBaselines,
            'active_baselines' => $totalBaselines - $expiredBaselines,
            'avg_sample_size' => $avgSampleSize,
            'avg_age_seconds' => $avgAge,
            'behavior_types' => $this->getBaselineFeatureTypes(),
            'feature_history_size' => $this->totalFeatureHistorySize(),
        ];
    }

    /**
     * Create initial baseline from feature history.
     *
     * @param list<Feature> $featureHistory Recorded features for the key.
     */
    private function createInitialBaseline(FeatureType $behaviorType, string $featureName, array $featureHistory): Baseline
    {
        $values = array_map(static fn (Feature $f) => $f->value, $featureHistory);

        $stats = $this->calculateStatistics($values);
        $confidence = $this->calculateBaselineConfidence($stats['sample_size']);

        // Apply seasonal adjustment if enabled
        if ($this->enableSeasonalAdjustment) {
            $stats = $this->applySeasonalAdjustment($stats, $featureHistory);
        }

        return new Baseline(
            type: $behaviorType,
            mean: $stats['mean'],
            standardDeviation: $stats['std_dev'],
            sampleSize: $stats['sample_size'],
            p50: $stats['p50'],
            p95: $stats['p95'],
            p99: $stats['p99'],
            confidence: $confidence,
            lastUpdated: $this->clock->time(),
            metadata: [
                'feature_name' => $featureName,
                'creation_method' => 'initial',
                'seasonal_adjusted' => $this->enableSeasonalAdjustment,
            ],
        );
    }

    /**
     * Update existing baseline with new data.
     *
     * Mean and standard deviation are blended with the history's statistics
     * using an adaptive learning rate; percentiles are recomputed from the
     * most recent $maxSamplesPerBaseline values. Returns the baseline
     * unchanged when adaptive baselines are disabled.
     *
     * NOTE(review): $featureHistory is the full rolling window, so samples that
     * already fed an earlier update are added to the sample size again —
     * confirm whether this is intended.
     *
     * @param list<Feature> $featureHistory Recorded features for the key.
     */
    private function updateExistingBaseline(Baseline $existingBaseline, array $featureHistory): Baseline
    {
        if (! $this->enableAdaptiveBaselines) {
            return $existingBaseline;
        }

        $values = array_map(static fn (Feature $f) => $f->value, $featureHistory);
        $newStats = $this->calculateStatistics($values);

        // Adaptive learning rate based on sample size and confidence
        $rate = $this->calculateAdaptiveLearningRate($existingBaseline, $newStats['sample_size']);
        $keep = 1 - $rate;

        // Exponential moving average update
        $updatedMean = $existingBaseline->mean * $keep + $newStats['mean'] * $rate;
        $updatedStdDev = $existingBaseline->standardDeviation * $keep + $newStats['std_dev'] * $rate;

        // Update percentiles with recent data
        $recentValues = array_slice($values, -$this->maxSamplesPerBaseline);
        sort($recentValues);

        $newSampleSize = min($existingBaseline->sampleSize + $newStats['sample_size'], $this->maxSamplesPerBaseline);

        return new Baseline(
            type: $existingBaseline->type,
            mean: $updatedMean,
            standardDeviation: $updatedStdDev,
            sampleSize: $newSampleSize,
            p50: $this->calculatePercentile($recentValues, 50),
            p95: $this->calculatePercentile($recentValues, 95),
            p99: $this->calculatePercentile($recentValues, 99),
            confidence: $this->calculateBaselineConfidence($newSampleSize),
            lastUpdated: $this->clock->time(),
            metadata: array_merge($existingBaseline->metadata, [
                'update_method' => 'adaptive',
                'learning_rate' => $rate,
                'updates_count' => ($existingBaseline->metadata['updates_count'] ?? 0) + 1,
            ]),
        );
    }

    /**
     * Incremental update using exponential moving average.
     *
     * Percentiles cannot be derived from a single sample and are carried over.
     * The sample size is capped at $maxSamplesPerBaseline and the confidence is
     * recomputed from it, matching what the batch update does.
     */
    private function incrementalUpdate(Baseline $baseline, Feature $newFeature): Baseline
    {
        $rate = $this->calculateAdaptiveLearningRate($baseline, 1);
        $value = $newFeature->value;

        $updatedMean = $baseline->mean * (1 - $rate) + $value * $rate;

        // Exponentially weighted variance update. The increment is the product of the
        // sample's deviation from the old mean and from the new mean (the same term
        // Welford's recurrence uses), weighted by the learning rate.
        $deviationFromOldMean = $value - $baseline->mean;
        $deviationFromNewMean = $value - $updatedMean;
        $previousVariance = $baseline->standardDeviation ** 2;
        $updatedVariance = $previousVariance * (1 - $rate)
            + $deviationFromOldMean * $deviationFromNewMean * $rate;

        // Clamp at zero so rounding error can never produce sqrt() of a negative number.
        $updatedStdDev = sqrt(max(0, $updatedVariance));

        $newSampleSize = min($baseline->sampleSize + 1, $this->maxSamplesPerBaseline);

        return new Baseline(
            type: $baseline->type,
            mean: $updatedMean,
            standardDeviation: $updatedStdDev,
            sampleSize: $newSampleSize,
            p50: $baseline->p50, // Keep existing percentiles for incremental updates
            p95: $baseline->p95,
            p99: $baseline->p99,
            confidence: $this->calculateBaselineConfidence($newSampleSize),
            lastUpdated: $this->clock->time(),
            metadata: array_merge($baseline->metadata, [
                'update_method' => 'incremental',
                'last_value' => $value,
            ]),
        );
    }

    /**
     * Calculate statistics from values.
     *
     * The standard deviation is the population form (divides by N, not N - 1).
     *
     * @param array<int|float> $values Raw sample values, in any order.
     *
     * @return array{mean: int|float, std_dev: float, sample_size: int, p50: float, p95: float, p99: float}
     */
    private function calculateStatistics(array $values): array
    {
        $sampleSize = count($values);

        if ($sampleSize === 0) {
            return [
                'mean' => 0.0,
                'std_dev' => 0.0,
                'sample_size' => 0,
                'p50' => 0.0,
                'p95' => 0.0,
                'p99' => 0.0,
            ];
        }

        $mean = array_sum($values) / $sampleSize;
        $squaredDeviations = array_map(static fn ($v) => ($v - $mean) ** 2, $values);
        $stdDev = sqrt(array_sum($squaredDeviations) / $sampleSize);

        // calculatePercentile() needs an ascending, zero-indexed list.
        sort($values);

        return [
            'mean' => $mean,
            'std_dev' => $stdDev,
            'sample_size' => $sampleSize,
            'p50' => $this->calculatePercentile($values, 50),
            'p95' => $this->calculatePercentile($values, 95),
            'p99' => $this->calculatePercentile($values, 99),
        ];
    }

    /**
     * Calculate percentile from sorted values using linear interpolation
     * between the two closest ranks.
     *
     * @param list<int|float> $sortedValues Ascending, zero-indexed values.
     * @param float           $percentile   Percentile in the range 0..100.
     *
     * @return float The interpolated percentile, or 0.0 for an empty list.
     */
    private function calculatePercentile(array $sortedValues, float $percentile): float
    {
        if ($sortedValues === []) {
            return 0.0;
        }

        // Fractional rank of the requested percentile within the list.
        $rank = ($percentile / 100) * (count($sortedValues) - 1);
        $lower = (int) floor($rank);
        $upper = (int) ceil($rank);

        if ($lower === $upper) {
            return (float) $sortedValues[$lower];
        }

        $weight = $rank - $lower;

        return $sortedValues[$lower] * (1 - $weight) + $sortedValues[$upper] * $weight;
    }

    /**
     * Calculate adaptive learning rate.
     *
     * The configured base rate is scaled down as the baseline's confidence
     * grows and scaled up (to at most 1x) as more new samples arrive.
     */
    private function calculateAdaptiveLearningRate(Baseline $baseline, int $newSamples): float
    {
        // Decrease learning rate as confidence increases
        // (assumes Percentage::getValue() is on a 0..100 scale — TODO confirm)
        $confidenceFactor = 1.0 - ($baseline->confidence->getValue() / 100.0);

        // Increase learning rate for more new samples
        $sampleFactor = min(1.0, $newSamples / self::SAMPLE_FACTOR_SATURATION);

        return $this->learningRate * $confidenceFactor * $sampleFactor;
    }

    /**
     * Calculate baseline confidence based on sample size.
     *
     * Confidence grows linearly with the sample size (75% at exactly
     * $minSamplesForBaseline samples) and plateaus at 95%.
     */
    private function calculateBaselineConfidence(int $sampleSize): Percentage
    {
        // Guard against a zero (or negative) minimum, which would otherwise
        // raise DivisionByZeroError.
        $requiredSamples = max(1, $this->minSamplesForBaseline);

        // Confidence increases with sample size, plateaus at 95%
        $confidence = min(
            self::MAX_CONFIDENCE,
            ($sampleSize / $requiredSamples) * self::CONFIDENCE_AT_MIN_SAMPLES,
        );

        return new Percentage(max(0.0, $confidence));
    }

    /**
     * Apply seasonal adjustment to statistics.
     *
     * Counts how many historic features fall into each hour of day and day of
     * week, compares the count for the current hour/weekday with the average,
     * and nudges mean and standard deviation by a small fraction of that ratio.
     * Features without a timestamp are ignored.
     *
     * @param array<string, mixed> $stats          Statistics as produced by calculateStatistics().
     * @param list<Feature>        $featureHistory Recorded features for the key.
     *
     * @return array<string, mixed> The statistics with adjusted 'mean' and 'std_dev'.
     */
    private function applySeasonalAdjustment(array $stats, array $featureHistory): array
    {
        // Simple seasonal adjustment based on time of day/week patterns
        $hourCounts = array_fill(0, 24, 0);
        $dayOfWeekCounts = array_fill(0, 7, 0);

        foreach ($featureHistory as $feature) {
            if ($feature->timestamp === null) {
                continue;
            }

            $hourCounts[(int) $feature->timestamp->format('H')]++;
            $dayOfWeekCounts[(int) $feature->timestamp->format('w')]++;
        }

        // Calculate seasonal factors (simplified).
        // Read the clock once so hour and weekday come from the same instant.
        $now = $this->clock->time();
        $currentHour = (int) $now->format('H');
        $currentDayOfWeek = (int) $now->format('w');

        $avgHourlyCount = array_sum($hourCounts) / 24;
        $avgDailyCount = array_sum($dayOfWeekCounts) / 7;

        // With no timestamped history the factors stay neutral.
        $hourlyFactor = $avgHourlyCount > 0 ? $hourCounts[$currentHour] / $avgHourlyCount : 1.0;
        $dailyFactor = $avgDailyCount > 0 ? $dayOfWeekCounts[$currentDayOfWeek] / $avgDailyCount : 1.0;

        // Apply seasonal adjustment (conservative)
        $deviation = ($hourlyFactor + $dailyFactor) / 2 - 1;

        $stats['mean'] *= (1 + $deviation * self::SEASONAL_ADJUSTMENT_WEIGHT);
        $stats['std_dev'] *= (1 + abs($deviation) * self::SEASONAL_ADJUSTMENT_WEIGHT);

        return $stats;
    }

    /**
     * Record feature in history, dropping the oldest entry once the history
     * exceeds $maxSamplesPerBaseline.
     */
    private function recordFeature(string $key, Feature $feature): void
    {
        $this->featureHistory[$key] ??= [];
        $this->featureHistory[$key][] = $feature;

        // Limit history size
        if (count($this->featureHistory[$key]) > $this->maxSamplesPerBaseline) {
            array_shift($this->featureHistory[$key]);
        }
    }

    /**
     * Check if baseline should be updated.
     *
     * @return bool True when the key was never updated or the update interval has elapsed.
     */
    private function shouldUpdateBaseline(string $key): bool
    {
        $lastUpdate = $this->updateTimestamps[$key] ?? null;

        if ($lastUpdate === null) {
            return true;
        }

        $elapsed = $lastUpdate->diff($this->clock->time());

        return $elapsed->toMilliseconds() >= $this->baselineUpdateInterval->toMilliseconds();
    }

    /**
     * Check if baseline is expired, i.e. strictly older than $baselineMaxAge.
     */
    private function isBaselineExpired(Baseline $baseline): bool
    {
        $age = $baseline->lastUpdated->diff($this->clock->time());

        return $age->toMilliseconds() > $this->baselineMaxAge->toMilliseconds();
    }

    /**
     * Generate baseline key in the form "<feature type value>:<feature name>".
     */
    private function generateBaselineKey(FeatureType $behaviorType, string $featureName): string
    {
        return $behaviorType->value . ':' . $featureName;
    }

    /**
     * Get behavior types with baselines.
     *
     * @return list<mixed> Distinct feature-type values, in first-seen order.
     */
    private function getBaselineFeatureTypes(): array
    {
        $types = [];

        foreach ($this->baselines as $baseline) {
            $types[] = $baseline->type->value;
        }

        // array_unique() keeps the original keys; re-index so the result is a
        // true list (and serializes as a JSON array rather than an object).
        return array_values(array_unique($types));
    }

    /**
     * Total number of features currently held across all history buckets.
     */
    private function totalFeatureHistorySize(): int
    {
        return array_sum(array_map(count(...), $this->featureHistory));
    }

    /**
     * Record baseline update metrics, keeping only the most recent entries.
     */
    private function recordBaselineUpdate(string $key, int $sampleCount): void
    {
        $this->performanceMetrics[] = [
            'timestamp' => $this->clock->time()->toUnixTimestamp(),
            'baseline_key' => $key,
            'sample_count' => $sampleCount,
            'operation' => 'update',
        ];

        // Limit metrics history
        if (count($this->performanceMetrics) > self::MAX_PERFORMANCE_METRICS) {
            array_shift($this->performanceMetrics);
        }
    }

    /**
     * Get performance metrics.
     *
     * @return list<array<string, mixed>> Log of batch baseline updates, oldest first.
     */
    public function getPerformanceMetrics(): array
    {
        return $this->performanceMetrics;
    }

    /**
     * Get configuration together with current baseline/history counters.
     *
     * @return array<string, mixed>
     */
    public function getConfiguration(): array
    {
        return [
            'baseline_update_interval_ms' => $this->baselineUpdateInterval->toMilliseconds(),
            'baseline_max_age_ms' => $this->baselineMaxAge->toMilliseconds(),
            'min_samples_for_baseline' => $this->minSamplesForBaseline,
            'max_samples_per_baseline' => $this->maxSamplesPerBaseline,
            'learning_rate' => $this->learningRate,
            'enable_adaptive_baselines' => $this->enableAdaptiveBaselines,
            'enable_seasonal_adjustment' => $this->enableSeasonalAdjustment,
            'active_baselines' => count($this->baselines),
            'total_feature_history' => $this->totalFeatureHistorySize(),
        ];
    }
}
|