Files
michaelschiemer/src/Framework/Waf/MachineLearning/BaselineManager.php
Michael Schiemer fc3d7e6357 feat(Production): Complete production deployment infrastructure
- Add comprehensive health check system with multiple endpoints
- Add Prometheus metrics endpoint
- Add production logging configurations (5 strategies)
- Add complete deployment documentation suite:
  * QUICKSTART.md - 30-minute deployment guide
  * DEPLOYMENT_CHECKLIST.md - Printable verification checklist
  * DEPLOYMENT_WORKFLOW.md - Complete deployment lifecycle
  * PRODUCTION_DEPLOYMENT.md - Comprehensive technical reference
  * production-logging.md - Logging configuration guide
  * ANSIBLE_DEPLOYMENT.md - Infrastructure as Code automation
  * README.md - Navigation hub
  * DEPLOYMENT_SUMMARY.md - Executive summary
- Add deployment scripts and automation
- Add DEPLOYMENT_PLAN.md - Concrete plan for immediate deployment
- Update README with production-ready features

All production infrastructure is now complete and ready for deployment.
2025-10-25 19:18:37 +02:00

531 lines
17 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Framework\Waf\MachineLearning;
use App\Framework\Core\ValueObjects\Duration;
use App\Framework\Core\ValueObjects\Percentage;
use App\Framework\DateTime\Clock;
use App\Framework\MachineLearning\ValueObjects\Baseline;
use App\Framework\MachineLearning\ValueObjects\Feature;
use App\Framework\MachineLearning\ValueObjects\FeatureType;
/**
 * Manages behavioral baselines for anomaly detection.
 *
 * A baseline is a statistical summary (mean, standard deviation, percentiles,
 * confidence) of the observed values of one feature, identified by the pair
 * (feature type, feature name). Baselines are built from an in-memory rolling
 * history of features and refreshed either in batch ({@see updateBaseline()})
 * or one sample at a time ({@see updateBaselineIncremental()}).
 *
 * All state lives in the arrays held by this instance; nothing is persisted here.
 */
final class BaselineManager
{
    /** Maximum number of entries kept in the performance-metrics log. */
    private const MAX_PERFORMANCE_METRICS = 1000;

    /**
     * @param Clock    $clock                    Time source used for timestamps, ageing and expiry checks.
     * @param Duration $baselineUpdateInterval   Minimum time between two batch updates of the same baseline.
     * @param Duration $baselineMaxAge           Age after which a baseline is treated as expired.
     * @param int      $minSamplesForBaseline    History size required before a baseline is created or batch-updated.
     * @param int      $maxSamplesPerBaseline    Cap for the per-key feature history and for a baseline's sample size.
     * @param float    $learningRate             Base weight of new data in moving-average updates.
     * @param bool     $enableAdaptiveBaselines  When false, existing baselines are never changed by batch updates.
     * @param bool     $enableSeasonalAdjustment When true, newly created baselines get a time-of-day/week adjustment.
     * @param array<string, Baseline>      $baselines          Baselines indexed by baseline key.
     * @param array<string, list<Feature>> $featureHistory     Rolling feature history indexed by baseline key.
     * @param array<string, mixed>         $updateTimestamps   Last update time per key (values as returned by Clock::time()).
     * @param list<array<string, mixed>>   $performanceMetrics Log of batch-update events.
     */
    public function __construct(
        private readonly Clock $clock,
        private readonly Duration $baselineUpdateInterval,
        private readonly Duration $baselineMaxAge,
        private readonly int $minSamplesForBaseline = 50,
        private readonly int $maxSamplesPerBaseline = 10000,
        private readonly float $learningRate = 0.1,
        private readonly bool $enableAdaptiveBaselines = true,
        private readonly bool $enableSeasonalAdjustment = true,
        private array $baselines = [],
        private array $featureHistory = [],
        private array $updateTimestamps = [],
        private array $performanceMetrics = [],
    ) {
    }

    /**
     * Get the baseline for a specific behavior type and feature.
     *
     * An expired baseline is evicted as a side effect and null is returned.
     * The feature history is kept so the baseline can be rebuilt by the next
     * batch update.
     *
     * @return Baseline|null The live baseline, or null when none exists or it has expired.
     */
    public function getBaseline(FeatureType $behaviorType, string $featureName = 'default'): ?Baseline
    {
        $key = $this->generateBaselineKey($behaviorType, $featureName);
        $baseline = $this->baselines[$key] ?? null;

        if ($baseline === null) {
            return null;
        }

        if ($this->isBaselineExpired($baseline)) {
            // Drop the update timestamp together with the baseline: it describes a
            // baseline that no longer exists and would otherwise be able to throttle
            // re-creation in shouldUpdateBaseline().
            unset($this->baselines[$key], $this->updateTimestamps[$key]);

            return null;
        }

        return $baseline;
    }

    /**
     * Record a feature and, when due, (re)build its baseline from the history.
     *
     * The feature is always appended to the history. The baseline itself is only
     * touched when the update interval has elapsed and the history holds at least
     * $minSamplesForBaseline entries.
     */
    public function updateBaseline(Feature $feature): void
    {
        $key = $this->generateBaselineKey($feature->type, $feature->name);

        // The sample is recorded even when the baseline is not refreshed this time.
        $this->recordFeature($key, $feature);

        if (! $this->shouldUpdateBaseline($key)) {
            return;
        }

        $history = $this->featureHistory[$key] ?? [];
        $historySize = count($history);

        if ($historySize < $this->minSamplesForBaseline) {
            return;
        }

        $existingBaseline = $this->baselines[$key] ?? null;

        $this->baselines[$key] = $existingBaseline === null
            ? $this->createInitialBaseline($feature->type, $feature->name, $history)
            : $this->updateExistingBaseline($existingBaseline, $history);

        $this->updateTimestamps[$key] = $this->clock->time();

        $this->recordBaselineUpdate($key, $historySize);
    }

    /**
     * Fold a single new feature into its baseline.
     *
     * When no baseline exists yet for the feature, this falls back to
     * {@see updateBaseline()}, which needs enough history before creating one.
     */
    public function updateBaselineIncremental(Feature $feature): void
    {
        $key = $this->generateBaselineKey($feature->type, $feature->name);
        $existingBaseline = $this->baselines[$key] ?? null;

        if ($existingBaseline === null) {
            // No baseline yet: the batch path collects history until one can be built.
            $this->updateBaseline($feature);

            return;
        }

        $this->baselines[$key] = $this->incrementalUpdate($existingBaseline, $feature);
        $this->updateTimestamps[$key] = $this->clock->time();

        // Keep the sample so later batch updates see it too.
        $this->recordFeature($key, $feature);
    }

    /**
     * Get all non-expired baselines of a behavior type.
     *
     * @return array<string, Baseline> Matching baselines, indexed by baseline key.
     */
    public function getBaselinesForFeatureType(FeatureType $behaviorType): array
    {
        return array_filter(
            $this->baselines,
            fn (Baseline $baseline): bool => $baseline->type === $behaviorType
                && ! $this->isBaselineExpired($baseline),
        );
    }

    /**
     * Remove expired baselines together with their history and update timestamp.
     *
     * @return int Number of baselines removed.
     */
    public function cleanExpiredBaselines(): int
    {
        $removedCount = 0;

        // foreach iterates by value, so removing entries inside the loop is safe.
        foreach ($this->baselines as $key => $baseline) {
            if (! $this->isBaselineExpired($baseline)) {
                continue;
            }

            unset(
                $this->baselines[$key],
                $this->featureHistory[$key],
                $this->updateTimestamps[$key],
            );
            $removedCount++;
        }

        return $removedCount;
    }

    /**
     * Get aggregate statistics over all stored baselines.
     *
     * Expired baselines that have not been evicted yet are included in the
     * totals and averages and counted separately in 'expired_baselines'.
     *
     * @return array<string, mixed>
     */
    public function getBaselineStats(): array
    {
        $totalBaselines = count($this->baselines);
        $expiredBaselines = 0;
        $sampleSizeSum = 0;
        $ageSecondsSum = 0;
        $now = $this->clock->time();

        foreach ($this->baselines as $baseline) {
            if ($this->isBaselineExpired($baseline)) {
                $expiredBaselines++;
            }

            $sampleSizeSum += $baseline->sampleSize;
            $ageSecondsSum += $baseline->lastUpdated->diff($now)->toSeconds();
        }

        // Averages stay 0 when there are no baselines (avoids division by zero).
        $avgSampleSize = $totalBaselines > 0 ? $sampleSizeSum / $totalBaselines : 0;
        $avgAge = $totalBaselines > 0 ? $ageSecondsSum / $totalBaselines : 0;

        return [
            'total_baselines' => $totalBaselines,
            'expired_baselines' => $expiredBaselines,
            'active_baselines' => $totalBaselines - $expiredBaselines,
            'avg_sample_size' => $avgSampleSize,
            'avg_age_seconds' => $avgAge,
            'behavior_types' => $this->getBaselineFeatureTypes(),
            'feature_history_size' => array_sum(array_map(count(...), $this->featureHistory)),
        ];
    }

    /**
     * Create the first baseline for a feature from its history.
     *
     * @param list<Feature> $featureHistory
     */
    private function createInitialBaseline(FeatureType $behaviorType, string $featureName, array $featureHistory): Baseline
    {
        $values = array_map(static fn (Feature $feature) => $feature->value, $featureHistory);
        $stats = $this->calculateStatistics($values);
        $confidence = $this->calculateBaselineConfidence($stats['sample_size']);

        // Only mean and standard deviation are adjusted; percentiles stay raw.
        if ($this->enableSeasonalAdjustment) {
            $stats = $this->applySeasonalAdjustment($stats, $featureHistory);
        }

        return new Baseline(
            type: $behaviorType,
            mean: $stats['mean'],
            standardDeviation: $stats['std_dev'],
            sampleSize: $stats['sample_size'],
            p50: $stats['p50'],
            p95: $stats['p95'],
            p99: $stats['p99'],
            confidence: $confidence,
            lastUpdated: $this->clock->time(),
            metadata: [
                'feature_name' => $featureName,
                'creation_method' => 'initial',
                'seasonal_adjusted' => $this->enableSeasonalAdjustment,
            ],
        );
    }

    /**
     * Blend an existing baseline with statistics of the current history.
     *
     * Mean and standard deviation are updated as an exponential moving average;
     * percentiles are recomputed from the history. Returns the baseline unchanged
     * when adaptive baselines are disabled.
     *
     * @param list<Feature> $featureHistory
     */
    private function updateExistingBaseline(Baseline $existingBaseline, array $featureHistory): Baseline
    {
        if (! $this->enableAdaptiveBaselines) {
            return $existingBaseline;
        }

        $values = array_map(static fn (Feature $feature) => $feature->value, $featureHistory);
        $newStats = $this->calculateStatistics($values);

        // Learning rate shrinks with baseline confidence and grows with batch size.
        $rate = $this->calculateAdaptiveLearningRate($existingBaseline, $newStats['sample_size']);
        $keep = 1 - $rate;

        $updatedMean = $existingBaseline->mean * $keep + $newStats['mean'] * $rate;
        $updatedStdDev = $existingBaseline->standardDeviation * $keep + $newStats['std_dev'] * $rate;

        // Percentiles cannot be blended, so recompute them from the most recent values.
        $recentValues = array_slice($values, -$this->maxSamplesPerBaseline);
        sort($recentValues);

        // NOTE(review): the history is a rolling window that still contains samples from
        // earlier updates, so this sum appears to count them again — confirm intent.
        $newSampleSize = min(
            $existingBaseline->sampleSize + $newStats['sample_size'],
            $this->maxSamplesPerBaseline,
        );

        return new Baseline(
            type: $existingBaseline->type,
            mean: $updatedMean,
            standardDeviation: $updatedStdDev,
            sampleSize: $newSampleSize,
            p50: $this->calculatePercentile($recentValues, 50),
            p95: $this->calculatePercentile($recentValues, 95),
            p99: $this->calculatePercentile($recentValues, 99),
            confidence: $this->calculateBaselineConfidence($newSampleSize),
            lastUpdated: $this->clock->time(),
            metadata: array_merge($existingBaseline->metadata, [
                'update_method' => 'adaptive',
                'learning_rate' => $rate,
                'updates_count' => ($existingBaseline->metadata['updates_count'] ?? 0) + 1,
            ]),
        );
    }

    /**
     * Fold one new value into a baseline using exponential moving averages.
     *
     * Percentiles and confidence are carried over unchanged; the sample size is
     * incremented but never exceeds $maxSamplesPerBaseline.
     */
    private function incrementalUpdate(Baseline $baseline, Feature $newFeature): Baseline
    {
        $rate = $this->calculateAdaptiveLearningRate($baseline, 1);
        $updatedMean = $baseline->mean * (1 - $rate) + $newFeature->value * $rate;

        // Exponentially weighted variance: the product of the deviations from the old
        // and the new mean (the same product Welford's method uses) is blended into the
        // previous variance with the learning rate as weight.
        $deviationFromOldMean = $newFeature->value - $baseline->mean;
        $deviationFromNewMean = $newFeature->value - $updatedMean;
        $previousVariance = $baseline->standardDeviation ** 2;
        $updatedVariance = $previousVariance * (1 - $rate)
            + $deviationFromOldMean * $deviationFromNewMean * $rate;

        // Clamp at zero so rounding noise can never feed sqrt() a negative number.
        $updatedStdDev = sqrt(max(0, $updatedVariance));

        return new Baseline(
            type: $baseline->type,
            mean: $updatedMean,
            standardDeviation: $updatedStdDev,
            // Same cap as in updateExistingBaseline(), so both update paths agree.
            sampleSize: min($baseline->sampleSize + 1, $this->maxSamplesPerBaseline),
            p50: $baseline->p50, // Keep existing percentiles for incremental updates
            p95: $baseline->p95,
            p99: $baseline->p99,
            confidence: $baseline->confidence,
            lastUpdated: $this->clock->time(),
            metadata: array_merge($baseline->metadata, [
                'update_method' => 'incremental',
                'last_value' => $newFeature->value,
            ]),
        );
    }

    /**
     * Calculate mean, population standard deviation and percentiles of a value list.
     *
     * @param array<int|float> $values
     *
     * @return array{mean: int|float, std_dev: float, sample_size: int, p50: float, p95: float, p99: float}
     *         All zeros for an empty input.
     */
    private function calculateStatistics(array $values): array
    {
        $count = count($values);

        if ($count === 0) {
            return [
                'mean' => 0.0,
                'std_dev' => 0.0,
                'sample_size' => 0,
                'p50' => 0.0,
                'p95' => 0.0,
                'p99' => 0.0,
            ];
        }

        $mean = array_sum($values) / $count;

        // Population variance (divides by n, not n - 1).
        $squaredDeviations = array_map(static fn ($value) => ($value - $mean) ** 2, $values);
        $stdDev = sqrt(array_sum($squaredDeviations) / $count);

        // sort() also re-indexes the array, which calculatePercentile() relies on.
        sort($values);

        return [
            'mean' => $mean,
            'std_dev' => $stdDev,
            'sample_size' => $count,
            'p50' => $this->calculatePercentile($values, 50),
            'p95' => $this->calculatePercentile($values, 95),
            'p99' => $this->calculatePercentile($values, 99),
        ];
    }

    /**
     * Calculate a percentile by linear interpolation between the closest ranks.
     *
     * @param list<int|float> $sortedValues Values in ascending order, indexed 0..n-1.
     * @param float           $percentile   Requested percentile; values outside 0..100 are clamped.
     *
     * @return float The interpolated percentile, or 0.0 for an empty list.
     */
    private function calculatePercentile(array $sortedValues, float $percentile): float
    {
        if ($sortedValues === []) {
            return 0.0;
        }

        // Clamp so that an out-of-range request cannot index past the array bounds.
        $percentile = max(0.0, min(100.0, $percentile));

        $position = ($percentile / 100) * (count($sortedValues) - 1);
        $lowerIndex = (int) floor($position);
        $upperIndex = (int) ceil($position);

        if ($lowerIndex === $upperIndex) {
            return $sortedValues[$lowerIndex];
        }

        $upperWeight = $position - $lowerIndex;

        return $sortedValues[$lowerIndex] * (1 - $upperWeight) + $sortedValues[$upperIndex] * $upperWeight;
    }

    /**
     * Calculate the learning rate for an update of the given baseline.
     *
     * The configured base rate is scaled down as the baseline's confidence grows
     * and scaled up with the number of new samples (saturating at 10 samples).
     */
    private function calculateAdaptiveLearningRate(Baseline $baseline, int $newSamples): float
    {
        // Treats the confidence value as a 0..100 figure — presumably what
        // Percentage::getValue() returns; verify against the value object.
        $confidenceFactor = 1.0 - ($baseline->confidence->getValue() / 100.0);
        $sampleFactor = min(1.0, $newSamples / 10.0);

        return $this->learningRate * $confidenceFactor * $sampleFactor;
    }

    /**
     * Calculate baseline confidence from the sample size.
     *
     * Confidence is 75 at exactly $minSamplesForBaseline samples, grows linearly
     * with the sample size and plateaus at 95.
     */
    private function calculateBaselineConfidence(int $sampleSize): Percentage
    {
        // The constructor does not validate $minSamplesForBaseline; guard against a
        // zero (or negative) divisor instead of raising DivisionByZeroError.
        $requiredSamples = max(1, $this->minSamplesForBaseline);
        $confidence = min(95.0, ($sampleSize / $requiredSamples) * 75.0);

        return new Percentage(max(0.0, $confidence));
    }

    /**
     * Apply a conservative seasonal adjustment to mean and standard deviation.
     *
     * The history is bucketed by hour of day and day of week. The share of samples
     * falling into the current hour/weekday, relative to the average bucket, gives a
     * seasonal factor of which 10% is applied. Features without a timestamp are
     * ignored; without any timestamped feature the statistics are returned unchanged.
     *
     * @param array<string, mixed> $stats          Statistics as produced by calculateStatistics().
     * @param list<Feature>        $featureHistory
     *
     * @return array<string, mixed> The statistics with adjusted 'mean' and 'std_dev'.
     */
    private function applySeasonalAdjustment(array $stats, array $featureHistory): array
    {
        $hourCounts = array_fill(0, 24, 0);
        $dayOfWeekCounts = array_fill(0, 7, 0);

        foreach ($featureHistory as $feature) {
            if ($feature->timestamp === null) {
                continue;
            }

            $hourCounts[(int) $feature->timestamp->format('H')]++;
            $dayOfWeekCounts[(int) $feature->timestamp->format('w')]++;
        }

        // Read the clock once so hour and weekday describe the same instant.
        $now = $this->clock->time();
        $currentHour = (int) $now->format('H');
        $currentDayOfWeek = (int) $now->format('w');

        $avgHourlyCount = array_sum($hourCounts) / 24;
        $avgDailyCount = array_sum($dayOfWeekCounts) / 7;

        // A factor of 1.0 means "no seasonal effect".
        $hourlyFactor = $avgHourlyCount > 0 ? $hourCounts[$currentHour] / $avgHourlyCount : 1.0;
        $dailyFactor = $avgDailyCount > 0 ? $dayOfWeekCounts[$currentDayOfWeek] / $avgDailyCount : 1.0;

        $seasonalFactor = ($hourlyFactor + $dailyFactor) / 2;
        $adjustmentWeight = 0.1; // Limit seasonal impact

        $stats['mean'] *= (1 + ($seasonalFactor - 1) * $adjustmentWeight);
        // The spread is only ever widened, regardless of the factor's direction.
        $stats['std_dev'] *= (1 + abs($seasonalFactor - 1) * $adjustmentWeight);

        return $stats;
    }

    /**
     * Append a feature to the history of a key, discarding the oldest entries
     * beyond $maxSamplesPerBaseline.
     */
    private function recordFeature(string $key, Feature $feature): void
    {
        $this->featureHistory[$key] ??= [];
        $this->featureHistory[$key][] = $feature;

        // Trim by the actual overflow so a history that was seeded above the cap
        // (e.g. through the constructor) is brought back under it in one step.
        $overflow = count($this->featureHistory[$key]) - $this->maxSamplesPerBaseline;

        if ($overflow > 0) {
            array_splice($this->featureHistory[$key], 0, $overflow);
        }
    }

    /**
     * Check whether the update interval has elapsed for a key.
     *
     * Keys that were never updated are always due.
     */
    private function shouldUpdateBaseline(string $key): bool
    {
        $lastUpdate = $this->updateTimestamps[$key] ?? null;

        if ($lastUpdate === null) {
            return true;
        }

        $elapsed = $lastUpdate->diff($this->clock->time());

        return $elapsed->toMilliseconds() >= $this->baselineUpdateInterval->toMilliseconds();
    }

    /**
     * Check whether a baseline is older than $baselineMaxAge.
     */
    private function isBaselineExpired(Baseline $baseline): bool
    {
        $age = $baseline->lastUpdated->diff($this->clock->time());

        return $age->toMilliseconds() > $this->baselineMaxAge->toMilliseconds();
    }

    /**
     * Build the array key under which a baseline and its history are stored.
     */
    private function generateBaselineKey(FeatureType $behaviorType, string $featureName): string
    {
        return $behaviorType->value . ':' . $featureName;
    }

    /**
     * Get the distinct behavior type values that currently have a baseline.
     *
     * @return list<int|string>
     */
    private function getBaselineFeatureTypes(): array
    {
        $types = array_map(
            static fn ($baseline) => $baseline->type->value,
            array_values($this->baselines),
        );

        // array_unique() preserves keys; re-index so callers get a gap-free list
        // (which also serialises as a JSON array rather than an object).
        return array_values(array_unique($types));
    }

    /**
     * Append a batch-update event to the bounded performance-metrics log.
     */
    private function recordBaselineUpdate(string $key, int $sampleCount): void
    {
        $this->performanceMetrics[] = [
            'timestamp' => $this->clock->time()->toUnixTimestamp(),
            'baseline_key' => $key,
            'sample_count' => $sampleCount,
            'operation' => 'update',
        ];

        // Keep only the most recent entries.
        $overflow = count($this->performanceMetrics) - self::MAX_PERFORMANCE_METRICS;

        if ($overflow > 0) {
            array_splice($this->performanceMetrics, 0, $overflow);
        }
    }

    /**
     * Get the recorded batch-update events, oldest first.
     *
     * @return list<array<string, mixed>>
     */
    public function getPerformanceMetrics(): array
    {
        return $this->performanceMetrics;
    }

    /**
     * Get the effective configuration together with current size counters.
     *
     * @return array<string, mixed>
     */
    public function getConfiguration(): array
    {
        return [
            'baseline_update_interval_ms' => $this->baselineUpdateInterval->toMilliseconds(),
            'baseline_max_age_ms' => $this->baselineMaxAge->toMilliseconds(),
            'min_samples_for_baseline' => $this->minSamplesForBaseline,
            'max_samples_per_baseline' => $this->maxSamplesPerBaseline,
            'learning_rate' => $this->learningRate,
            'enable_adaptive_baselines' => $this->enableAdaptiveBaselines,
            'enable_seasonal_adjustment' => $this->enableSeasonalAdjustment,
            'active_baselines' => count($this->baselines),
            'total_feature_history' => array_sum(array_map(count(...), $this->featureHistory)),
        ];
    }
}