Files
michaelschiemer/src/Framework/Waf/MachineLearning/Extractors/FrequencyFeatureExtractor.php
Michael Schiemer fc3d7e6357 feat(Production): Complete production deployment infrastructure
- Add comprehensive health check system with multiple endpoints
- Add Prometheus metrics endpoint
- Add production logging configurations (5 strategies)
- Add complete deployment documentation suite:
  * QUICKSTART.md - 30-minute deployment guide
  * DEPLOYMENT_CHECKLIST.md - Printable verification checklist
  * DEPLOYMENT_WORKFLOW.md - Complete deployment lifecycle
  * PRODUCTION_DEPLOYMENT.md - Comprehensive technical reference
  * production-logging.md - Logging configuration guide
  * ANSIBLE_DEPLOYMENT.md - Infrastructure as Code automation
  * README.md - Navigation hub
  * DEPLOYMENT_SUMMARY.md - Executive summary
- Add deployment scripts and automation
- Add DEPLOYMENT_PLAN.md - Concrete plan for immediate deployment
- Update README with production-ready features

All production infrastructure is now complete and ready for deployment.
2025-10-25 19:18:37 +02:00

581 lines
17 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
declare(strict_types=1);
namespace App\Framework\Waf\MachineLearning\Extractors;
use App\Framework\Core\ValueObjects\Timestamp;
use App\Framework\MachineLearning\Core\FeatureExtractorMetadata;
use App\Framework\MachineLearning\Core\FeatureExtractorPerformance;
use App\Framework\MachineLearning\ValueObjects\Feature;
use App\Framework\MachineLearning\ValueObjects\FeatureType;
use App\Framework\Waf\Analysis\ValueObjects\RequestAnalysisData;
use App\Framework\Waf\MachineLearning\WafFeatureExtractor;
/**
 * Extracts request frequency and rate-based behavioral features
 *
 * Uses atomic interface composition pattern (NO extends):
 * - WafFeatureExtractor: Domain-specific feature extraction
 * - FeatureExtractorMetadata: Metadata and configuration
 * - FeatureExtractorPerformance: Performance characteristics
 *
 * Request timestamps are kept per client in an in-memory array on this instance.
 * NOTE(review): histories of clients that never send another request are not evicted,
 * so the map grows with the number of distinct clients seen by this instance.
 */
final class FrequencyFeatureExtractor implements
    WafFeatureExtractor,
    FeatureExtractorMetadata,
    FeatureExtractorPerformance
{
    /** Window (seconds) used for the burst rate feature. */
    private const BURST_WINDOW_SECONDS = 60;

    /** Window (seconds) used for the sustained rate feature. */
    private const SUSTAINED_WINDOW_SECONDS = 1800;

    /** Lags (seconds) probed when scoring periodicity. */
    private const PERIODICITY_LAGS = [10, 30, 60];

    /** Bucket width (seconds) of the histogram behind the frequency entropy feature. */
    private const ENTROPY_BUCKET_SECONDS = 10;

    /**
     * @param bool  $enabled           Value reported by isEnabled()
     * @param int   $timeWindowSeconds Analysis window for all features except the sustained rate
     * @param int   $maxStoredRequests Upper bound of stored timestamps per client
     * @param float $burstThreshold    Only exposed through getConfiguration(); not used for extraction here
     * @param array $requestHistory    Map of client id => list of unix timestamps (in-memory storage)
     */
    public function __construct(
        private readonly bool $enabled = true,
        private readonly int $timeWindowSeconds = 300, // 5 minutes
        private readonly int $maxStoredRequests = 1000,
        private readonly float $burstThreshold = 10.0, // requests per second
        private array $requestHistory = [] // In-memory storage (would be Redis in production)
    ) {
    }

    public function getFeatureType(): FeatureType
    {
        return FeatureType::FREQUENCY;
    }

    /**
     * Extraction is only attempted for requests that carry a client IP.
     */
    public function canExtract(RequestAnalysisData $requestData): bool
    {
        return $requestData->clientIp !== null;
    }

    /**
     * Records the current request and derives all frequency features for its client.
     *
     * @param RequestAnalysisData $requestData Request being analysed
     * @param array               $context     Unused by this extractor
     *
     * @return array List of Feature objects in the order given by getFeatureNames(),
     *               or an empty array when no request falls into the analysis window
     */
    public function extractFeatures(RequestAnalysisData $requestData, array $context = []): array
    {
        $clientId = $this->getClientId($requestData);
        $currentTime = $requestData->timestamp ?? Timestamp::now();

        // Record first, then prune, so the current request is always part of the history.
        $this->recordRequest($clientId, $currentTime);
        $this->cleanOldRequests($clientId, $currentTime);

        // Sorted, zero-indexed timestamps inside the primary analysis window.
        $requests = $this->getRequestHistory($clientId, $currentTime);
        if ($requests === []) {
            return [];
        }

        // The sustained rate looks further back than the primary window, so it works on
        // everything that is still retained and is anchored at the request's own time.
        $retained = $this->requestHistory[$clientId] ?? [];
        $now = $currentTime->toUnixTimestamp();

        return [
            // Basic frequency features
            $this->extractRequestRate($requests, $this->timeWindowSeconds),
            $this->extractBurstRate($requests, self::BURST_WINDOW_SECONDS),
            $this->extractSustainedRate($retained, self::SUSTAINED_WINDOW_SECONDS, $now),
            // Pattern-based features
            $this->extractInterArrivalVariance($requests),
            $this->extractRequestSpacing($requests),
            $this->extractPeriodicityScore($requests),
            // Time-based features
            $this->extractTimeOfDayPattern($requests),
            $this->extractWeekdayPattern($requests),
            // Advanced statistical features
            $this->extractFrequencyEntropy($requests),
            $this->extractBurstiness($requests),
        ];
    }

    /**
     * Extract basic request rate
     *
     * Hands the raw count and window to Feature::frequency().
     * NOTE(review): presumably the factory derives count / timeWindow itself; confirm.
     */
    private function extractRequestRate(array $requests, int $windowSeconds): Feature
    {
        return Feature::frequency(
            name: "request_rate_{$windowSeconds}s",
            count: count($requests),
            timeWindow: $windowSeconds
        );
    }

    /**
     * Extract burst detection rate
     *
     * Finds the largest number of requests inside any window of $windowSeconds that starts
     * at a recorded request, and reports it as requests per second.
     *
     * @param array $requests      Ascending, zero-indexed list of timestamps
     * @param int   $windowSeconds Window length; values <= 0 yield a rate of 0.0
     */
    private function extractBurstRate(array $requests, int $windowSeconds): Feature
    {
        $name = "burst_rate_{$windowSeconds}s";
        $total = count($requests);

        if ($total < 2 || $windowSeconds <= 0) {
            return $this->zeroFeature($name, 'requests/second');
        }

        // Two-pointer sweep: $end only ever moves forward because the list is ascending.
        $peakCount = 0;
        $end = 0;
        for ($start = 0; $start < $total; $start++) {
            while ($end < $total && $requests[$end] - $requests[$start] <= $windowSeconds) {
                $end++;
            }
            $peakCount = max($peakCount, $end - $start);
        }

        return Feature::create(
            type: $this->getFeatureType(),
            name: $name,
            value: (float) ($peakCount / $windowSeconds),
            unit: 'requests/second'
        );
    }

    /**
     * Extract sustained rate (longer window)
     *
     * @param array     $requests      Timestamps to consider (any order)
     * @param int       $windowSeconds Window length; values <= 0 yield a rate of 0.0
     * @param int|float $referenceTime Unix time the window is measured back from
     */
    private function extractSustainedRate(array $requests, int $windowSeconds, int|float $referenceTime): Feature
    {
        $inWindow = 0;
        foreach ($requests as $timestamp) {
            if (($referenceTime - $timestamp) <= $windowSeconds) {
                $inWindow++;
            }
        }

        return Feature::create(
            type: $this->getFeatureType(),
            name: "sustained_rate_{$windowSeconds}s",
            value: $windowSeconds > 0 ? $inWindow / $windowSeconds : 0.0,
            unit: 'requests/second'
        );
    }

    /**
     * Extract inter-arrival time variance
     *
     * Needs at least three requests (two gaps); otherwise the feature is 0.0.
     */
    private function extractInterArrivalVariance(array $requests): Feature
    {
        if (count($requests) < 3) {
            return $this->zeroFeature('inter_arrival_variance', 'seconds²');
        }

        return Feature::statistical(
            type: $this->getFeatureType(),
            name: 'inter_arrival_variance',
            values: $this->interArrivalTimes($requests),
            statistic: 'variance'
        );
    }

    /**
     * Extract request spacing regularity
     *
     * Maps the coefficient of variation (stddev / mean) of the gaps between requests to
     * 1 / (1 + cv): 1 = perfectly regular, towards 0 = very irregular.
     */
    private function extractRequestSpacing(array $requests): Feature
    {
        if (count($requests) < 3) {
            // Same unit as the computed value so consumers see one unit per feature name.
            return $this->zeroFeature('request_spacing_regularity', 'regularity_score');
        }

        $gaps = $this->interArrivalTimes($requests);
        $mean = array_sum($gaps) / count($gaps);
        $variance = Feature::statistical(
            type: $this->getFeatureType(),
            name: 'temp_variance',
            values: $gaps,
            statistic: 'variance'
        )->value;

        // Coefficient of variation (lower = more regular); a zero mean falls back to 1.0.
        $variation = $mean > 0 ? sqrt($variance) / $mean : 1.0;

        return Feature::create(
            type: $this->getFeatureType(),
            name: 'request_spacing_regularity',
            value: 1.0 / (1.0 + $variation),
            unit: 'regularity_score'
        );
    }

    /**
     * Extract periodicity score using autocorrelation
     *
     * Builds a histogram with 1-second buckets and reports the strongest correlation found
     * among the lags in PERIODICITY_LAGS that are shorter than the observed duration.
     * Yields 0.0 for fewer than 10 requests or when all requests share one timestamp.
     */
    private function extractPeriodicityScore(array $requests): Feature
    {
        if (count($requests) < 10) {
            return $this->zeroFeature('periodicity_score', 'correlation');
        }

        $minTime = min($requests);
        $duration = max($requests) - $minTime;
        if ($duration <= 0) {
            return $this->zeroFeature('periodicity_score', 'correlation');
        }

        // Histogram keyed by whole seconds since the first request.
        $buckets = [];
        foreach ($requests as $timestamp) {
            $bucket = (int) ($timestamp - $minTime);
            $buckets[$bucket] = ($buckets[$bucket] ?? 0) + 1;
        }

        $strongest = 0.0;
        foreach (self::PERIODICITY_LAGS as $lag) {
            if ($lag >= $duration) {
                continue;
            }
            $correlation = $this->calculateAutocorrelation($buckets, $lag, (int) $duration);
            $strongest = max($strongest, abs($correlation));
        }

        return Feature::create(
            type: $this->getFeatureType(),
            name: 'periodicity_score',
            value: $strongest,
            unit: 'correlation'
        );
    }

    /**
     * Extract time of day pattern
     *
     * Counts requests per hour (0-23) and passes the distribution to Feature::entropy().
     * NOTE(review): date() uses PHP's default timezone, so the hour depends on server
     * configuration — confirm this is intended.
     */
    private function extractTimeOfDayPattern(array $requests): Feature
    {
        if ($requests === []) {
            return $this->zeroFeature('time_of_day_entropy', 'bits');
        }

        $hourDistribution = array_fill(0, 24, 0);
        foreach ($requests as $timestamp) {
            $hourDistribution[(int) date('H', $timestamp)]++;
        }

        return Feature::entropy(
            type: $this->getFeatureType(),
            name: 'time_of_day_entropy',
            distribution: $hourDistribution
        );
    }

    /**
     * Extract weekday pattern
     *
     * Counts requests per day of week (0 = Sunday, 6 = Saturday) and passes the
     * distribution to Feature::entropy().
     */
    private function extractWeekdayPattern(array $requests): Feature
    {
        if ($requests === []) {
            return $this->zeroFeature('weekday_entropy', 'bits');
        }

        $dayDistribution = array_fill(0, 7, 0);
        foreach ($requests as $timestamp) {
            $dayDistribution[(int) date('w', $timestamp)]++;
        }

        return Feature::entropy(
            type: $this->getFeatureType(),
            name: 'weekday_entropy',
            distribution: $dayDistribution
        );
    }

    /**
     * Extract frequency distribution entropy
     *
     * Groups requests into ENTROPY_BUCKET_SECONDS wide buckets (relative to the earliest
     * request) and passes the non-empty bucket counts to Feature::entropy().
     * Yields 0.0 for fewer than five requests.
     */
    private function extractFrequencyEntropy(array $requests): Feature
    {
        if (count($requests) < 5) {
            return $this->zeroFeature('frequency_entropy', 'bits');
        }

        $minTime = min($requests);
        $buckets = [];
        foreach ($requests as $timestamp) {
            $bucket = (int) (($timestamp - $minTime) / self::ENTROPY_BUCKET_SECONDS);
            $buckets[$bucket] = ($buckets[$bucket] ?? 0) + 1;
        }

        return Feature::entropy(
            type: $this->getFeatureType(),
            name: 'frequency_entropy',
            distribution: array_values($buckets)
        );
    }

    /**
     * Extract burstiness measure
     *
     * Burstiness coefficient (σ - μ) / (σ + μ) over the gaps between requests:
     * -1 = regular, +1 = bursty. Yields 0.0 for fewer than five requests or when
     * σ + μ is not positive.
     */
    private function extractBurstiness(array $requests): Feature
    {
        if (count($requests) < 5) {
            return $this->zeroFeature('burstiness', 'burstiness_coefficient');
        }

        $gaps = $this->interArrivalTimes($requests);
        $mean = array_sum($gaps) / count($gaps);
        $variance = Feature::statistical(
            type: $this->getFeatureType(),
            name: 'temp_variance',
            values: $gaps,
            statistic: 'variance'
        )->value;

        $stdDev = sqrt($variance);
        $spread = $stdDev + $mean;

        return Feature::create(
            type: $this->getFeatureType(),
            name: 'burstiness',
            value: $spread > 0 ? ($stdDev - $mean) / $spread : 0.0,
            unit: 'burstiness_coefficient'
        );
    }

    /**
     * Record a request timestamp
     *
     * Appends the timestamp to the client's history and drops the oldest stored entry
     * once more than $maxStoredRequests are held.
     */
    private function recordRequest(string $clientId, Timestamp $timestamp): void
    {
        $this->requestHistory[$clientId] ??= [];
        $this->requestHistory[$clientId][] = $timestamp->toUnixTimestamp();

        // Limit memory usage
        if (count($this->requestHistory[$clientId]) > $this->maxStoredRequests) {
            array_shift($this->requestHistory[$clientId]);
        }
    }

    /**
     * Clean old requests outside the retention window
     *
     * Retention is the larger of the analysis window and the sustained-rate window, so the
     * sustained rate still has data to look at. The stored list is re-indexed afterwards;
     * a client whose history becomes empty is removed from the map.
     */
    private function cleanOldRequests(string $clientId, Timestamp $currentTime): void
    {
        if (! isset($this->requestHistory[$clientId])) {
            return;
        }

        $retentionSeconds = max($this->timeWindowSeconds, self::SUSTAINED_WINDOW_SECONDS);
        $cutoffTime = $currentTime->toUnixTimestamp() - $retentionSeconds;

        // array_filter() keeps the original keys; array_values() restores a plain list.
        $kept = array_values(array_filter(
            $this->requestHistory[$clientId],
            static fn ($timestamp): bool => $timestamp >= $cutoffTime
        ));

        if ($kept === []) {
            unset($this->requestHistory[$clientId]);

            return;
        }

        $this->requestHistory[$clientId] = $kept;
    }

    /**
     * Get request history for analysis
     *
     * @return array Timestamps of the client that lie within $timeWindowSeconds before
     *               $currentTime, sorted ascending and indexed 0..n-1
     */
    private function getRequestHistory(string $clientId, Timestamp $currentTime): array
    {
        if (! isset($this->requestHistory[$clientId])) {
            return [];
        }

        $cutoffTime = $currentTime->toUnixTimestamp() - $this->timeWindowSeconds;
        $recent = array_filter(
            $this->requestHistory[$clientId],
            static fn ($timestamp): bool => $timestamp >= $cutoffTime
        );

        // sort() orders the timestamps and assigns fresh consecutive keys, which the
        // index-based extractors rely on.
        sort($recent);

        return $recent;
    }

    /**
     * Get client identifier
     *
     * Prefers the session id, falls back to the client IP, and finally to 'unknown'.
     */
    private function getClientId(RequestAnalysisData $requestData): string
    {
        if (! empty($requestData->sessionId)) {
            return 'session:' . $requestData->sessionId;
        }

        if ($requestData->clientIp !== null) {
            return 'ip:' . $requestData->clientIp->toString();
        }

        return 'unknown';
    }

    /**
     * Calculate autocorrelation for a given lag
     *
     * Computes the normalised cross-product Σ(x·y) / sqrt(Σx² · Σy²) between the bucket
     * series and its copy shifted by $lag. For the non-negative counts used here the result
     * lies in [0, 1], which keeps the 'correlation' feature bounded.
     *
     * @param array $buckets  Map of second offset => request count (missing offsets count as 0)
     * @param int   $lag      Shift in seconds
     * @param int   $duration Number of seconds covered by the series
     *
     * @return float 0.0 when the lag is out of range or either series is all zero
     */
    private function calculateAutocorrelation(array $buckets, int $lag, int $duration): float
    {
        if ($lag >= $duration || $lag <= 0) {
            return 0.0;
        }

        $crossProduct = 0.0;
        $energyBase = 0.0;
        $energyShifted = 0.0;

        for ($i = 0, $limit = $duration - $lag; $i < $limit; $i++) {
            $base = $buckets[$i] ?? 0;
            $shifted = $buckets[$i + $lag] ?? 0;

            $crossProduct += $base * $shifted;
            $energyBase += $base * $base;
            $energyShifted += $shifted * $shifted;
        }

        // Guard the division: an all-zero series has no defined correlation.
        if ($energyBase <= 0.0 || $energyShifted <= 0.0) {
            return 0.0;
        }

        return $crossProduct / sqrt($energyBase * $energyShifted);
    }

    /**
     * Gaps between consecutive requests.
     *
     * @param array $requests Timestamps in any order
     *
     * @return array List of differences between neighbouring timestamps after ascending sort
     */
    private function interArrivalTimes(array $requests): array
    {
        sort($requests);

        $gaps = [];
        for ($i = 1, $total = count($requests); $i < $total; $i++) {
            $gaps[] = $requests[$i] - $requests[$i - 1];
        }

        return $gaps;
    }

    /**
     * Builds the 0.0-valued fallback feature used when there is too little data.
     */
    private function zeroFeature(string $name, string $unit): Feature
    {
        return Feature::create(
            type: $this->getFeatureType(),
            name: $name,
            value: 0.0,
            unit: $unit
        );
    }

    /**
     * Names of the features in the order extractFeatures() returns them.
     */
    public function getFeatureNames(): array
    {
        return [
            "request_rate_{$this->timeWindowSeconds}s",
            'burst_rate_' . self::BURST_WINDOW_SECONDS . 's',
            'sustained_rate_' . self::SUSTAINED_WINDOW_SECONDS . 's',
            'inter_arrival_variance',
            'request_spacing_regularity',
            'periodicity_score',
            'time_of_day_entropy',
            'weekday_entropy',
            'frequency_entropy',
            'burstiness',
        ];
    }

    /**
     * Current configuration values plus the number of features produced.
     */
    public function getConfiguration(): array
    {
        return [
            'enabled' => $this->enabled,
            'time_window_seconds' => $this->timeWindowSeconds,
            'max_stored_requests' => $this->maxStoredRequests,
            'burst_threshold' => $this->burstThreshold,
            'feature_count' => count($this->getFeatureNames()),
        ];
    }

    public function isEnabled(): bool
    {
        return $this->enabled;
    }

    public function getPriority(): int
    {
        return 100; // High priority for frequency analysis
    }

    public function getExpectedProcessingTime(): int
    {
        return 50; // milliseconds
    }

    public function supportsParallelExecution(): bool
    {
        return false; // Needs sequential access for request history
    }

    public function getDependencies(): array
    {
        return []; // No dependencies
    }
}