- Add comprehensive health check system with multiple endpoints - Add Prometheus metrics endpoint - Add production logging configurations (5 strategies) - Add complete deployment documentation suite: * QUICKSTART.md - 30-minute deployment guide * DEPLOYMENT_CHECKLIST.md - Printable verification checklist * DEPLOYMENT_WORKFLOW.md - Complete deployment lifecycle * PRODUCTION_DEPLOYMENT.md - Comprehensive technical reference * production-logging.md - Logging configuration guide * ANSIBLE_DEPLOYMENT.md - Infrastructure as Code automation * README.md - Navigation hub * DEPLOYMENT_SUMMARY.md - Executive summary - Add deployment scripts and automation - Add DEPLOYMENT_PLAN.md - Concrete plan for immediate deployment - Update README with production-ready features All production infrastructure is now complete and ready for deployment.
581 lines
17 KiB
PHP
581 lines
17 KiB
PHP
<?php
|
||
|
||
declare(strict_types=1);
|
||
|
||
namespace App\Framework\Waf\MachineLearning\Extractors;
|
||
|
||
use App\Framework\Core\ValueObjects\Timestamp;
|
||
use App\Framework\MachineLearning\Core\FeatureExtractorMetadata;
|
||
use App\Framework\MachineLearning\Core\FeatureExtractorPerformance;
|
||
use App\Framework\MachineLearning\ValueObjects\Feature;
|
||
use App\Framework\MachineLearning\ValueObjects\FeatureType;
|
||
use App\Framework\Waf\Analysis\ValueObjects\RequestAnalysisData;
|
||
use App\Framework\Waf\MachineLearning\WafFeatureExtractor;
|
||
|
||
/**
 * Extracts request frequency and rate-based behavioral features
 *
 * Uses atomic interface composition pattern (NO extends):
 * - WafFeatureExtractor: Domain-specific feature extraction
 * - FeatureExtractorMetadata: Metadata and configuration
 * - FeatureExtractorPerformance: Performance characteristics
 *
 * Request timestamps are stored per client in an array owned by this instance
 * (the original author's note: this would be Redis in production). A client's
 * history is only pruned when that same client is seen again, so entries for
 * clients that never return stay in memory for the lifetime of the instance.
 */
final class FrequencyFeatureExtractor implements
    WafFeatureExtractor,
    FeatureExtractorMetadata,
    FeatureExtractorPerformance
{
    /** Window, in seconds, scanned for the peak (burst) request rate. */
    private const BURST_WINDOW_SECONDS = 60;

    /** Window, in seconds, used for the long-running (sustained) request rate. */
    private const SUSTAINED_WINDOW_SECONDS = 1800;

    /** Lags, in seconds, probed when scoring periodicity. */
    private const PERIODICITY_LAGS_SECONDS = [10, 30, 60];

    /** Bucket width, in seconds, of the histogram behind `frequency_entropy`. */
    private const ENTROPY_BUCKET_SECONDS = 10;

    /**
     * @param bool  $enabled           Value reported by isEnabled()
     * @param int   $timeWindowSeconds Length of the main analysis window (default: 5 minutes)
     * @param int   $maxStoredRequests Upper bound on timestamps kept per client
     * @param float $burstThreshold    Requests per second; only exposed through getConfiguration()
     * @param array $requestHistory    Initial history: client id => list of Unix timestamps
     */
    public function __construct(
        private readonly bool $enabled = true,
        private readonly int $timeWindowSeconds = 300, // 5 minutes
        private readonly int $maxStoredRequests = 1000,
        private readonly float $burstThreshold = 10.0, // requests per second
        private array $requestHistory = [], // In-memory storage (would be Redis in production)
    ) {
    }

    public function getFeatureType(): FeatureType
    {
        return FeatureType::FREQUENCY;
    }

    /**
     * Extraction is only attempted for requests that carry a client IP.
     */
    public function canExtract(RequestAnalysisData $requestData): bool
    {
        return $requestData->clientIp !== null;
    }

    /**
     * Records the request, prunes the client's history and derives all
     * frequency features from what remains.
     *
     * The request's own timestamp is used as "now" (falling back to
     * Timestamp::now() when it is null), so every feature is computed on the
     * same clock as the stored history.
     *
     * @return array List of Feature objects; empty when no history is available
     */
    public function extractFeatures(RequestAnalysisData $requestData, array $context = []): array
    {
        $clientId = $this->getClientId($requestData);
        $currentTime = $requestData->timestamp ?? Timestamp::now();

        $this->recordRequest($clientId, $currentTime);
        $this->cleanOldRequests($clientId, $currentTime);

        // History restricted to the main analysis window.
        $requests = $this->getRequestHistory($clientId, $currentTime);

        if ($requests === []) {
            return [];
        }

        return [
            // Basic frequency features
            $this->extractRequestRate($requests, $this->timeWindowSeconds),
            $this->extractBurstRate($requests, self::BURST_WINDOW_SECONDS),
            // The sustained window may be longer than the analysis window, so it
            // is fed the full retained history rather than $requests.
            $this->extractSustainedRate(
                $this->requestHistory[$clientId] ?? [],
                self::SUSTAINED_WINDOW_SECONDS,
                $currentTime->toUnixTimestamp(),
            ),

            // Pattern-based features
            $this->extractInterArrivalVariance($requests),
            $this->extractRequestSpacing($requests),
            $this->extractPeriodicityScore($requests),

            // Time-based features
            $this->extractTimeOfDayPattern($requests),
            $this->extractWeekdayPattern($requests),

            // Advanced statistical features
            $this->extractFrequencyEntropy($requests),
            $this->extractBurstiness($requests),
        ];
    }

    /**
     * Extract basic request rate
     *
     * Hands the raw count and window length to Feature::frequency().
     */
    private function extractRequestRate(array $requests, int $windowSeconds): Feature
    {
        return Feature::frequency(
            name: "request_rate_{$windowSeconds}s",
            count: count($requests),
            timeWindow: $windowSeconds,
        );
    }

    /**
     * Extract burst detection rate
     *
     * Finds the largest number of requests that fall inside any window of
     * $windowSeconds starting at a recorded request, and reports it as
     * requests per second. Yields 0.0 for fewer than two requests or a
     * non-positive window.
     */
    private function extractBurstRate(array $requests, int $windowSeconds): Feature
    {
        $name = "burst_rate_{$windowSeconds}s";
        $total = count($requests);

        if ($total < 2 || $windowSeconds <= 0) {
            return $this->emptyFeature($name, 'requests/second');
        }

        // sort() both orders the timestamps and re-indexes them from 0, which
        // the index-based scan below relies on.
        sort($requests);

        // Two-pointer sweep: $end only ever moves forward, so the scan is linear.
        $peakCount = 0;
        $end = 0;
        for ($start = 0; $start < $total; $start++) {
            while ($end < $total && $requests[$end] - $requests[$start] <= $windowSeconds) {
                $end++;
            }

            $peakCount = max($peakCount, $end - $start);
        }

        return Feature::create(
            type: $this->getFeatureType(),
            name: $name,
            value: $peakCount / $windowSeconds,
            unit: 'requests/second'
        );
    }

    /**
     * Extract sustained rate (longer window)
     *
     * Counts the requests no older than $windowSeconds relative to
     * $referenceTime and divides by the window length.
     *
     * @param array     $requests      Unix timestamps to consider
     * @param int       $windowSeconds Length of the sustained window
     * @param int|float $referenceTime Unix timestamp treated as "now"
     */
    private function extractSustainedRate(array $requests, int $windowSeconds, int|float $referenceTime): Feature
    {
        $name = "sustained_rate_{$windowSeconds}s";

        if ($windowSeconds <= 0) {
            return $this->emptyFeature($name, 'requests/second');
        }

        $withinWindow = array_filter(
            $requests,
            static fn ($timestamp): bool => ($referenceTime - $timestamp) <= $windowSeconds
        );

        return Feature::create(
            type: $this->getFeatureType(),
            name: $name,
            value: count($withinWindow) / $windowSeconds,
            unit: 'requests/second'
        );
    }

    /**
     * Extract inter-arrival time variance
     *
     * Needs at least three requests (two gaps); otherwise reports 0.0.
     */
    private function extractInterArrivalVariance(array $requests): Feature
    {
        if (count($requests) < 3) {
            return $this->emptyFeature('inter_arrival_variance', 'seconds²');
        }

        return Feature::statistical(
            type: $this->getFeatureType(),
            name: 'inter_arrival_variance',
            values: $this->interArrivalTimes($requests),
            statistic: 'variance'
        );
    }

    /**
     * Extract request spacing regularity
     *
     * Maps the coefficient of variation (cv) of the inter-arrival times onto
     * 1 / (1 + cv): 1 = perfectly regular, approaching 0 = very irregular.
     * Reports 0.0 when fewer than three requests are available.
     */
    private function extractRequestSpacing(array $requests): Feature
    {
        if (count($requests) < 3) {
            return $this->emptyFeature('request_spacing_regularity', 'regularity_score');
        }

        $gaps = $this->interArrivalTimes($requests);
        $mean = array_sum($gaps) / count($gaps);

        // Coefficient of variation (lower = more regular); falls back to 1.0
        // when all requests share one timestamp and the mean gap is 0.
        $variation = $mean > 0 ? sqrt($this->varianceOf($gaps)) / $mean : 1.0;

        return Feature::create(
            type: $this->getFeatureType(),
            name: 'request_spacing_regularity',
            value: 1.0 / (1.0 + $variation),
            unit: 'regularity_score'
        );
    }

    /**
     * Extract periodicity score using autocorrelation
     *
     * Builds a histogram with 1-second buckets and reports the strongest
     * normalised lagged product over the lags in PERIODICITY_LAGS_SECONDS
     * that are shorter than the observed duration. Reports 0.0 for fewer
     * than ten requests or a zero-length duration.
     */
    private function extractPeriodicityScore(array $requests): Feature
    {
        if (count($requests) < 10) {
            return $this->emptyFeature('periodicity_score', 'correlation');
        }

        $firstSeen = min($requests);
        $duration = (int) (max($requests) - $firstSeen);

        if ($duration <= 0) {
            return $this->emptyFeature('periodicity_score', 'correlation');
        }

        // Histogram keyed by seconds elapsed since the first request.
        $histogram = [];
        foreach ($requests as $timestamp) {
            $slot = (int) ($timestamp - $firstSeen);
            $histogram[$slot] = ($histogram[$slot] ?? 0) + 1;
        }

        $strongest = 0.0;
        foreach (self::PERIODICITY_LAGS_SECONDS as $lag) {
            if ($lag >= $duration) {
                continue;
            }

            $strongest = max(
                $strongest,
                abs($this->calculateAutocorrelation($histogram, $lag, $duration))
            );
        }

        return Feature::create(
            type: $this->getFeatureType(),
            name: 'periodicity_score',
            value: $strongest,
            unit: 'correlation'
        );
    }

    /**
     * Extract time of day pattern
     *
     * Entropy of the request distribution over the 24 hours of the day.
     * Hours are resolved with date(), i.e. in PHP's default timezone.
     */
    private function extractTimeOfDayPattern(array $requests): Feature
    {
        if ($requests === []) {
            return $this->emptyFeature('time_of_day_entropy', 'bits');
        }

        $perHour = array_fill(0, 24, 0);
        foreach ($requests as $timestamp) {
            $perHour[(int) date('H', (int) $timestamp)]++;
        }

        return Feature::entropy(
            type: $this->getFeatureType(),
            name: 'time_of_day_entropy',
            distribution: $perHour
        );
    }

    /**
     * Extract weekday pattern
     *
     * Entropy of the request distribution over the days of the week
     * (0 = Sunday, 6 = Saturday), resolved in PHP's default timezone.
     */
    private function extractWeekdayPattern(array $requests): Feature
    {
        if ($requests === []) {
            return $this->emptyFeature('weekday_entropy', 'bits');
        }

        $perWeekday = array_fill(0, 7, 0);
        foreach ($requests as $timestamp) {
            $perWeekday[(int) date('w', (int) $timestamp)]++;
        }

        return Feature::entropy(
            type: $this->getFeatureType(),
            name: 'weekday_entropy',
            distribution: $perWeekday
        );
    }

    /**
     * Extract frequency distribution entropy
     *
     * Entropy of the request counts per ENTROPY_BUCKET_SECONDS-wide bucket,
     * measured from the earliest request. Only non-empty buckets are passed
     * on. Reports 0.0 for fewer than five requests.
     */
    private function extractFrequencyEntropy(array $requests): Feature
    {
        if (count($requests) < 5) {
            return $this->emptyFeature('frequency_entropy', 'bits');
        }

        $firstSeen = min($requests);
        $histogram = [];

        foreach ($requests as $timestamp) {
            $slot = (int) (($timestamp - $firstSeen) / self::ENTROPY_BUCKET_SECONDS);
            $histogram[$slot] = ($histogram[$slot] ?? 0) + 1;
        }

        return Feature::entropy(
            type: $this->getFeatureType(),
            name: 'frequency_entropy',
            distribution: array_values($histogram)
        );
    }

    /**
     * Extract burstiness measure
     *
     * Burstiness coefficient (σ - μ) / (σ + μ) of the inter-arrival times,
     * ranging from -1 (regular) to +1 (bursty). Reports 0.0 for fewer than
     * five requests or when σ + μ is zero.
     */
    private function extractBurstiness(array $requests): Feature
    {
        if (count($requests) < 5) {
            return $this->emptyFeature('burstiness', 'burstiness_coefficient');
        }

        $gaps = $this->interArrivalTimes($requests);
        $mean = array_sum($gaps) / count($gaps);
        $stdDev = sqrt($this->varianceOf($gaps));

        $spread = $stdDev + $mean;
        $burstiness = $spread > 0 ? ($stdDev - $mean) / $spread : 0.0;

        return Feature::create(
            type: $this->getFeatureType(),
            name: 'burstiness',
            value: $burstiness,
            unit: 'burstiness_coefficient'
        );
    }

    /**
     * Record a request timestamp
     *
     * Appends the timestamp to the client's history and drops the entries at
     * the front of the list once more than $maxStoredRequests are held.
     */
    private function recordRequest(string $clientId, Timestamp $timestamp): void
    {
        $this->requestHistory[$clientId] ??= [];
        $this->requestHistory[$clientId][] = $timestamp->toUnixTimestamp();

        // Limit memory usage
        $excess = count($this->requestHistory[$clientId]) - $this->maxStoredRequests;
        if ($excess > 0) {
            array_splice($this->requestHistory[$clientId], 0, $excess);
        }
    }

    /**
     * Clean old requests outside the retention window
     *
     * Retention spans the longer of the analysis window and the sustained-rate
     * window, so the sustained rate has the history it is defined over.
     */
    private function cleanOldRequests(string $clientId, Timestamp $currentTime): void
    {
        if (! isset($this->requestHistory[$clientId])) {
            return;
        }

        $retentionSeconds = max($this->timeWindowSeconds, self::SUSTAINED_WINDOW_SECONDS);
        $cutoffTime = $currentTime->toUnixTimestamp() - $retentionSeconds;

        // array_filter() keeps the original keys; re-index so the stored
        // history stays a gap-free list.
        $this->requestHistory[$clientId] = array_values(array_filter(
            $this->requestHistory[$clientId],
            static fn ($timestamp): bool => $timestamp >= $cutoffTime
        ));
    }

    /**
     * Get request history for analysis
     *
     * @return array Gap-free list of the client's timestamps that are no older
     *               than $timeWindowSeconds relative to $currentTime
     */
    private function getRequestHistory(string $clientId, Timestamp $currentTime): array
    {
        if (! isset($this->requestHistory[$clientId])) {
            return [];
        }

        $cutoffTime = $currentTime->toUnixTimestamp() - $this->timeWindowSeconds;

        return array_values(array_filter(
            $this->requestHistory[$clientId],
            static fn ($timestamp): bool => $timestamp >= $cutoffTime
        ));
    }

    /**
     * Get client identifier
     *
     * Prefers the session ID, falls back to the client IP and finally to the
     * shared key 'unknown'.
     */
    private function getClientId(RequestAnalysisData $requestData): string
    {
        if (! empty($requestData->sessionId)) {
            return 'session:' . $requestData->sessionId;
        }

        if ($requestData->clientIp !== null) {
            return 'ip:' . $requestData->clientIp->toString();
        }

        return 'unknown';
    }

    /**
     * Calculate autocorrelation for a given lag
     *
     * Pairs every bucket i with bucket i + lag and returns
     * 2·Σ(x·y) / Σ(x² + y²). Because 2xy <= x² + y², the result never exceeds
     * 1 and is 1 only when every paired bucket holds the same count. The
     * counts are not mean-centred.
     *
     * @param array $buckets  Request count per 1-second bucket; missing keys count as 0
     * @param int   $lag      Lag in buckets
     * @param int   $duration Index of the last bucket (newest minus oldest timestamp)
     */
    private function calculateAutocorrelation(array $buckets, int $lag, int $duration): float
    {
        if ($lag <= 0 || $lag >= $duration) {
            return 0.0;
        }

        $crossProduct = 0.0;
        $energy = 0.0;

        // Inclusive upper bound: bucket $duration holds the newest request and
        // must take part in the comparison.
        for ($i = 0; $i + $lag <= $duration; $i++) {
            $x = $buckets[$i] ?? 0;
            $y = $buckets[$i + $lag] ?? 0;

            $crossProduct += $x * $y;
            $energy += $x * $x + $y * $y;
        }

        // $energy is a float, so it is compared against a float; a strict
        // comparison with the integer 0 would never match.
        if ($energy <= 0.0) {
            return 0.0;
        }

        return (2.0 * $crossProduct) / $energy;
    }

    /**
     * Sorted gaps between consecutive request timestamps.
     *
     * @param array $requests Unix timestamps in any order, with any keys
     *
     * @return array One gap per adjacent pair; empty for fewer than two timestamps
     */
    private function interArrivalTimes(array $requests): array
    {
        sort($requests);

        $gaps = [];
        for ($i = 1, $total = count($requests); $i < $total; $i++) {
            $gaps[] = $requests[$i] - $requests[$i - 1];
        }

        return $gaps;
    }

    /**
     * Variance of the given values as computed by Feature::statistical().
     */
    private function varianceOf(array $values): float
    {
        return Feature::statistical(
            type: $this->getFeatureType(),
            name: 'temp_variance',
            values: $values,
            statistic: 'variance'
        )->value;
    }

    /**
     * Fallback feature with value 0.0, used when there is too little data.
     */
    private function emptyFeature(string $name, string $unit): Feature
    {
        return Feature::create(
            type: $this->getFeatureType(),
            name: $name,
            value: 0.0,
            unit: $unit
        );
    }

    /**
     * Names of the features produced by extractFeatures(), in emission order.
     */
    public function getFeatureNames(): array
    {
        return [
            "request_rate_{$this->timeWindowSeconds}s",
            'burst_rate_' . self::BURST_WINDOW_SECONDS . 's',
            'sustained_rate_' . self::SUSTAINED_WINDOW_SECONDS . 's',
            'inter_arrival_variance',
            'request_spacing_regularity',
            'periodicity_score',
            'time_of_day_entropy',
            'weekday_entropy',
            'frequency_entropy',
            'burstiness',
        ];
    }

    /**
     * Constructor settings plus the number of features this extractor names.
     */
    public function getConfiguration(): array
    {
        return [
            'enabled' => $this->enabled,
            'time_window_seconds' => $this->timeWindowSeconds,
            'max_stored_requests' => $this->maxStoredRequests,
            'burst_threshold' => $this->burstThreshold,
            'feature_count' => count($this->getFeatureNames()),
        ];
    }

    public function isEnabled(): bool
    {
        return $this->enabled;
    }

    public function getPriority(): int
    {
        return 100; // High priority for frequency analysis
    }

    public function getExpectedProcessingTime(): int
    {
        return 50; // milliseconds
    }

    public function supportsParallelExecution(): bool
    {
        return false; // Needs sequential access for request history
    }

    public function getDependencies(): array
    {
        return []; // No dependencies
    }
}
|