- Add DISCOVERY_LOG_LEVEL=debug - Add DISCOVERY_SHOW_PROGRESS=true - Temporary changes for debugging InitializerProcessor fixes on production
915 lines
27 KiB
PHP
915 lines
27 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Framework\Waf\MachineLearning\Extractors;
|
|
|
|
use App\Framework\Waf\Analysis\ValueObjects\RequestAnalysisData;
|
|
use App\Framework\Waf\MachineLearning\BehaviorType;
|
|
use App\Framework\Waf\MachineLearning\FeatureExtractorInterface;
|
|
use App\Framework\Waf\MachineLearning\ValueObjects\BehaviorFeature;
|
|
|
|
/**
|
|
* Extracts behavioral patterns from URL paths, parameters, and request structure
|
|
*/
|
|
final class PatternFeatureExtractor implements FeatureExtractorInterface
|
|
{
|
|
/**
 * Configure the extractor and optionally seed its per-client histories.
 *
 * @param bool  $enabled          Value returned by isEnabled() and reported by getConfiguration().
 * @param int   $maxPathSegments  Reported by getConfiguration() as 'max_path_segments'.
 * @param int   $maxParameterKeys Reported by getConfiguration() as 'max_parameter_keys'.
 * @param int   $historySize      Limit compared against each client's history when a request is recorded.
 * @param array $pathHistory      Recorded request paths, keyed by client identifier.
 * @param array $parameterHistory Recorded parameter sets, keyed by client identifier.
 */
public function __construct(
    private readonly bool $enabled = true,
    private readonly int $maxPathSegments = 20,
    private readonly int $maxParameterKeys = 100,
    private readonly int $historySize = 100,
    private array $pathHistory = [],
    private array $parameterHistory = [],
) {
}
|
|
|
|
/**
 * Return the behavior category used for this extractor's path-level features.
 */
public function getBehaviorType(): BehaviorType
{
    $behaviorType = BehaviorType::PATH_PATTERNS;

    return $behaviorType;
}
|
|
|
|
/**
 * Tell whether the request carries a path to analyse.
 *
 * The check relies on PHP's empty(), so a null, '' or '0' path is rejected.
 */
public function canExtract(RequestAnalysisData $requestData): bool
{
    if (empty($requestData->path)) {
        return false;
    }

    return true;
}
|
|
|
|
/**
 * Record the request in the client's history, then compute every feature group.
 *
 * The request is recorded before extraction, so the history-based features
 * already include the current request.
 *
 * @param RequestAnalysisData $requestData Request being analysed.
 * @param array               $context     Not read by this method.
 *
 * @return array Path, parameter, sequence and structure features, in that
 *               order, with falsy entries removed by array_filter().
 */
public function extractFeatures(RequestAnalysisData $requestData, array $context = []): array
{
    $client = $this->getClientId($requestData);

    $this->recordPatterns($client, $requestData);

    $collected = [
        ...$this->extractPathFeatures($requestData, $client),
        ...$this->extractParameterFeatures($requestData, $client),
        ...$this->extractSequenceFeatures($requestData, $client),
        ...$this->extractStructureFeatures($requestData),
    ];

    return array_filter($collected);
}
|
|
|
|
/**
 * Build the path-related features for one request.
 *
 * @return array Nine features: three describing the path's shape, three
 *               derived from the client's path history, three scoring
 *               suspicious characteristics.
 */
private function extractPathFeatures(RequestAnalysisData $requestData, string $clientId): array
{
    $requestPath = $requestData->path;

    return [
        // Shape of the path itself
        $this->extractPathDepth($requestPath),
        $this->extractPathComplexity($requestPath),
        $this->extractPathEntropy($requestPath),

        // How this path relates to the client's recorded paths
        $this->extractPathUniqueness($clientId),
        $this->extractPathRepetition($clientId),
        $this->extractPathDiversity($clientId),

        // Suspicious characteristics
        $this->extractSuspiciousPathScore($requestPath),
        $this->extractFileExtensionPattern($requestPath),
        $this->extractDirectoryTraversalScore($requestPath),
    ];
}
|
|
|
|
/**
 * Build the parameter-related features for one request.
 *
 * @return array Empty when the request has no parameters; otherwise eight
 *               features covering count/structure, history patterns and
 *               suspicious content.
 */
private function extractParameterFeatures(RequestAnalysisData $requestData, string $clientId): array
{
    $params = $requestData->getAllParameters();

    if (empty($params)) {
        return [];
    }

    return [
        // Count and structure
        $this->extractParameterCount($params),
        $this->extractParameterComplexity($params),
        $this->extractParameterEntropy($params),

        // Patterns across the client's recorded parameter sets
        $this->extractParameterUniqueness($clientId),
        $this->extractParameterKeyDiversity($clientId),
        $this->extractParameterValueEntropy($params),

        // Suspicious characteristics
        $this->extractSuspiciousParameterScore($params),
        $this->extractInjectionPatternScore($params),
    ];
}
|
|
|
|
/**
 * Build the features that depend on the order of the client's recorded paths.
 *
 * @param RequestAnalysisData $requestData Not read by this method.
 *
 * @return array Empty until the client has at least two recorded paths;
 *               otherwise three sequence features.
 */
private function extractSequenceFeatures(RequestAnalysisData $requestData, string $clientId): array
{
    $visited = $this->pathHistory[$clientId] ?? [];

    // A sequence needs at least one transition, i.e. two recorded paths.
    if (count($visited) < 2) {
        return [];
    }

    return [
        $this->extractPathSequenceEntropy($visited),
        $this->extractPathTransitionScore($visited),
        $this->extractNavigationPattern($visited),
    ];
}
|
|
|
|
/**
 * Build the features describing the overall structure of the request.
 *
 * @return array Three features: request complexity, header/body ratio and
 *               content-type consistency.
 */
private function extractStructureFeatures(RequestAnalysisData $requestData): array
{
    return [
        $this->extractRequestComplexity($requestData),
        $this->extractHeaderToBodyRatio($requestData),
        $this->extractContentTypeConsistency($requestData),
    ];
}
|
|
|
|
/**
 * Count the non-empty segments of the path.
 *
 * Leading and trailing slashes are trimmed and empty segments (produced by
 * repeated slashes) are skipped. Segments are compared against '' explicitly:
 * array_filter() without a callback would also discard a literal "0"
 * segment and under-count paths such as /users/0.
 *
 * @param string $path Request path.
 *
 * @return BehaviorFeature 'path_depth' feature, unit 'segments'.
 */
private function extractPathDepth(string $path): BehaviorFeature
{
    $segments = array_filter(
        explode('/', trim($path, '/')),
        static fn (string $segment): bool => $segment !== '',
    );

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_depth',
        value: count($segments),
        unit: 'segments',
    );
}
|
|
|
|
/**
 * Score the path's complexity as a sum over its non-empty segments.
 *
 * Each segment contributes:
 *  - its byte length / 20,
 *  - its number of distinct bytes / 10,
 *  - 0.5 for every byte outside [a-zA-Z0-9_-].
 *
 * Empty segments are skipped by comparing against '' explicitly;
 * array_filter() without a callback would also drop a literal "0" segment.
 *
 * @param string $path Request path.
 *
 * @return BehaviorFeature 'path_complexity' feature, unit 'complexity_score'.
 */
private function extractPathComplexity(string $path): BehaviorFeature
{
    $segments = array_filter(
        explode('/', trim($path, '/')),
        static fn (string $segment): bool => $segment !== '',
    );

    $complexity = 0.0;

    foreach ($segments as $segment) {
        // Length complexity
        $complexity += strlen($segment) / 20.0;

        // Character diversity
        $complexity += count(array_unique(str_split($segment))) / 10.0;

        // Special characters
        $complexity += preg_match_all('/[^a-zA-Z0-9_-]/', $segment) * 0.5;
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_complexity',
        value: $complexity,
        unit: 'complexity_score',
    );
}
|
|
|
|
/**
 * Build an entropy feature from the byte frequencies of the lower-cased path.
 *
 * @param string $path Request path.
 *
 * @return BehaviorFeature 'path_entropy' feature created by BehaviorFeature::entropy().
 */
private function extractPathEntropy(string $path): BehaviorFeature
{
    $lowered = strtolower($path);

    // How often each byte occurs in the path
    $frequencies = array_count_values(str_split($lowered));

    return BehaviorFeature::entropy(
        type: $this->getBehaviorType(),
        name: 'path_entropy',
        distribution: array_values($frequencies),
    );
}
|
|
|
|
/**
 * Ratio of distinct paths to total paths in the client's recorded history.
 *
 * A client with no recorded paths gets 1.0.
 *
 * @param string $clientId Key into the path history.
 *
 * @return BehaviorFeature 'path_uniqueness' feature, unit 'ratio'.
 */
private function extractPathUniqueness(string $clientId): BehaviorFeature
{
    $history = $this->pathHistory[$clientId] ?? [];

    if (empty($history)) {
        $ratio = 1.0;
    } else {
        $distinct = count(array_unique($history));
        $ratio = $distinct / count($history);
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_uniqueness',
        value: $ratio,
        unit: 'ratio',
    );
}
|
|
|
|
/**
 * Share of the client's recorded requests that hit its most frequent path.
 *
 * With fewer than two recorded paths the value is 0.0. The feature is always
 * emitted with unit 'ratio'; previously the short-history case used 'score',
 * so the same feature name carried two different units.
 *
 * @param string $clientId Key into the path history.
 *
 * @return BehaviorFeature 'path_repetition' feature, unit 'ratio'.
 */
private function extractPathRepetition(string $clientId): BehaviorFeature
{
    $history = $this->pathHistory[$clientId] ?? [];
    $total = count($history);

    $repetition = $total < 2
        ? 0.0
        : max(array_count_values($history)) / $total;

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_repetition',
        value: $repetition,
        unit: 'ratio',
    );
}
|
|
|
|
/**
 * Build an entropy feature from how often each path occurs in the client's history.
 *
 * A client with no recorded paths gets a plain 0.0 feature with unit 'bits'.
 *
 * @param string $clientId Key into the path history.
 *
 * @return BehaviorFeature 'path_diversity' feature.
 */
private function extractPathDiversity(string $clientId): BehaviorFeature
{
    $history = $this->pathHistory[$clientId] ?? [];

    if (! empty($history)) {
        $occurrences = array_count_values($history);

        return BehaviorFeature::entropy(
            type: $this->getBehaviorType(),
            name: 'path_diversity',
            distribution: array_values($occurrences),
        );
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_diversity',
        value: 0.0,
        unit: 'bits',
    );
}
|
|
|
|
/**
 * Score suspicious characteristics of the path, capped at 1.0.
 *
 * Contributions:
 *  - 0.3 per admin/system marker found (case-insensitive substring match),
 *  - 0.2 if any percent-encoded byte is present,
 *  - 0.4 if a double-encoded byte (%25XX) is present,
 *  - 0.5 if an encoded null byte (%00) is present,
 *  - 0.2 if the path is longer than 200 bytes.
 *
 * @param string $path Request path.
 *
 * @return BehaviorFeature 'suspicious_path_score' feature, unit 'score'.
 */
private function extractSuspiciousPathScore(string $path): BehaviorFeature
{
    $score = 0.0;

    // Admin/system paths
    foreach (['/admin', '/administrator', '/config', '/debug', '/test'] as $marker) {
        if (stripos($path, $marker) !== false) {
            $score += 0.3;
        }
    }

    // Encoded characters
    $score += preg_match('/%[0-9a-fA-F]{2}/', $path) ? 0.2 : 0.0;

    // Double encoding
    $score += preg_match('/%25[0-9a-fA-F]{2}/', $path) ? 0.4 : 0.0;

    // Null bytes
    $score += strpos($path, '%00') !== false ? 0.5 : 0.0;

    // Excessive length
    $score += strlen($path) > 200 ? 0.2 : 0.0;

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'suspicious_path_score',
        value: min($score, 1.0),
        unit: 'score',
    );
}
|
|
|
|
/**
 * Score the risk implied by the path's file extension.
 *
 *  - 1.0 for an extension on the server-side/executable list,
 *  - 0.1 for any other extension,
 *  - 0.0 when the path has no extension.
 *
 * The "has an extension" test compares against '' rather than using empty(),
 * which would treat an extension of "0" (e.g. /file.0) as no extension.
 *
 * @param string $path Request path.
 *
 * @return BehaviorFeature 'file_extension_risk' feature, unit 'risk_score'.
 */
private function extractFileExtensionPattern(string $path): BehaviorFeature
{
    $extension = strtolower(pathinfo($path, PATHINFO_EXTENSION));

    $dangerousExtensions = [
        'php', 'asp', 'aspx', 'jsp', 'py', 'pl', 'cgi', 'sh', 'bat', 'exe',
    ];

    $riskScore = match (true) {
        in_array($extension, $dangerousExtensions, true) => 1.0,
        $extension !== '' => 0.1, // Any extension is slightly suspicious
        default => 0.0,
    };

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'file_extension_risk',
        value: $riskScore,
        unit: 'risk_score',
    );
}
|
|
|
|
/**
 * Score directory-traversal markers in the path, capped at 1.0.
 *
 * Every occurrence of a marker (plain or percent-encoded "../" and "..\")
 * adds 0.3. Matching is done on the lower-cased path.
 *
 * @param string $path Request path.
 *
 * @return BehaviorFeature 'directory_traversal_score' feature, unit 'score'.
 */
private function extractDirectoryTraversalScore(string $path): BehaviorFeature
{
    $haystack = strtolower($path);
    $score = 0.0;

    foreach (['../', '..\\', '%2e%2e%2f', '%2e%2e%5c'] as $marker) {
        $score += substr_count($haystack, strtolower($marker)) * 0.3;
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'directory_traversal_score',
        value: min($score, 1.0),
        unit: 'score',
    );
}
|
|
|
|
/**
 * Report how many top-level parameters the request carries.
 *
 * @param array $parameters Request parameters.
 *
 * @return BehaviorFeature 'parameter_count' feature, unit 'count'.
 */
private function extractParameterCount(array $parameters): BehaviorFeature
{
    $total = count($parameters);

    return BehaviorFeature::create(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_count',
        value: $total,
        unit: 'count',
    );
}
|
|
|
|
/**
 * Score parameter complexity as a sum over all parameters.
 *
 * Each key contributes its byte length / 50 plus 0.1 per byte outside
 * [a-zA-Z0-9_]. Each string value contributes its byte length / 200 plus
 * 0.05 per byte that is neither alphanumeric nor whitespace. Non-string
 * values contribute nothing.
 *
 * Keys are cast to string first: PHP stores numeric parameter names
 * (e.g. "?1=x") as int array keys, and under strict_types passing an int to
 * strlen()/preg_match_all() throws a TypeError.
 *
 * @param array $parameters Request parameters.
 *
 * @return BehaviorFeature 'parameter_complexity' feature, unit 'complexity_score'.
 */
private function extractParameterComplexity(array $parameters): BehaviorFeature
{
    $complexity = 0.0;

    foreach ($parameters as $key => $value) {
        $name = (string) $key;

        // Key complexity
        $complexity += strlen($name) / 50.0;
        $complexity += preg_match_all('/[^a-zA-Z0-9_]/', $name) * 0.1;

        // Value complexity
        if (is_string($value)) {
            $complexity += strlen($value) / 200.0;
            $complexity += preg_match_all('/[^a-zA-Z0-9\\s]/', $value) * 0.05;
        }
    }

    return BehaviorFeature::create(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_complexity',
        value: $complexity,
        unit: 'complexity_score',
    );
}
|
|
|
|
/**
 * Build an entropy feature from the byte frequencies of all parameter keys.
 *
 * The keys are concatenated and lower-cased before counting. An empty
 * parameter set yields a plain 0.0 feature with unit 'bits'.
 *
 * @param array $parameters Request parameters.
 *
 * @return BehaviorFeature 'parameter_entropy' feature.
 */
private function extractParameterEntropy(array $parameters): BehaviorFeature
{
    if (empty($parameters)) {
        return BehaviorFeature::create(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_entropy',
            value: 0.0,
            unit: 'bits',
        );
    }

    $joinedKeys = strtolower(implode('', array_keys($parameters)));
    $frequencies = array_count_values(str_split($joinedKeys));

    return BehaviorFeature::entropy(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_entropy',
        distribution: array_values($frequencies),
    );
}
|
|
|
|
/**
 * Ratio of distinct parameter sets to total parameter sets recorded for the client.
 *
 * A client with no recorded parameter sets gets 1.0.
 *
 * NOTE(review): the history entries are arrays, and the PHP manual states
 * array_unique() is not intended for multidimensional arrays — confirm the
 * SORT_REGULAR de-duplication is reliable enough for this feature.
 *
 * @param string $clientId Key into the parameter history.
 *
 * @return BehaviorFeature 'parameter_uniqueness' feature, unit 'ratio'.
 */
private function extractParameterUniqueness(string $clientId): BehaviorFeature
{
    $recorded = $this->parameterHistory[$clientId] ?? [];

    if (empty($recorded)) {
        $ratio = 1.0;
    } else {
        $distinct = count(array_unique($recorded, SORT_REGULAR));
        $ratio = $distinct / count($recorded);
    }

    return BehaviorFeature::create(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_uniqueness',
        value: $ratio,
        unit: 'ratio',
    );
}
|
|
|
|
/**
 * Build an entropy feature from how often each parameter key appears across
 * the client's recorded parameter sets.
 *
 * Keys are tallied directly while walking the history. This yields the same
 * counts, in the same first-seen order, as merging all keys and calling
 * array_count_values(), without re-copying a growing list on every iteration.
 * History entries that are not arrays are ignored. A client with no recorded
 * parameter sets gets a plain 0.0 feature with unit 'bits'.
 *
 * @param string $clientId Key into the parameter history.
 *
 * @return BehaviorFeature 'parameter_key_diversity' feature.
 */
private function extractParameterKeyDiversity(string $clientId): BehaviorFeature
{
    $paramHistory = $this->parameterHistory[$clientId] ?? [];

    if (empty($paramHistory)) {
        return BehaviorFeature::create(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_key_diversity',
            value: 0.0,
            unit: 'bits',
        );
    }

    $keyCounts = [];

    foreach ($paramHistory as $params) {
        if (! is_array($params)) {
            continue;
        }

        foreach (array_keys($params) as $key) {
            $keyCounts[$key] = ($keyCounts[$key] ?? 0) + 1;
        }
    }

    return BehaviorFeature::entropy(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_key_diversity',
        distribution: array_values($keyCounts),
    );
}
|
|
|
|
/**
 * Build an entropy feature from the byte frequencies of all string parameter values.
 *
 * String values are concatenated and lower-cased before counting; non-string
 * values are ignored. When the concatenation is empty by PHP's empty() rules
 * (no parameters, no string values, or the single string "0"), a plain 0.0
 * feature with unit 'bits' is returned instead.
 *
 * @param array $parameters Request parameters.
 *
 * @return BehaviorFeature 'parameter_value_entropy' feature.
 */
private function extractParameterValueEntropy(array $parameters): BehaviorFeature
{
    $joinedValues = implode('', array_filter(array_values($parameters), 'is_string'));

    if (empty($joinedValues)) {
        return BehaviorFeature::create(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_value_entropy',
            value: 0.0,
            unit: 'bits',
        );
    }

    $frequencies = array_count_values(str_split(strtolower($joinedValues)));

    return BehaviorFeature::entropy(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'parameter_value_entropy',
        distribution: array_values($frequencies),
    );
}
|
|
|
|
/**
 * Score suspicious parameter names and values, capped at 1.0.
 *
 * For every parameter:
 *  - 0.3 per suspicious word contained in the lower-cased key,
 *  - 0.1 if a string value contains a percent-encoded byte,
 *  - 0.2 if a string value is longer than 1000 bytes.
 *
 * Keys are cast to string first: PHP stores numeric parameter names as int
 * array keys, and under strict_types strtolower() on an int throws a TypeError.
 *
 * @param array $parameters Request parameters.
 *
 * @return BehaviorFeature 'suspicious_parameter_score' feature, unit 'score'.
 */
private function extractSuspiciousParameterScore(array $parameters): BehaviorFeature
{
    $suspiciousScore = 0.0;

    $suspiciousKeys = [
        'eval', 'exec', 'system', 'cmd', 'command', 'shell',
        'admin', 'root', 'password', 'pass', 'auth', 'token',
        'debug', 'test', 'dev', 'config', 'settings',
    ];

    foreach ($parameters as $key => $value) {
        $lowerKey = strtolower((string) $key);

        // Check for suspicious parameter names
        foreach ($suspiciousKeys as $suspicious) {
            if (strpos($lowerKey, $suspicious) !== false) {
                $suspiciousScore += 0.3;
            }
        }

        if (! is_string($value)) {
            continue;
        }

        // Check for encoded values
        if (preg_match('/%[0-9a-fA-F]{2}/', $value)) {
            $suspiciousScore += 0.1;
        }

        // Check for extremely long values
        if (strlen($value) > 1000) {
            $suspiciousScore += 0.2;
        }
    }

    return BehaviorFeature::create(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'suspicious_parameter_score',
        value: min($suspiciousScore, 1.0),
        unit: 'score',
    );
}
|
|
|
|
/**
 * Score injection signatures in parameter values, capped at 1.0.
 *
 * Every string value that matches at least one SQL, XSS or command
 * signature adds 0.4 once, regardless of how many signatures it matches.
 * Non-string values are skipped.
 *
 * @param array $parameters Request parameters.
 *
 * @return BehaviorFeature 'injection_pattern_score' feature, unit 'score'.
 */
private function extractInjectionPatternScore(array $parameters): BehaviorFeature
{
    $signatures = [
        'sql' => ['/union\\s+select/i', '/or\\s+1\\s*=\\s*1/i', '/\\s*;\\s*drop\\s+table/i'],
        'xss' => ['/<script/i', '/javascript:/i', '/onerror\\s*=/i'],
        'cmd' => ['/;\\s*(cat|ls|pwd|id)/i', '/\\|\\s*(nc|netcat)/i'],
    ];

    // True as soon as any signature of any family matches the candidate.
    $matchesAny = static function (string $candidate) use ($signatures): bool {
        foreach ($signatures as $family) {
            foreach ($family as $regex) {
                if (preg_match($regex, $candidate)) {
                    return true;
                }
            }
        }

        return false;
    };

    $score = 0.0;

    foreach ($parameters as $value) {
        if (is_string($value) && $matchesAny($value)) {
            $score += 0.4;
        }
    }

    return BehaviorFeature::create(
        type: BehaviorType::PARAMETER_PATTERNS,
        name: 'injection_pattern_score',
        value: min($score, 1.0),
        unit: 'score',
    );
}
|
|
|
|
/**
 * Build an entropy feature from the frequencies of consecutive path pairs.
 *
 * Each adjacent pair in the history becomes the string "A -> B"; the counts
 * of those strings form the distribution.
 *
 * @param array $pathHistory Recorded paths, indexed 0..n-1.
 *
 * @return BehaviorFeature 'path_sequence_entropy' feature.
 */
private function extractPathSequenceEntropy(array $pathHistory): BehaviorFeature
{
    $transitions = [];
    $lastIndex = count($pathHistory) - 1;

    for ($index = 0; $index < $lastIndex; ++$index) {
        $transitions[] = $pathHistory[$index] . ' -> ' . $pathHistory[$index + 1];
    }

    $transitionCounts = array_count_values($transitions);

    return BehaviorFeature::entropy(
        type: $this->getBehaviorType(),
        name: 'path_sequence_entropy',
        distribution: array_values($transitionCounts),
    );
}
|
|
|
|
/**
 * Average similarity between consecutive paths in the history.
 *
 * Similarity of a pair is 1 - levenshtein(a, b) / max(len(a), len(b)).
 * Two empty paths are treated as identical (similarity 1.0) instead of
 * dividing by zero; empty paths can reach the history because paths are
 * recorded without an emptiness check.
 *
 * With fewer than two paths the value is 0.0. The feature is always emitted
 * with unit 'similarity_score'; previously the short-history case used 'score'.
 *
 * @param array $pathHistory Recorded paths, indexed 0..n-1.
 *
 * @return BehaviorFeature 'path_transition_score' feature, unit 'similarity_score'.
 */
private function extractPathTransitionScore(array $pathHistory): BehaviorFeature
{
    $pairCount = count($pathHistory) - 1;
    $averageTransition = 0.0;

    if ($pairCount >= 1) {
        $transitionScore = 0.0;

        for ($i = 0; $i < $pairCount; $i++) {
            $current = $pathHistory[$i];
            $next = $pathHistory[$i + 1];

            $longest = max(strlen($current), strlen($next));

            // Both strings empty: distance is 0 and there is nothing to normalise by.
            $transitionScore += $longest === 0
                ? 1.0
                : 1.0 - (levenshtein($current, $next) / $longest);
        }

        $averageTransition = $transitionScore / $pairCount;
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'path_transition_score',
        value: $averageTransition,
        unit: 'similarity_score',
    );
}
|
|
|
|
/**
 * Score how often the client returns to a recently visited path.
 *
 * Starting at the third entry, each path is compared with up to five
 * preceding entries, scanned from the oldest of them to the newest. The first
 * match adds 1 / distance and ends the scan for that entry. The sum is divided
 * by (entries - 2); histories of two or fewer entries score 0.0.
 *
 * @param array $pathHistory Recorded paths, indexed 0..n-1.
 *
 * @return BehaviorFeature 'navigation_backtracking' feature, unit 'backtracking_score'.
 */
private function extractNavigationPattern(array $pathHistory): BehaviorFeature
{
    $entries = count($pathHistory);
    $score = 0.0;

    for ($position = 2; $position < $entries; $position++) {
        $windowStart = max(0, $position - 5);

        for ($earlier = $windowStart; $earlier < $position; $earlier++) {
            if ($pathHistory[$earlier] !== $pathHistory[$position]) {
                continue;
            }

            // Smaller distance gives a larger contribution.
            $score += 1.0 / ($position - $earlier);

            break;
        }
    }

    $normalized = $entries > 2 ? $score / ($entries - 2) : 0.0;

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'navigation_backtracking',
        value: $normalized,
        unit: 'backtracking_score',
    );
}
|
|
|
|
/**
 * Score the overall size of the request as a weighted sum.
 *
 * Terms: path length / 100, parameter count / 20, header count / 30,
 * body length / 5000.
 *
 * @param RequestAnalysisData $requestData Request being analysed.
 *
 * @return BehaviorFeature 'request_complexity' feature, unit 'complexity_score'.
 */
private function extractRequestComplexity(RequestAnalysisData $requestData): BehaviorFeature
{
    $pathTerm = strlen($requestData->path) / 100.0;
    $parameterTerm = count($requestData->getAllParameters()) / 20.0;
    $headerTerm = count($requestData->headers) / 30.0;
    $bodyTerm = strlen($requestData->body) / 5000.0;

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'request_complexity',
        value: 0.0 + $pathTerm + $parameterTerm + $headerTerm + $bodyTerm,
        unit: 'complexity_score',
    );
}
|
|
|
|
/**
 * Share of the request's header+body bytes that belong to the headers.
 *
 * Header size is the sum of each header's name length and value length.
 * When headers and body are both empty the ratio is 0.0.
 *
 * @param RequestAnalysisData $requestData Request being analysed.
 *
 * @return BehaviorFeature 'header_body_ratio' feature, unit 'ratio'.
 */
private function extractHeaderToBodyRatio(RequestAnalysisData $requestData): BehaviorFeature
{
    $headerBytes = 0;

    foreach ($requestData->headers as $name => $value) {
        $headerBytes += strlen($name) + strlen($value);
    }

    $totalBytes = $headerBytes + strlen($requestData->body);

    if ($totalBytes > 0) {
        $ratio = $headerBytes / $totalBytes;
    } else {
        $ratio = 0.0;
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'header_body_ratio',
        value: $ratio,
        unit: 'ratio',
    );
}
|
|
|
|
/**
 * Score whether the body actually parses as the declared content type.
 *
 * Starts at 1.0 and subtracts 0.5 when a non-empty body declared as JSON
 * fails json_decode(), and 0.5 when a non-empty body declared as XML fails
 * to parse. Requests without a content type keep 1.0. The result is floored
 * at 0.0.
 *
 * The libxml error buffer is cleared before parsing so that errors left
 * behind by earlier XML work in the same process are not attributed to this
 * body. LIBXML_NONET is passed because the body is untrusted request data.
 *
 * @param RequestAnalysisData $requestData Request being analysed.
 *
 * @return BehaviorFeature 'content_type_consistency' feature, unit 'consistency_score'.
 */
private function extractContentTypeConsistency(RequestAnalysisData $requestData): BehaviorFeature
{
    $consistencyScore = 1.0;

    if ($requestData->contentType !== null) {
        if ($requestData->isJson() && ! empty($requestData->body)) {
            json_decode($requestData->body);

            if (json_last_error() !== JSON_ERROR_NONE) {
                $consistencyScore -= 0.5;
            }
        }

        if ($requestData->isXml() && ! empty($requestData->body)) {
            $previousSetting = libxml_use_internal_errors(true);

            // Start from an empty buffer so only this parse is judged.
            libxml_clear_errors();

            $document = simplexml_load_string($requestData->body, options: LIBXML_NONET);
            $errors = libxml_get_errors();

            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);

            if ($document === false || ! empty($errors)) {
                $consistencyScore -= 0.5;
            }
        }
    }

    return BehaviorFeature::create(
        type: $this->getBehaviorType(),
        name: 'content_type_consistency',
        value: max(0.0, $consistencyScore),
        unit: 'consistency_score',
    );
}
|
|
|
|
/**
 * Append the request's path and parameters to the client's histories.
 *
 * The path is always recorded; parameters are recorded only when the request
 * has any. After appending, the oldest entries are dropped until the history
 * fits within historySize. Trimming loops rather than removing a single
 * element, so a history seeded through the constructor with more than
 * historySize entries is brought back within the limit.
 *
 * NOTE(review): nothing in this class removes a client's entry from either
 * history map, so the number of tracked clients grows for the lifetime of
 * the instance — confirm that is acceptable for long-running processes.
 *
 * @param string              $clientId    Key into both history maps.
 * @param RequestAnalysisData $requestData Request being recorded.
 */
private function recordPatterns(string $clientId, RequestAnalysisData $requestData): void
{
    // Record path
    $this->pathHistory[$clientId] ??= [];
    $this->pathHistory[$clientId][] = $requestData->path;

    // The non-empty guard stops the loop when historySize is zero or negative.
    while (
        $this->pathHistory[$clientId] !== []
        && count($this->pathHistory[$clientId]) > $this->historySize
    ) {
        array_shift($this->pathHistory[$clientId]);
    }

    // Record parameters
    $this->parameterHistory[$clientId] ??= [];

    $allParams = $requestData->getAllParameters();

    if (empty($allParams)) {
        return;
    }

    $this->parameterHistory[$clientId][] = $allParams;

    while (
        $this->parameterHistory[$clientId] !== []
        && count($this->parameterHistory[$clientId]) > $this->historySize
    ) {
        array_shift($this->parameterHistory[$clientId]);
    }
}
|
|
|
|
/**
 * Derive the key under which this client's history is stored.
 *
 * Preference order: 'session:<id>' when a non-empty session id is present,
 * then 'ip:<address>' when a client IP is present, otherwise 'unknown'.
 *
 * @param RequestAnalysisData $requestData Request being analysed.
 *
 * @return string Client identifier.
 */
private function getClientId(RequestAnalysisData $requestData): string
{
    return match (true) {
        ! empty($requestData->sessionId) => 'session:' . $requestData->sessionId,
        $requestData->clientIp !== null => 'ip:' . $requestData->clientIp->toString(),
        default => 'unknown',
    };
}
|
|
|
|
/**
 * List the names of the features this extractor produces, grouped as
 * path, parameter, sequence and structure features.
 *
 * @return array Feature names.
 */
public function getFeatureNames(): array
{
    $pathFeatures = [
        'path_depth', 'path_complexity', 'path_entropy', 'path_uniqueness',
        'path_repetition', 'path_diversity', 'suspicious_path_score',
        'file_extension_risk', 'directory_traversal_score',
    ];

    $parameterFeatures = [
        'parameter_count', 'parameter_complexity', 'parameter_entropy',
        'parameter_uniqueness', 'parameter_key_diversity', 'parameter_value_entropy',
        'suspicious_parameter_score', 'injection_pattern_score',
    ];

    $sequenceFeatures = [
        'path_sequence_entropy', 'path_transition_score', 'navigation_backtracking',
    ];

    $structureFeatures = [
        'request_complexity', 'header_body_ratio', 'content_type_consistency',
    ];

    return [
        ...$pathFeatures,
        ...$parameterFeatures,
        ...$sequenceFeatures,
        ...$structureFeatures,
    ];
}
|
|
|
|
/**
 * Report the extractor's settings and how many feature names it declares.
 *
 * @return array Keys: 'enabled', 'max_path_segments', 'max_parameter_keys',
 *               'history_size', 'feature_count'.
 */
public function getConfiguration(): array
{
    $featureCount = count($this->getFeatureNames());

    return [
        'enabled' => $this->enabled,
        'max_path_segments' => $this->maxPathSegments,
        'max_parameter_keys' => $this->maxParameterKeys,
        'history_size' => $this->historySize,
        'feature_count' => $featureCount,
    ];
}
|
|
|
|
/**
 * Return the enabled flag given to the constructor.
 */
public function isEnabled(): bool
{
    $enabled = $this->enabled;

    return $enabled;
}
|
|
|
|
/**
 * Return this extractor's fixed priority (medium-high).
 */
public function getPriority(): int
{
    $mediumHighPriority = 80;

    return $mediumHighPriority;
}
|
|
|
|
/**
 * Return the expected processing time in milliseconds.
 */
public function getExpectedProcessingTime(): int
{
    $milliseconds = 75;

    return $milliseconds;
}
|
|
|
|
/**
 * Report that this extractor must not run in parallel.
 *
 * The extractor keeps per-client pattern history and needs sequential access to it.
 */
public function supportsParallelExecution(): bool
{
    $parallelSafe = false;

    return $parallelSafe;
}
|
|
|
|
/**
 * Return this extractor's dependencies; it has none.
 *
 * @return array Always an empty array.
 */
public function getDependencies(): array
{
    $dependencies = [];

    return $dependencies;
}
|
|
}
|