path); }

    /**
     * Record the current request for its client and collect all behavioral features.
     *
     * The request is recorded via recordPatterns() before any feature group is
     * extracted. Groups are concatenated in a fixed order: path, parameter,
     * sequence, structure.
     *
     * @param RequestAnalysisData $requestData Request being analysed.
     * @param array               $context     Not used by this method.
     *
     * @return array Extracted features with falsy entries removed (keys are preserved by array_filter).
     */
    public function extractFeatures(RequestAnalysisData $requestData, array $context = []): array
    {
        $clientId = $this->getClientId($requestData);

        // Record current request patterns before computing anything
        $this->recordPatterns($clientId, $requestData);

        $features = array_merge(
            $this->extractPathFeatures($requestData, $clientId),
            $this->extractParameterFeatures($requestData, $clientId),
            $this->extractSequenceFeatures($requestData, $clientId),
            $this->extractStructureFeatures($requestData),
        );

        return array_filter($features);
    }

    /**
     * Extract path-related behavioral features
     *
     * @return array<int, BehaviorFeature> Nine features: structure, per-client pattern, and suspicion scores.
     */
    private function extractPathFeatures(RequestAnalysisData $requestData, string $clientId): array
    {
        $path = $requestData->path;

        return [
            // Path structure features
            $this->extractPathDepth($path),
            $this->extractPathComplexity($path),
            $this->extractPathEntropy($path),

            // Path pattern features
            $this->extractPathUniqueness($clientId),
            $this->extractPathRepetition($clientId),
            $this->extractPathDiversity($clientId),

            // Suspicious path characteristics
            $this->extractSuspiciousPathScore($path),
            $this->extractFileExtensionPattern($path),
            $this->extractDirectoryTraversalScore($path),
        ];
    }

    /**
     * Extract parameter-related behavioral features
     *
     * @return array<int, BehaviorFeature> Empty when the request carries no parameters.
     */
    private function extractParameterFeatures(RequestAnalysisData $requestData, string $clientId): array
    {
        $allParams = $requestData->getAllParameters();

        if (empty($allParams)) {
            return [];
        }

        return [
            // Parameter count and structure
            $this->extractParameterCount($allParams),
            $this->extractParameterComplexity($allParams),
            $this->extractParameterEntropy($allParams),

            // Parameter patterns
            $this->extractParameterUniqueness($clientId),
            $this->extractParameterKeyDiversity($clientId),
            $this->extractParameterValueEntropy($allParams),

            // Suspicious parameter characteristics
            $this->extractSuspiciousParameterScore($allParams),
            $this->extractInjectionPatternScore($allParams),
        ];
    }

    /**
     * Extract sequence-based features
     *
     * Sequence features are only produced once the client has at least two
     * recorded paths. $requestData is not read by this method.
     */
    private function extractSequenceFeatures(RequestAnalysisData $requestData, string $clientId): array
    {
        $pathHistory = $this->pathHistory[$clientId] ?? [];

        // A sequence needs at least two entries
        if (count($pathHistory) < 2) {
            return [];
        }

        return [
            $this->extractPathSequenceEntropy($pathHistory),
            $this->extractPathTransitionScore($pathHistory),
            $this->extractNavigationPattern($pathHistory),
        ];
    }

    /**
     * Extract structural features
     */
    private function extractStructureFeatures(RequestAnalysisData $requestData): array
    {
        return [
            $this->extractRequestComplexity($requestData),
            $this->extractHeaderToBodyRatio($requestData),
            $this->extractContentTypeConsistency($requestData),
        ];
    }

    /**
     * Extract path depth (number of non-empty segments)
     *
     * Only empty segments (from doubled slashes) are discarded; a segment that
     * is literally "0" counts, which the default array_filter() would drop.
     */
    private function extractPathDepth(string $path): BehaviorFeature
    {
        $segments = array_filter(
            explode('/', trim($path, '/')),
            static fn (string $segment): bool => $segment !== ''
        );

        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'path_depth',
            value: count($segments),
            unit: 'segments'
        );
    }

    /**
     * Extract path complexity score
     *
     * Each non-empty segment contributes its length / 20, its number of
     * distinct bytes / 10, and 0.5 per character outside [a-zA-Z0-9_-].
     */
    private function extractPathComplexity(string $path): BehaviorFeature
    {
        // Keep "0" segments; only drop empty ones
        $segments = array_filter(
            explode('/', trim($path, '/')),
            static fn (string $segment): bool => $segment !== ''
        );

        $complexity = 0.0;

        foreach ($segments as $segment) {
            // Length complexity
            $complexity += strlen($segment) / 20.0;

            // Character diversity
            $complexity += count(array_unique(str_split($segment))) / 10.0;

            // Special characters
            $complexity += preg_match_all('/[^a-zA-Z0-9_-]/', $segment) * 0.5;
        }

        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'path_complexity',
            value: $complexity,
            unit: 'complexity_score'
        );
    }

    /**
     * Extract path entropy
     *
     * The distribution passed on is the per-byte frequency of the lowercased path.
     */
    private function extractPathEntropy(string $path): BehaviorFeature
    {
        $frequencies = array_count_values(str_split(strtolower($path)));

        return BehaviorFeature::entropy(
            type: $this->getBehaviorType(),
            name: 'path_entropy',
            distribution: array_values($frequencies)
        );
    }

    /**
     * Extract path uniqueness for this client
     *
     * Ratio of distinct paths to total paths in the client's history;
     * 1.0 when there is no history.
     */
    private function extractPathUniqueness(string $clientId): BehaviorFeature
    {
        $pathHistory = $this->pathHistory[$clientId] ?? [];

        // The history is non-empty in the second branch, so the division is safe
        $uniqueness = empty($pathHistory)
            ? 1.0
            : count(array_unique($pathHistory)) / count($pathHistory);

        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'path_uniqueness',
            value: $uniqueness,
            unit: 'ratio'
        );
    }

    /**
     * Extract path repetition score
     *
     * Share of the client's history taken by its most frequent path; 0.0 when
     * fewer than two paths are recorded. Both cases report the unit 'ratio' so
     * the feature carries one unit regardless of history size.
     */
    private function extractPathRepetition(string $clientId): BehaviorFeature
    {
        $pathHistory = $this->pathHistory[$clientId] ?? [];

        $repetition = count($pathHistory) < 2
            ? 0.0
            : max(array_count_values($pathHistory)) / count($pathHistory);

        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'path_repetition',
            value: $repetition,
            unit: 'ratio'
        );
    }

    /**
     * Extract path diversity score
     *
     * Entropy over how often each distinct path occurs in the client's
     * history; 0.0 bits when there is no history.
     */
    private function extractPathDiversity(string $clientId): BehaviorFeature
    {
        $pathHistory = $this->pathHistory[$clientId] ?? [];

        if (empty($pathHistory)) {
            return BehaviorFeature::create(
                type: $this->getBehaviorType(),
                name: 'path_diversity',
                value: 0.0,
                unit: 'bits'
            );
        }

        return BehaviorFeature::entropy(
            type: $this->getBehaviorType(),
            name: 'path_diversity',
            distribution: array_values(array_count_values($pathHistory))
        );
    }

    /**
     * Extract suspicious path characteristics score
     *
     * Additive score, capped at 1.0.
     */
    private function extractSuspiciousPathScore(string $path): BehaviorFeature
    {
        $suspiciousScore = 0.0;

        // Admin/system paths (case-insensitive substring match).
        // Note: a path containing '/administrator' also contains '/admin' and scores for both.
        foreach (['/admin', '/administrator', '/config', '/debug', '/test'] as $pattern) {
            if (stripos($path, $pattern) !== false) {
                $suspiciousScore += 0.3;
            }
        }

        // Encoded characters
        if (preg_match('/%[0-9a-fA-F]{2}/', $path)) {
            $suspiciousScore += 0.2;
        }

        // Double encoding
        if (preg_match('/%25[0-9a-fA-F]{2}/', $path)) {
            $suspiciousScore += 0.4;
        }

        // Null bytes
        if (str_contains($path, '%00')) {
            $suspiciousScore += 0.5;
        }

        // Excessive length
        if (strlen($path) > 200) {
            $suspiciousScore += 0.2;
        }

        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'suspicious_path_score',
            value: min($suspiciousScore, 1.0),
            unit: 'score'
        );
    }

    /**
     * Extract file extension pattern
     *
     * 1.0 for a listed executable/script extension, 0.1 for any other
     * extension, 0.0 when the path has none.
     */
    private function extractFileExtensionPattern(string $path): BehaviorFeature
    {
        $extension = strtolower(pathinfo($path, PATHINFO_EXTENSION));

        $dangerousExtensions = [
            'php', 'asp', 'aspx', 'jsp', 'py', 'pl', 'cgi', 'sh', 'bat', 'exe',
        ];

        $riskScore = match (true) {
            in_array($extension, $dangerousExtensions, true) => 1.0,
            // Any extension is slightly suspicious; compare against '' so that ".0" still counts
            $extension !== '' => 0.1,
            default => 0.0,
        };

        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'file_extension_risk',
            value: $riskScore,
            unit: 'risk_score'
        );
    }

    /**
     * Extract directory traversal score
     *
     * 0.3 per occurrence of a traversal pattern (plain or percent-encoded),
     * capped at 1.0.
     */
    private function extractDirectoryTraversalScore(string $path): BehaviorFeature
    {
        $traversalScore = 0.0;

        // Lowercase once; the patterns below are already lowercase
        $lowerPath = strtolower($path);

        foreach (['../', '..\\', '%2e%2e%2f', '%2e%2e%5c'] as $pattern) {
            $traversalScore += substr_count($lowerPath, $pattern) * 0.3;
        }

        return BehaviorFeature::create(
            type: $this->getBehaviorType(),
            name: 'directory_traversal_score',
            value: min($traversalScore, 1.0),
            unit: 'score'
        );
    }

    /**
     * Extract parameter count
     */
    private function extractParameterCount(array $parameters): BehaviorFeature
    {
        return BehaviorFeature::create(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_count',
            value: count($parameters),
            unit: 'count'
        );
    }

    /**
     * Extract parameter complexity
     *
     * Keys contribute length / 50 plus 0.1 per character outside [a-zA-Z0-9_];
     * string values contribute length / 200 plus 0.05 per character that is
     * neither alphanumeric nor whitespace. Non-string values are ignored.
     */
    private function extractParameterComplexity(array $parameters): BehaviorFeature
    {
        $complexity = 0.0;

        foreach ($parameters as $key => $value) {
            // PHP turns numeric array keys into ints; string functions need a string
            $keyName = (string) $key;

            // Key complexity
            $complexity += strlen($keyName) / 50.0;
            $complexity += preg_match_all('/[^a-zA-Z0-9_]/', $keyName) * 0.1;

            // Value complexity
            if (is_string($value)) {
                $complexity += strlen($value) / 200.0;
                $complexity += preg_match_all('/[^a-zA-Z0-9\\s]/', $value) * 0.05;
            }
        }

        return BehaviorFeature::create(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_complexity',
            value: $complexity,
            unit: 'complexity_score'
        );
    }

    /**
     * Extract parameter key entropy
     *
     * The distribution is the per-byte frequency of all lowercased parameter
     * keys concatenated; 0.0 bits when there are no parameters.
     */
    private function extractParameterEntropy(array $parameters): BehaviorFeature
    {
        if (empty($parameters)) {
            return BehaviorFeature::create(
                type: BehaviorType::PARAMETER_PATTERNS,
                name: 'parameter_entropy',
                value: 0.0,
                unit: 'bits'
            );
        }

        // Character distribution across all parameter keys
        $joinedKeys = implode('', array_keys($parameters));
        $frequencies = array_count_values(str_split(strtolower($joinedKeys)));

        return BehaviorFeature::entropy(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_entropy',
            distribution: array_values($frequencies)
        );
    }

    /**
     * Extract parameter uniqueness for this client
     *
     * Ratio of distinct entries (compared with SORT_REGULAR) to total entries
     * in the client's parameter history; 1.0 when there is no history.
     */
    private function extractParameterUniqueness(string $clientId): BehaviorFeature
    {
        $paramHistory = $this->parameterHistory[$clientId] ?? [];

        // The history is non-empty in the second branch, so the division is safe
        $uniqueness = empty($paramHistory)
            ? 1.0
            : count(array_unique($paramHistory, SORT_REGULAR)) / count($paramHistory);

        return BehaviorFeature::create(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_uniqueness',
            value: $uniqueness,
            unit: 'ratio'
        );
    }

    /**
     * Extract parameter key diversity
     *
     * Entropy over how often each parameter key occurs across the client's
     * parameter history; 0.0 bits when there is no history. History entries
     * that are not arrays are skipped.
     */
    private function extractParameterKeyDiversity(string $clientId): BehaviorFeature
    {
        $paramHistory = $this->parameterHistory[$clientId] ?? [];

        if (empty($paramHistory)) {
            return BehaviorFeature::create(
                type: BehaviorType::PARAMETER_PATTERNS,
                name: 'parameter_key_diversity',
                value: 0.0,
                unit: 'bits'
            );
        }

        // Count keys directly instead of rebuilding a merged key list on every iteration
        $keyCounts = [];
        foreach ($paramHistory as $params) {
            if (! is_array($params)) {
                continue;
            }

            foreach (array_keys($params) as $key) {
                $keyCounts[$key] = ($keyCounts[$key] ?? 0) + 1;
            }
        }

        return BehaviorFeature::entropy(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_key_diversity',
            distribution: array_values($keyCounts)
        );
    }

    /**
     * Extract parameter value entropy
     *
     * The distribution is the per-byte frequency of all lowercased string
     * values concatenated. Yields 0.0 bits when that concatenation is empty,
     * which includes the case of no parameters at all.
     */
    private function extractParameterValueEntropy(array $parameters): BehaviorFeature
    {
        // Character distribution across all string parameter values
        $allValues = implode('', array_filter(array_values($parameters), 'is_string'));

        if (empty($allValues)) {
            return BehaviorFeature::create(
                type: BehaviorType::PARAMETER_PATTERNS,
                name: 'parameter_value_entropy',
                value: 0.0,
                unit: 'bits'
            );
        }

        $frequencies = array_count_values(str_split(strtolower($allValues)));

        return BehaviorFeature::entropy(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'parameter_value_entropy',
            distribution: array_values($frequencies)
        );
    }

    /**
     * Extract suspicious parameter score
     *
     * Additive score, capped at 1.0: 0.3 for each suspicious word contained in
     * a key, 0.1 for a percent-encoded string value, 0.2 for a string value
     * longer than 1000 bytes.
     */
    private function extractSuspiciousParameterScore(array $parameters): BehaviorFeature
    {
        $suspiciousScore = 0.0;

        $suspiciousKeys = [
            'eval', 'exec', 'system', 'cmd', 'command', 'shell',
            'admin', 'root', 'password', 'pass', 'auth', 'token',
            'debug', 'test', 'dev', 'config', 'settings',
        ];

        foreach ($parameters as $key => $value) {
            // Numeric keys arrive as ints; cast before using string functions
            $lowerKey = strtolower((string) $key);

            // Check for suspicious parameter names
            foreach ($suspiciousKeys as $suspicious) {
                if (str_contains($lowerKey, $suspicious)) {
                    $suspiciousScore += 0.3;
                }
            }

            if (! is_string($value)) {
                continue;
            }

            // Check for encoded values
            if (preg_match('/%[0-9a-fA-F]{2}/', $value)) {
                $suspiciousScore += 0.1;
            }

            // Check for extremely long values
            if (strlen($value) > 1000) {
                $suspiciousScore += 0.2;
            }
        }

        return BehaviorFeature::create(
            type: BehaviorType::PARAMETER_PATTERNS,
            name: 'suspicious_parameter_score',
            value: min($suspiciousScore, 1.0),
            unit: 'score'
        );
    }

    /**
     * Extract injection pattern score
     */
    private function extractInjectionPatternScore(array $parameters): BehaviorFeature
    {
        $injectionScore = 0.0;

        $injectionPatterns = [
            'sql' => ['/union\\s+select/i', '/or\\s+1\\s*=\\s*1/i', '/\\s*;\\s*drop\\s+table/i'],
            'xss' => ['/