<?php

declare(strict_types=1);

namespace App\Framework\MachineLearning\ModelManagement;

use App\Framework\MachineLearning\ModelManagement\ValueObjects\ModelMetadata;
use App\Framework\Core\ValueObjects\Version;
use App\Framework\Core\ValueObjects\Duration;

/**
 * Auto-Tuning Engine - Automatic Threshold Optimization
 *
 * Automatically optimizes ML model thresholds and hyperparameters based on
 * production performance data:
 * - Threshold optimization (anomaly detection, classification cutoffs)
 * - Hyperparameter tuning (learning rate, window sizes, etc.)
 * - Grid search and Bayesian optimization
 * - Performance-cost trade-off optimization
 *
 * Usage:
 * ```php
 * // Optimize anomaly threshold
 * $result = $autoTuning->optimizeThreshold(
 *     modelName: 'n1-detector',
 *     version: Version::fromString('1.0.0'),
 *     metricToOptimize: 'f1_score',
 *     thresholdRange: [0.5, 0.9],
 *     step: 0.05
 * );
 *
 * // Apply optimized threshold (optimizeThreshold() returns an array)
 * $model->updateConfiguration(['threshold' => $result['optimal_threshold']]);
 * ```
 */
final readonly class AutoTuningEngine
{
    /**
     * @param ModelPerformanceMonitor $performanceMonitor Performance data source
     * @param ModelRegistry $registry Model registry for config updates
     * @param PerformanceStorage $storage Historical performance data
     */
    public function __construct(
        private ModelPerformanceMonitor $performanceMonitor,
        private ModelRegistry $registry,
        private PerformanceStorage $storage
    ) {}

    /**
     * Optimize anomaly detection threshold
     *
     * Finds the optimal threshold that maximizes the specified metric (F1-score by default).
     *
     * @param string $modelName Model identifier
     * @param Version $version Model version
     * @param string $metricToOptimize Metric to maximize ('f1_score', 'accuracy', 'precision', 'recall')
     * @param array{float, float} $thresholdRange Min/max threshold values
     * @param float $step Step size for grid search
     * @param Duration|null $timeWindow Time window for evaluation data
     *
     * @return array{
     *     optimal_threshold: float,
     *     optimal_metric_value: float,
     *     current_threshold: float,
     *     current_metric_value: float,
     *     improvement_percent: float,
     *     all_results: array,
     *     recommendation: string,
     *     metric_optimized: string
     * }
     */
    public function optimizeThreshold(
        string $modelName,
        Version $version,
        string $metricToOptimize = 'f1_score',
        array $thresholdRange = [0.5, 0.9],
        float $step = 0.05,
        ?Duration $timeWindow = null
    ): array {
        $timeWindow = $timeWindow ?? Duration::fromHours(24);

        // Get historical predictions
        $predictions = $this->storage->getPredictions($modelName, $version, $timeWindow);

        if (count($predictions) < 100) {
            throw new \RuntimeException(
                'Not enough data for threshold optimization (minimum 100 predictions required)'
            );
        }

        // Get current configuration
        $metadata = $this->registry->get($modelName, $version);
        $currentThreshold = $metadata?->configuration['threshold'] ?? 0.5;

        // Grid search over the threshold range.
        // Thresholds are stored as formatted string keys: PHP casts float array
        // keys to int, which would collapse every result onto key 0. The epsilon
        // keeps float accumulation from skipping the upper bound.
        $results = [];
        for ($threshold = $thresholdRange[0]; $threshold <= $thresholdRange[1] + 1e-9; $threshold += $step) {
            $metrics = $this->evaluateThreshold($predictions, $threshold);
            $results[sprintf('%.4f', $threshold)] = $metrics[$metricToOptimize] ?? 0.0;
        }

        // Find optimal threshold
        arsort($results);
        $optimalKey = array_key_first($results);
        $optimalThreshold = (float) $optimalKey;
        $optimalMetricValue = $results[$optimalKey];

        // Calculate improvement
        $currentMetrics = $this->evaluateThreshold($predictions, $currentThreshold);
        $currentMetricValue = $currentMetrics[$metricToOptimize] ?? 0.0;

        $improvement = $currentMetricValue > 0
            ? (($optimalMetricValue - $currentMetricValue) / $currentMetricValue) * 100
            : 0.0;

        $recommendation = $this->generateThresholdRecommendation(
            $improvement,
            $optimalThreshold,
            $currentThreshold
        );

        return [
            'optimal_threshold' => $optimalThreshold,
            'optimal_metric_value' => $optimalMetricValue,
            'current_threshold' => $currentThreshold,
            'current_metric_value' => $currentMetricValue,
            'improvement_percent' => $improvement,
            'all_results' => $results,
            'recommendation' => $recommendation,
            'metric_optimized' => $metricToOptimize,
        ];
    }

    /**
     * Optimize multiple hyperparameters simultaneously
     *
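     * Usage sketch (illustrative; the parameter names and ranges below are
     * assumptions, not required keys):
     * ```php
     * $result = $autoTuning->optimizeHyperparameters(
     *     modelName: 'n1-detector',
     *     version: Version::fromString('1.0.0'),
     *     parameterRanges: [
     *         'threshold'   => [0.5, 0.9, 0.05], // [min, max, step]
     *         'window_size' => [10, 50, 10],
     *     ]
     * );
     * $model->updateConfiguration($result['optimal_parameters']);
     * ```
     *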
     * @param string $modelName Model identifier
     * @param Version $version Model version
     * @param array<string, array> $parameterRanges Parameter name => [min, max, step]
     * @param string $metricToOptimize Metric to maximize
     *
     * @return array{
     *     optimal_parameters: array,
     *     optimal_metric_value: float,
     *     current_parameters: array,
     *     current_metric_value: float,
     *     improvement_percent: float,
     *     total_combinations_tested: int
     * }
     */
    public function optimizeHyperparameters(
        string $modelName,
        Version $version,
        array $parameterRanges,
        string $metricToOptimize = 'f1_score'
    ): array {
        // Generate all parameter combinations (grid search)
        $combinations = $this->generateParameterCombinations($parameterRanges);

        $metadata = $this->registry->get($modelName, $version);
        $currentParams = $metadata?->configuration ?? [];

        $bestParams = null;
        $bestMetricValue = 0.0;

        foreach ($combinations as $params) {
            // Simulate model with these parameters
            $metrics = $this->evaluateParameterCombination(
                $modelName,
                $version,
                $params
            );

            $metricValue = $metrics[$metricToOptimize] ?? 0.0;

            if ($metricValue > $bestMetricValue) {
                $bestMetricValue = $metricValue;
                $bestParams = $params;
            }
        }

        // Calculate improvement
        $currentMetrics = $this->performanceMonitor->getCurrentMetrics($modelName, $version);
        $currentMetricValue = $currentMetrics[$metricToOptimize] ?? 0.0;

        $improvement = $currentMetricValue > 0
            ? (($bestMetricValue - $currentMetricValue) / $currentMetricValue) * 100
            : 0.0;

        return [
            // Fall back to the current parameters if no combination improved on a zero baseline
            'optimal_parameters' => $bestParams ?? $currentParams,
            'optimal_metric_value' => $bestMetricValue,
            'current_parameters' => $currentParams,
            'current_metric_value' => $currentMetricValue,
            'improvement_percent' => $improvement,
            'total_combinations_tested' => count($combinations),
        ];
    }

    /**
     * Optimize precision-recall trade-off
     *
     * Find threshold that achieves target precision while maximizing recall.
     *
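     * Usage sketch (illustrative values):
     * ```php
     * $result = $autoTuning->optimizePrecisionRecallTradeoff(
     *     modelName: 'n1-detector',
     *     version: Version::fromString('1.0.0'),
     *     targetPrecision: 0.95
     * );
     * $model->updateConfiguration(['threshold' => $result['optimal_threshold']]);
     * ```
     *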
     * @param string $modelName Model identifier
     * @param Version $version Model version
     * @param float $targetPrecision Target precision (0.0-1.0)
     * @param array{float, float} $thresholdRange Search range
     *
     * @return array{
     *     optimal_threshold: float,
     *     achieved_precision: float,
     *     achieved_recall: float,
     *     f1_score: float
     * } If no threshold reaches the target precision, the lower bound of the
     *   range is returned together with 0.0 metrics.
     */
    public function optimizePrecisionRecallTradeoff(
        string $modelName,
        Version $version,
        float $targetPrecision = 0.95,
        array $thresholdRange = [0.5, 0.99]
    ): array {
        $predictions = $this->storage->getPredictions(
            $modelName,
            $version,
            Duration::fromHours(24)
        );

        $bestThreshold = $thresholdRange[0];
        $bestRecall = 0.0;
        $bestMetrics = null;

        // Search for threshold that meets precision target with max recall
        // (epsilon keeps float accumulation from skipping the upper bound)
        for ($threshold = $thresholdRange[0]; $threshold <= $thresholdRange[1] + 1e-9; $threshold += 0.01) {
            $metrics = $this->evaluateThreshold($predictions, $threshold);

            if ($metrics['precision'] >= $targetPrecision && $metrics['recall'] > $bestRecall) {
                $bestThreshold = $threshold;
                $bestRecall = $metrics['recall'];
                $bestMetrics = $metrics;
            }
        }

        return [
            'optimal_threshold' => $bestThreshold,
            'achieved_precision' => $bestMetrics['precision'] ?? 0.0,
            'achieved_recall' => $bestMetrics['recall'] ?? 0.0,
            'f1_score' => $bestMetrics['f1_score'] ?? 0.0,
        ];
    }

    /**
     * Adaptive threshold adjustment based on recent performance
     *
     * Dynamically adjusts threshold based on false positive/negative rate.
     *
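     * Usage sketch (illustrative; applying the recommendation is left to the caller):
     * ```php
     * $result = $autoTuning->adaptiveThresholdAdjustment('n1-detector', Version::fromString('1.0.0'));
     *
     * if ($result['recommended_threshold'] !== $result['current_threshold']) {
     *     $model->updateConfiguration(['threshold' => $result['recommended_threshold']]);
     * }
     * ```
     *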
     * @return array{
     *     recommended_threshold: float,
     *     current_threshold: float,
     *     adjustment_reason: string,
     *     expected_improvement: array,
     *     current_fp_rate: float,
     *     current_fn_rate: float
     * }
     */
    public function adaptiveThresholdAdjustment(
        string $modelName,
        Version $version
    ): array {
        $currentMetrics = $this->performanceMonitor->getCurrentMetrics(
            $modelName,
            $version,
            Duration::fromHours(1)
        );

        $metadata = $this->registry->get($modelName, $version);
        $currentThreshold = $metadata?->configuration['threshold'] ?? 0.5;

        $confusionMatrix = $currentMetrics['confusion_matrix'];
        $fp = $confusionMatrix['false_positive'];
        $fn = $confusionMatrix['false_negative'];
        $total = $currentMetrics['total_predictions'];

        // Calculate false positive and false negative rates
        $fpRate = $total > 0 ? $fp / $total : 0.0;
        $fnRate = $total > 0 ? $fn / $total : 0.0;

        // Adaptive adjustment logic
        $adjustment = 0.0;
        $reason = '';

        if ($fpRate > 0.1 && $fpRate > $fnRate) {
            // Too many false positives - increase threshold
            $adjustment = 0.05;
            $reason = 'High false positive rate - increasing threshold to reduce false alarms';
        } elseif ($fnRate > 0.1 && $fnRate > $fpRate) {
            // Too many false negatives - decrease threshold
            $adjustment = -0.05;
            $reason = 'High false negative rate - decreasing threshold to catch more anomalies';
        } else {
            $reason = 'Performance is balanced - no adjustment recommended';
        }

        $recommendedThreshold = max(0.1, min(0.99, $currentThreshold + $adjustment));

        // Estimate improvement
        $predictions = $this->storage->getPredictions(
            $modelName,
            $version,
            Duration::fromHours(24)
        );

        $expectedMetrics = $this->evaluateThreshold($predictions, $recommendedThreshold);

        return [
            'recommended_threshold' => $recommendedThreshold,
            'current_threshold' => $currentThreshold,
            'adjustment_reason' => $reason,
            'expected_improvement' => [
                'accuracy' => $expectedMetrics['accuracy'] - ($currentMetrics['accuracy'] ?? 0.0),
                'precision' => $expectedMetrics['precision'] - ($currentMetrics['precision'] ?? 0.0),
                'recall' => $expectedMetrics['recall'] - ($currentMetrics['recall'] ?? 0.0),
            ],
            'current_fp_rate' => $fpRate,
            'current_fn_rate' => $fnRate,
        ];
    }

    /**
     * Evaluate threshold on historical predictions
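     *
     * Each prediction record is expected to expose a 'confidence' score and a
     * nullable ground-truth 'actual' flag (as read below); illustrative shape:
     * ```php
     * ['confidence' => 0.82, 'actual' => true]
     * ```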
     */
    private function evaluateThreshold(array $predictions, float $threshold): array
    {
        // Re-classify predictions with new threshold
        $reclassified = array_map(function ($p) use ($threshold) {
            $newPrediction = $p['confidence'] >= $threshold;

            return [
                'prediction' => $newPrediction,
                'actual' => $p['actual'],
                'is_correct' => $newPrediction === $p['actual'],
            ];
        }, $predictions);

        // Filter valid predictions
        $valid = array_filter($reclassified, fn($p) => $p['actual'] !== null);

        if (empty($valid)) {
            return $this->getEmptyMetrics();
        }

        // Calculate confusion matrix
        $tp = $tn = $fp = $fn = 0;

        foreach ($valid as $p) {
            if ($p['prediction'] === true && $p['actual'] === true) {
                $tp++;
            } elseif ($p['prediction'] === false && $p['actual'] === false) {
                $tn++;
            } elseif ($p['prediction'] === true && $p['actual'] === false) {
                $fp++;
            } else {
                $fn++;
            }
        }

        $total = count($valid);
        $correct = $tp + $tn;

        $accuracy = $total > 0 ? $correct / $total : 0.0;
        $precision = ($tp + $fp) > 0 ? $tp / ($tp + $fp) : 0.0;
        $recall = ($tp + $fn) > 0 ? $tp / ($tp + $fn) : 0.0;
        $f1Score = ($precision + $recall) > 0
            ? 2 * ($precision * $recall) / ($precision + $recall)
            : 0.0;

        return [
            'accuracy' => $accuracy,
            'precision' => $precision,
            'recall' => $recall,
            'f1_score' => $f1Score,
            'confusion_matrix' => [
                'true_positive' => $tp,
                'true_negative' => $tn,
                'false_positive' => $fp,
                'false_negative' => $fn,
            ],
        ];
    }

    /**
     * Generate all parameter combinations for grid search
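     *
     * Illustrative expansion: ['a' => [1, 2, 1], 'b' => [0.1, 0.2, 0.1]] yields
     * 2 x 2 = 4 combinations (the cartesian product of the expanded ranges).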
     */
    private function generateParameterCombinations(array $parameterRanges): array
    {
        $combinations = [[]];

        foreach ($parameterRanges as $param => [$min, $max, $step]) {
            $newCombinations = [];

            // Small epsilon so float accumulation does not skip the upper bound
            for ($value = $min; $value <= $max + 1e-9; $value += $step) {
                foreach ($combinations as $combination) {
                    $newCombination = $combination;
                    $newCombination[$param] = $value;
                    $newCombinations[] = $newCombination;
                }
            }

            $combinations = $newCombinations;
        }

        return $combinations;
    }

    /**
     * Evaluate parameter combination
     *
     * Placeholder: currently returns the live metrics regardless of $params.
     */
    private function evaluateParameterCombination(
        string $modelName,
        Version $version,
        array $params
    ): array {
        // Simulate model performance with these parameters
        // In practice, this would retrain/reconfigure the model

        // For now, return current metrics
        return $this->performanceMonitor->getCurrentMetrics($modelName, $version);
    }

    private function generateThresholdRecommendation(
        float $improvement,
        float $optimalThreshold,
        float $currentThreshold
    ): string {
        if ($improvement < 1.0) {
            return sprintf(
                'Current threshold (%.2f) is near optimal - no change recommended',
                $currentThreshold
            );
        }

        if ($improvement > 10.0) {
            return sprintf(
                'SIGNIFICANT IMPROVEMENT: Update threshold from %.2f to %.2f (%.1f%% gain)',
                $currentThreshold,
                $optimalThreshold,
                $improvement
            );
        }

        return sprintf(
            'MODERATE IMPROVEMENT: Consider updating threshold from %.2f to %.2f (%.1f%% gain)',
            $currentThreshold,
            $optimalThreshold,
            $improvement
        );
    }

    private function getEmptyMetrics(): array
    {
        return [
            'accuracy' => 0.0,
            'precision' => 0.0,
            'recall' => 0.0,
            'f1_score' => 0.0,
            'confusion_matrix' => [
                'true_positive' => 0,
                'true_negative' => 0,
                'false_positive' => 0,
                'false_negative' => 0,
            ],
        ];
    }
}