michaelschiemer/tests/debug/test-autotuning-workflows.php

<?php

declare(strict_types=1);

/**
 * AutoTuning Engine Workflow Tests
 *
 * Demonstrates auto-tuning capabilities:
 * 1. Threshold optimization via grid search
 * 2. Adaptive threshold adjustment based on performance
 * 3. Precision-recall trade-off optimization
 * 4. Configuration update workflow (writing the tuned threshold back to the registry)
 */

require_once __DIR__ . '/../../vendor/autoload.php';

use App\Framework\MachineLearning\ModelManagement\AutoTuningEngine;
use App\Framework\MachineLearning\ModelManagement\InMemoryModelRegistry;
use App\Framework\MachineLearning\ModelManagement\ModelPerformanceMonitor;
use App\Framework\MachineLearning\ModelManagement\InMemoryPerformanceStorage;
use App\Framework\MachineLearning\ModelManagement\NullAlertingService;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ModelMetadata;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ModelType;
use App\Framework\Core\ValueObjects\Version;
use App\Framework\Core\ValueObjects\Timestamp;
use App\Framework\Core\ValueObjects\Duration;

echo "=== AutoTuning Engine Workflow Tests ===\n\n";

try {
    // ========================================================================
    // Setup: build the registry, storage, monitor and tuning engine
    // ========================================================================
    echo "1. Initializing Auto-Tuning Infrastructure...\n";

    // Leaf collaborators first; they take no constructor arguments here.
    $registry = new InMemoryModelRegistry();
    $storage = new InMemoryPerformanceStorage();
    $alerting = new NullAlertingService();

    // The monitor is composed from registry + storage + alerting; the tuning
    // engine is composed from the monitor plus the same registry and storage.
    $performanceMonitor = new ModelPerformanceMonitor($registry, $storage, $alerting);
    $autoTuning = new AutoTuningEngine($performanceMonitor, $registry, $storage);

    // A single echo with several operands emits the same bytes as four echos.
    echo "   ✓ ModelRegistry created\n",
        "   ✓ PerformanceStorage created\n",
        "   ✓ ModelPerformanceMonitor created\n",
        "   ✓ AutoTuningEngine created\n\n";

    // ========================================================================
    // Setup: Register test model with initial threshold
    // ========================================================================
    echo "2. Registering test model with initial configuration...\n";

    $modelName = 'test-anomaly-detector';
    $version = Version::fromString('1.0.0');

    // The configuration array below is the only place the starting threshold
    // is written down; the confirmation message reads it back from the
    // metadata so the printed value cannot drift from what was registered.
    $metadata = new ModelMetadata(
        modelName: $modelName,
        modelType: ModelType::UNSUPERVISED,
        version: $version,
        configuration: [
            'threshold' => 0.7,  // Initial decision threshold
            'z_score_threshold' => 3.0,
            'iqr_multiplier' => 1.5,
        ],
        createdAt: Timestamp::now()
    );

    $registry->register($metadata);

    echo "   ✓ Model registered: {$modelName} v{$version->toString()}\n";
    echo "   ✓ Initial threshold: {$metadata->configuration['threshold']}\n\n";

    // ========================================================================
    // Setup: Simulate historical predictions with varying confidence scores
    // ========================================================================

    // Fixture: each entry pairs a confidence score with its ground-truth label.
    // The group comments describe the outcome at the registered threshold.
    $simulatedPredictions = [
        // True Positives (high confidence, correctly classified)
        ...array_fill(0, 40, ['confidence' => 0.85, 'actual' => true]),
        ...array_fill(0, 20, ['confidence' => 0.75, 'actual' => true]),

        // True Negatives (low confidence, correctly classified)
        ...array_fill(0, 40, ['confidence' => 0.15, 'actual' => false]),
        ...array_fill(0, 20, ['confidence' => 0.25, 'actual' => false]),

        // False Positives (moderate-high confidence, incorrectly classified)
        ...array_fill(0, 15, ['confidence' => 0.72, 'actual' => false]),

        // False Negatives (moderate-low confidence, incorrectly classified)
        ...array_fill(0, 15, ['confidence' => 0.65, 'actual' => true]),
    ];

    // Report the real fixture size instead of a hard-coded number.
    $totalPredictions = count($simulatedPredictions);

    echo "3. Simulating {$totalPredictions} historical predictions...\n";

    $timestamp = Timestamp::now();

    // Classify with the threshold the model was registered with rather than a
    // second copy of the literal, so fixture and configuration stay in sync.
    $currentThreshold = $metadata->configuration['threshold'];

    // Confusion-matrix tally, filled while storing, so the printed
    // distribution is derived from the data rather than asserted.
    $outcomeCounts = ['tp' => 0, 'tn' => 0, 'fp' => 0, 'fn' => 0];

    // Store predictions in performance storage
    foreach ($simulatedPredictions as $pred) {
        $prediction = $pred['confidence'] >= $currentThreshold;
        $isCorrect = $prediction === $pred['actual'];

        $storage->storePrediction([
            'model_name' => $modelName,
            'version' => $version->toString(),
            'prediction' => $prediction,
            'actual' => $pred['actual'],
            'confidence' => $pred['confidence'],
            'features' => [],
            'timestamp' => $timestamp->toDateTime(),
            'is_correct' => $isCorrect,
        ]);

        $outcome = match (true) {
            $prediction && $pred['actual'] => 'tp',
            !$prediction && !$pred['actual'] => 'tn',
            $prediction => 'fp',
            default => 'fn',
        };
        $outcomeCounts[$outcome]++;
    }

    echo "   ✓ Stored {$totalPredictions} predictions\n";
    echo "   ✓ Distribution:\n";
    echo "      - {$outcomeCounts['tp']} anomalies (true positives)\n";
    echo "      - {$outcomeCounts['tn']} normal behaviors (true negatives)\n";
    echo "      - {$outcomeCounts['fp']} false positives (FP)\n";
    echo "      - {$outcomeCounts['fn']} false negatives (FN)\n\n";

    // ========================================================================
    // Test 1: Current Performance Baseline
    // ========================================================================
    echo "4. Evaluating current performance (threshold = 0.7)...\n";

    $currentMetrics = $performanceMonitor->getCurrentMetrics($modelName, $version);

    echo "   → Current Metrics:\n";

    // Padded label => metric key; each ratio is printed as a percentage
    // with two decimals, in this order.
    $baselineRows = [
        'Accuracy:  ' => 'accuracy',
        'Precision: ' => 'precision',
        'Recall:    ' => 'recall',
        'F1-Score:  ' => 'f1_score',
    ];

    foreach ($baselineRows as $rowLabel => $metricKey) {
        printf("      %s%.2f%%\n", $rowLabel, $currentMetrics[$metricKey] * 100);
    }

    echo "      Total predictions: ", $currentMetrics['total_predictions'], "\n\n";

    // ========================================================================
    // Test 2: Threshold Optimization (Grid Search)
    // ========================================================================
    echo "5. Running threshold optimization (grid search)...\n";

    // Ask the engine to optimize F1 over thresholds 0.5..0.9 in 0.05 steps.
    $optimizationResult = $autoTuning->optimizeThreshold(
        modelName: $modelName,
        version: $version,
        metricToOptimize: 'f1_score',
        thresholdRange: [0.5, 0.9],
        step: 0.05
    );

    // Thresholds are echoed as-is; metric values are ratios shown as percentages.
    echo "   → Optimization Results:\n";
    echo "      Current threshold:  ", $optimizationResult['current_threshold'], "\n";
    printf("      Current F1-score:   %.2f%%\n", $optimizationResult['current_metric_value'] * 100);
    echo "      Optimal threshold:  ", $optimizationResult['optimal_threshold'], "\n";
    printf("      Optimal F1-score:   %.2f%%\n", $optimizationResult['optimal_metric_value'] * 100);
    // improvement_percent is printed without scaling.
    printf("      Improvement:        %.1f%%\n", $optimizationResult['improvement_percent']);
    echo "   → Recommendation:\n";
    echo "      ", $optimizationResult['recommendation'], "\n\n";

    // ========================================================================
    // Test 3: Adaptive Threshold Adjustment
    // ========================================================================
    echo "6. Testing adaptive threshold adjustment...\n";

    $adaptiveResult = $autoTuning->adaptiveThresholdAdjustment(
        modelName: $modelName,
        version: $version
    );

    echo "   → Adaptive Adjustment:\n";
    echo "      Current threshold:      ", $adaptiveResult['current_threshold'], "\n";
    echo "      Recommended threshold:  ", $adaptiveResult['recommended_threshold'], "\n";
    // Error rates are ratios, shown as percentages with one decimal.
    printf("      False Positive Rate:    %.1f%%\n", $adaptiveResult['current_fp_rate'] * 100);
    printf("      False Negative Rate:    %.1f%%\n", $adaptiveResult['current_fn_rate'] * 100);
    echo "   → Reasoning:\n";
    echo "      ", $adaptiveResult['adjustment_reason'], "\n";
    echo "   → Expected Improvements:\n";

    // Signed deltas ("%+"): padded label => key inside 'expected_improvement'.
    $improvementRows = [
        'Accuracy:  ' => 'accuracy',
        'Precision: ' => 'precision',
        'Recall:    ' => 'recall',
    ];

    foreach ($improvementRows as $rowLabel => $metricKey) {
        printf("      %s%+.2f%%\n", $rowLabel, $adaptiveResult['expected_improvement'][$metricKey] * 100);
    }

    // Blank line separating this section from the next.
    echo "\n";

    // ========================================================================
    // Test 4: Precision-Recall Trade-off Optimization
    // ========================================================================
    echo "7. Optimizing precision-recall trade-off...\n",
        "   → Target: 95% precision with maximum recall\n";

    // Search thresholds 0.5..0.99 for a target precision of 0.95.
    $tradeoffResult = $autoTuning->optimizePrecisionRecallTradeoff(
        modelName: $modelName,
        version: $version,
        targetPrecision: 0.95,
        thresholdRange: [0.5, 0.99]
    );

    echo "   → Trade-off Results:\n";
    echo "      Optimal threshold:  ", $tradeoffResult['optimal_threshold'], "\n";
    // Ratios rendered as percentages with two decimals.
    printf("      Achieved precision: %.2f%%\n", $tradeoffResult['achieved_precision'] * 100);
    printf("      Achieved recall:    %.2f%%\n", $tradeoffResult['achieved_recall'] * 100);
    printf("      F1-Score:           %.2f%%\n\n", $tradeoffResult['f1_score'] * 100);

    // ========================================================================
    // Test 5: Model Configuration Update Workflow
    // ========================================================================
    echo "8. Demonstrating configuration update workflow...\n";

    // Get optimal threshold from grid search
    $newThreshold = $optimizationResult['optimal_threshold'];

    echo "   → Updating model configuration with optimal threshold...\n";
    echo "      Old threshold: {$metadata->configuration['threshold']}\n";
    echo "      New threshold: {$newThreshold}\n";

    // Start from the existing configuration and overlay the tuned values.
    // NOTE(review): withConfiguration() is not visible from this script. If it
    // replaces the array wholesale, passing only the changed keys would drop
    // 'z_score_threshold' and 'iqr_multiplier'; passing the merged array is
    // correct whether the method replaces or merges. Confirm against ModelMetadata.
    $updatedMetadata = $metadata->withConfiguration(array_merge(
        $metadata->configuration,
        [
            'threshold' => $newThreshold,
            'tuning_timestamp' => (string) Timestamp::now(),
            'tuning_method' => 'grid_search',
            'optimization_metric' => 'f1_score',
        ]
    ));

    $registry->update($updatedMetadata);

    echo "   ✓ Configuration updated successfully\n";
    echo "   ✓ Registry updated with new threshold\n\n";

    // ========================================================================
    // Test Summary
    // ========================================================================
    // Reaching this point only means no step threw; the script makes no
    // assertions about the numeric results themselves.
    echo "=== Test Summary ===\n";
    echo "✓ Threshold Optimization (Grid Search): Working\n";
    echo "✓ Adaptive Threshold Adjustment: Working\n";
    echo "✓ Precision-Recall Trade-off: Working\n";
    echo "✓ Configuration Update Workflow: Working\n\n";

    echo "Key Findings:\n";
    // Use the threshold the optimizer reported (as step 5 does) instead of a
    // hard-coded literal, so the summary cannot contradict the detailed output.
    echo "  - Current threshold ({$optimizationResult['current_threshold']}): F1-score = " . sprintf("%.2f%%", $optimizationResult['current_metric_value'] * 100) . "\n";
    echo "  - Optimal threshold ({$optimizationResult['optimal_threshold']}): F1-score = " . sprintf("%.2f%%", $optimizationResult['optimal_metric_value'] * 100) . "\n";
    echo "  - Performance gain: " . sprintf("%.1f%%", $optimizationResult['improvement_percent']) . "\n";
    echo "  - Adaptive recommendation: {$adaptiveResult['adjustment_reason']}\n";
    echo "  - High precision threshold (95%): {$tradeoffResult['optimal_threshold']} with recall = " . sprintf("%.2f%%", $tradeoffResult['achieved_recall'] * 100) . "\n\n";

    echo "=== AutoTuning Workflows PASSED ===\n";

} catch (\Throwable $e) {
    // Any exception or error from any step lands here: print the diagnostics
    // and exit non-zero so callers can detect the failure.
    echo "\n!!! TEST FAILED !!!\n";
    echo "Error: " . $e->getMessage() . "\n";
    echo "File: " . $e->getFile() . ":" . $e->getLine() . "\n";
    echo "\nStack trace:\n" . $e->getTraceAsString() . "\n";
    exit(1);
}