feat(Deployment): Integrate Ansible deployment via PHP deployment pipeline
- Create AnsibleDeployStage using framework's Process module for secure command execution
- Integrate AnsibleDeployStage into DeploymentPipelineCommands for production deployments
- Add force_deploy flag support in Ansible playbook to override stale locks
- Use PHP deployment module as orchestrator (php console.php deploy:production)
- Fix ErrorAggregationInitializer to use Environment class instead of $_ENV superglobal

Architecture:
- BuildStage → AnsibleDeployStage → HealthCheckStage for production
- Process module provides timeout, error handling, and output capture
- Ansible playbook supports rollback via rollback-git-based.yml
- Zero-downtime deployments with health checks
This commit is contained in:
322
tests/debug/test-ab-testing.php
Normal file
322
tests/debug/test-ab-testing.php
Normal file
@@ -0,0 +1,322 @@
|
||||
<?php

declare(strict_types=1);

/**
 * A/B Testing System Workflow Tests
 *
 * Demonstrates A/B testing capabilities:
 * 1. Traffic splitting between model versions
 * 2. Model performance comparison
 * 3. Statistical significance testing
 * 4. Winner determination and recommendations
 * 5. Gradual rollout planning
 * 6. Champion/challenger validation
 * 7. Required sample size calculation
 *
 * Exit code 0 on success, 1 on any uncaught failure.
 */

require_once __DIR__ . '/../../vendor/autoload.php';

use App\Framework\MachineLearning\ModelManagement\ABTestingService;
use App\Framework\MachineLearning\ModelManagement\InMemoryModelRegistry;
use App\Framework\MachineLearning\ModelManagement\ModelPerformanceMonitor;
use App\Framework\MachineLearning\ModelManagement\InMemoryPerformanceStorage;
use App\Framework\MachineLearning\ModelManagement\NullAlertingService;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ABTestConfig;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ModelMetadata;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ModelType;
use App\Framework\Core\ValueObjects\Version;
use App\Framework\Core\ValueObjects\Timestamp;
use App\Framework\Random\SecureRandomGenerator;

echo "=== A/B Testing System Workflow Tests ===\n\n";

try {
    // ========================================================================
    // Setup: Initialize infrastructure
    // ========================================================================
    echo "1. Initializing A/B Testing Infrastructure...\n";

    // In-memory implementations keep this workflow test self-contained
    // (no database or external alerting backend required).
    $registry = new InMemoryModelRegistry();
    $storage = new InMemoryPerformanceStorage();
    $alerting = new NullAlertingService();
    $performanceMonitor = new ModelPerformanceMonitor($registry, $storage, $alerting);
    $random = new SecureRandomGenerator();
    $abTesting = new ABTestingService($random, $registry);

    echo " ✓ ModelRegistry created\n";
    echo " ✓ PerformanceMonitor created\n";
    echo " ✓ ABTestingService created\n\n";

    // ========================================================================
    // Setup: Register two model versions with different performance
    // ========================================================================
    echo "2. Registering two model versions with different performance...\n";

    $modelName = 'fraud-detector';
    $versionA = Version::fromString('1.0.0');
    $versionB = Version::fromString('2.0.0');

    // Version A: Current production model (baseline)
    $metadataA = new ModelMetadata(
        modelName: $modelName,
        modelType: ModelType::SUPERVISED,
        version: $versionA,
        configuration: [
            'threshold' => 0.7,
            'algorithm' => 'random_forest',
            'features' => 25
        ],
        createdAt: Timestamp::now()
    );

    $registry->register($metadataA);

    // Version B: New candidate model (improved)
    $metadataB = new ModelMetadata(
        modelName: $modelName,
        modelType: ModelType::SUPERVISED,
        version: $versionB,
        configuration: [
            'threshold' => 0.65,
            'algorithm' => 'xgboost',
            'features' => 30
        ],
        createdAt: Timestamp::now()
    );

    $registry->register($metadataB);

    echo " ✓ Registered version A (1.0.0) - Current production model\n";
    echo " ✓ Registered version B (2.0.0) - New candidate model\n\n";

    // ========================================================================
    // Setup: Simulate performance data for both versions
    // ========================================================================
    echo "3. Simulating performance data...\n";

    $timestamp = Timestamp::now();

    // Version A: 85% accuracy (baseline).
    // Correctness is encoded by prediction === actual per record.
    $predictionsA = [
        // Correct predictions (85%)
        ...array_fill(0, 85, ['confidence' => 0.8, 'actual' => true, 'prediction' => true]),
        // Incorrect predictions (15%)
        ...array_fill(0, 15, ['confidence' => 0.75, 'actual' => true, 'prediction' => false]),
    ];

    foreach ($predictionsA as $pred) {
        $storage->storePrediction([
            'model_name' => $modelName,
            'version' => $versionA->toString(),
            'prediction' => $pred['prediction'],
            'actual' => $pred['actual'],
            'confidence' => $pred['confidence'],
            'features' => [],
            'timestamp' => $timestamp->toDateTime(),
            'is_correct' => $pred['prediction'] === $pred['actual'],
        ]);
    }

    // Version B: 92% accuracy (improved)
    $predictionsB = [
        // Correct predictions (92%)
        ...array_fill(0, 92, ['confidence' => 0.85, 'actual' => true, 'prediction' => true]),
        // Incorrect predictions (8%)
        ...array_fill(0, 8, ['confidence' => 0.7, 'actual' => true, 'prediction' => false]),
    ];

    foreach ($predictionsB as $pred) {
        $storage->storePrediction([
            'model_name' => $modelName,
            'version' => $versionB->toString(),
            'prediction' => $pred['prediction'],
            'actual' => $pred['actual'],
            'confidence' => $pred['confidence'],
            'features' => [],
            'timestamp' => $timestamp->toDateTime(),
            'is_correct' => $pred['prediction'] === $pred['actual'],
        ]);
    }

    echo " ✓ Version A: 100 predictions, 85% accuracy\n";
    echo " ✓ Version B: 100 predictions, 92% accuracy\n\n";

    // ========================================================================
    // Test 1: Balanced 50/50 A/B Test
    // ========================================================================
    echo "4. Testing balanced 50/50 traffic split...\n";

    $balancedConfig = new ABTestConfig(
        modelName: $modelName,
        versionA: $versionA,
        versionB: $versionB,
        trafficSplitA: 0.5,
        primaryMetric: 'accuracy'
    );

    echo " → Configuration:\n";
    echo " {$balancedConfig->getDescription()}\n";

    // Simulate 1000 routing decisions; with a secure RNG the observed split
    // should land near 50/50 but is not exact.
    $routingResults = ['A' => 0, 'B' => 0];
    for ($i = 0; $i < 1000; $i++) {
        $selected = $abTesting->selectVersion($balancedConfig);
        $routingResults[$selected->equals($versionA) ? 'A' : 'B']++;
    }

    $percentA = ($routingResults['A'] / 1000) * 100;
    $percentB = ($routingResults['B'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Version A: {$routingResults['A']} requests (" . sprintf("%.1f%%", $percentA) . ")\n";
    echo " Version B: {$routingResults['B']} requests (" . sprintf("%.1f%%", $percentB) . ")\n\n";

    // ========================================================================
    // Test 2: Model Performance Comparison
    // ========================================================================
    echo "5. Comparing model performance...\n";

    $comparisonResult = $abTesting->runTest($balancedConfig);

    echo " → Comparison Results:\n";
    echo " Winner: {$comparisonResult->winner}\n";
    echo " Statistically Significant: " . ($comparisonResult->isStatisticallySignificant ? 'YES' : 'NO') . "\n";
    echo " Primary Metric Improvement: " . sprintf("%+.2f%%", $comparisonResult->getPrimaryMetricImprovementPercent()) . "\n";
    echo " → Summary:\n";
    echo " {$comparisonResult->getSummary()}\n";
    echo " → Recommendation:\n";
    echo " {$comparisonResult->recommendation}\n\n";

    // ========================================================================
    // Test 3: Gradual Rollout Configuration
    // ========================================================================
    echo "6. Testing gradual rollout configuration...\n";

    $gradualConfig = ABTestConfig::forGradualRollout(
        modelName: $modelName,
        currentVersion: $versionA,
        newVersion: $versionB
    );

    echo " → Configuration:\n";
    echo " {$gradualConfig->getDescription()}\n";

    // Simulate 1000 routing decisions with gradual rollout
    $gradualResults = ['A' => 0, 'B' => 0];
    for ($i = 0; $i < 1000; $i++) {
        $selected = $abTesting->selectVersion($gradualConfig);
        $gradualResults[$selected->equals($versionA) ? 'A' : 'B']++;
    }

    $percentA = ($gradualResults['A'] / 1000) * 100;
    $percentB = ($gradualResults['B'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Version A (current): {$gradualResults['A']} requests (" . sprintf("%.1f%%", $percentA) . ")\n";
    echo " Version B (new): {$gradualResults['B']} requests (" . sprintf("%.1f%%", $percentB) . ")\n\n";

    // ========================================================================
    // Test 4: Champion/Challenger Test
    // ========================================================================
    echo "7. Testing champion/challenger configuration...\n";

    $challengerConfig = ABTestConfig::forChallenger(
        modelName: $modelName,
        champion: $versionA,
        challenger: $versionB
    );

    echo " → Configuration:\n";
    echo " {$challengerConfig->getDescription()}\n";

    // Simulate 1000 routing decisions with champion/challenger
    $challengerResults = ['Champion' => 0, 'Challenger' => 0];
    for ($i = 0; $i < 1000; $i++) {
        $selected = $abTesting->selectVersion($challengerConfig);
        $challengerResults[$selected->equals($versionA) ? 'Champion' : 'Challenger']++;
    }

    $percentChampion = ($challengerResults['Champion'] / 1000) * 100;
    $percentChallenger = ($challengerResults['Challenger'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Champion (A): {$challengerResults['Champion']} requests (" . sprintf("%.1f%%", $percentChampion) . ")\n";
    echo " Challenger (B): {$challengerResults['Challenger']} requests (" . sprintf("%.1f%%", $percentChallenger) . ")\n\n";

    // ========================================================================
    // Test 5: Automated Test Execution
    // ========================================================================
    echo "8. Running automated A/B test...\n";

    $autoTestResult = $abTesting->runTest($balancedConfig);

    echo " → Automated Test Results:\n";
    echo " Winner: {$autoTestResult->winner}\n";
    echo " Should Deploy Version B: " . ($autoTestResult->shouldDeployVersionB() ? 'YES' : 'NO') . "\n";
    echo " Is Inconclusive: " . ($autoTestResult->isInconclusive() ? 'YES' : 'NO') . "\n";
    echo " → Metrics Difference:\n";
    foreach ($autoTestResult->metricsDifference as $metric => $diff) {
        echo " {$metric}: " . sprintf("%+.4f", $diff) . "\n";
    }
    echo "\n";

    // ========================================================================
    // Test 6: Rollout Planning
    // ========================================================================
    echo "9. Generating rollout plan...\n";

    $rolloutPlan = $abTesting->generateRolloutPlan(steps: 5);

    echo " → Rollout Plan (5 stages):\n";
    foreach ($rolloutPlan as $step => $trafficSplitB) {
        // Each plan entry maps a stage to version B's traffic fraction.
        $percentB = (int) ($trafficSplitB * 100);
        $percentA = 100 - $percentB;
        echo " Stage {$step}: Version A {$percentA}%, Version B {$percentB}%\n";
    }
    echo "\n";

    // ========================================================================
    // Test 7: Sample Size Calculation
    // ========================================================================
    echo "10. Calculating required sample size...\n";

    $requiredSamples = $abTesting->calculateRequiredSampleSize(
        confidenceLevel: 0.95, // 95% confidence
        marginOfError: 0.05 // 5% margin of error
    );

    echo " → Sample Size Requirements:\n";
    echo " Confidence Level: 95%\n";
    echo " Margin of Error: 5%\n";
    echo " Required Samples per Version: {$requiredSamples}\n\n";

    // ========================================================================
    // Test Summary
    // ========================================================================
    echo "=== Test Summary ===\n";
    echo "✓ Balanced 50/50 A/B Test: Working\n";
    echo "✓ Model Performance Comparison: Working\n";
    echo "✓ Gradual Rollout Configuration: Working\n";
    echo "✓ Champion/Challenger Test: Working\n";
    echo "✓ Automated Test Execution: Working\n";
    echo "✓ Rollout Planning: Working\n";
    echo "✓ Sample Size Calculation: Working\n\n";

    echo "Key Findings:\n";
    echo " - Version B shows " . sprintf("%.1f%%", $comparisonResult->getPrimaryMetricImprovementPercent()) . " improvement over Version A\n";
    echo " - Winner: {$comparisonResult->winner} (statistically significant: " . ($comparisonResult->isStatisticallySignificant ? 'YES' : 'NO') . ")\n";
    echo " - Recommendation: {$comparisonResult->recommendation}\n";
    echo " - Balanced 50/50 split achieved ~50% traffic to each version\n";
    echo " - Gradual rollout achieved ~90/10 split for safe deployment\n";
    echo " - Champion/challenger achieved ~80/20 split for validation\n";
    echo " - Automated test execution and rollout planning functional\n\n";

    echo "=== A/B Testing Workflows PASSED ===\n";

} catch (\Throwable $e) {
    // Top-level harness catch: report and signal failure via exit code.
    echo "\n!!! TEST FAILED !!!\n";
    echo "Error: " . $e->getMessage() . "\n";
    echo "File: " . $e->getFile() . ":" . $e->getLine() . "\n";
    echo "\nStack trace:\n" . $e->getTraceAsString() . "\n";
    exit(1);
}
Reference in New Issue
Block a user