0.7,
            'algorithm' => 'random_forest',
            'features' => 25
        ],
        createdAt: Timestamp::now()
    );
    $registry->register($metadataA);

    // Version B: New candidate model (improved)
    $metadataB = new ModelMetadata(
        modelName: $modelName,
        modelType: ModelType::SUPERVISED,
        version: $versionB,
        configuration: [
            'threshold' => 0.65,
            'algorithm' => 'xgboost',
            'features' => 30
        ],
        createdAt: Timestamp::now()
    );
    $registry->register($metadataB);

    echo " ✓ Registered version A (1.0.0) - Current production model\n";
    echo " ✓ Registered version B (2.0.0) - New candidate model\n\n";

    // ========================================================================
    // Setup: Simulate performance data for both versions
    // ========================================================================
    echo "3. Simulating performance data...\n";

    $timestamp = Timestamp::now();

    // Stores one prediction record per entry of $predictions for the given
    // model version. 'is_correct' is derived from prediction === actual, so the
    // accuracy of the stored data follows directly from the input rows.
    $storePredictions = function ($version, array $predictions) use ($storage, $modelName, $timestamp): void {
        foreach ($predictions as $pred) {
            $storage->storePrediction([
                'model_name' => $modelName,
                'version' => $version->toString(),
                'prediction' => $pred['prediction'],
                'actual' => $pred['actual'],
                'confidence' => $pred['confidence'],
                'features' => [],
                'timestamp' => $timestamp->toDateTime(),
                'is_correct' => $pred['prediction'] === $pred['actual'],
            ]);
        }
    };

    // Version A: 85% accuracy (baseline)
    $predictionsA = [
        // Correct predictions (85%)
        ...array_fill(0, 85, ['confidence' => 0.8, 'actual' => true, 'prediction' => true]),
        // Incorrect predictions (15%)
        ...array_fill(0, 15, ['confidence' => 0.75, 'actual' => true, 'prediction' => false]),
    ];
    $storePredictions($versionA, $predictionsA);

    // Version B: 92% accuracy (improved)
    $predictionsB = [
        // Correct predictions (92%)
        ...array_fill(0, 92, ['confidence' => 0.85, 'actual' => true, 'prediction' => true]),
        // Incorrect predictions (8%)
        ...array_fill(0, 8, ['confidence' => 0.7, 'actual' => true, 'prediction' => false]),
    ];
    $storePredictions($versionB, $predictionsB);

    echo " ✓ Version A: 100 predictions, 85% accuracy\n";
    echo " ✓ Version B: 100 predictions, 92% accuracy\n\n";

    // Asks the A/B testing service for a version $requests times and tallies
    // how often version A was chosen ('A') versus anything else ('B').
    // Shared by the balanced, gradual-rollout and champion/challenger checks.
    $simulateRouting = function ($config, int $requests = 1000) use ($abTesting, $versionA): array {
        $counts = ['A' => 0, 'B' => 0];
        for ($i = 0; $i < $requests; $i++) {
            $selected = $abTesting->selectVersion($config);
            $counts[$selected->equals($versionA) ? 'A' : 'B']++;
        }

        return $counts;
    };

    // ========================================================================
    // Test 1: Balanced 50/50 A/B Test
    // ========================================================================
    echo "4. Testing balanced 50/50 traffic split...\n";

    $balancedConfig = new ABTestConfig(
        modelName: $modelName,
        versionA: $versionA,
        versionB: $versionB,
        trafficSplitA: 0.5,
        primaryMetric: 'accuracy'
    );

    echo " → Configuration:\n";
    echo " {$balancedConfig->getDescription()}\n";

    // Simulate 1000 routing decisions
    $routingResults = $simulateRouting($balancedConfig);

    $percentA = ($routingResults['A'] / 1000) * 100;
    $percentB = ($routingResults['B'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Version A: {$routingResults['A']} requests (" . sprintf("%.1f%%", $percentA) . ")\n";
    echo " Version B: {$routingResults['B']} requests (" . sprintf("%.1f%%", $percentB) . ")\n\n";

    // ========================================================================
    // Test 2: Model Performance Comparison
    // ========================================================================
    echo "5. Comparing model performance...\n";

    $comparisonResult = $abTesting->runTest($balancedConfig);

    echo " → Comparison Results:\n";
    echo " Winner: {$comparisonResult->winner}\n";
    echo " Statistically Significant: " . ($comparisonResult->isStatisticallySignificant ? 'YES' : 'NO') . "\n";
    echo " Primary Metric Improvement: " . sprintf("%+.2f%%", $comparisonResult->getPrimaryMetricImprovementPercent()) . "\n";
    echo " → Summary:\n";
    echo " {$comparisonResult->getSummary()}\n";
    echo " → Recommendation:\n";
    echo " {$comparisonResult->recommendation}\n\n";

    // ========================================================================
    // Test 3: Gradual Rollout Configuration
    // ========================================================================
    echo "6. Testing gradual rollout configuration...\n";

    $gradualConfig = ABTestConfig::forGradualRollout(
        modelName: $modelName,
        currentVersion: $versionA,
        newVersion: $versionB
    );

    echo " → Configuration:\n";
    echo " {$gradualConfig->getDescription()}\n";

    // Simulate 1000 routing decisions with gradual rollout
    $gradualResults = $simulateRouting($gradualConfig);

    $percentA = ($gradualResults['A'] / 1000) * 100;
    $percentB = ($gradualResults['B'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Version A (current): {$gradualResults['A']} requests (" . sprintf("%.1f%%", $percentA) . ")\n";
    echo " Version B (new): {$gradualResults['B']} requests (" . sprintf("%.1f%%", $percentB) . ")\n\n";

    // ========================================================================
    // Test 4: Champion/Challenger Test
    // ========================================================================
    echo "7. Testing champion/challenger configuration...\n";

    $challengerConfig = ABTestConfig::forChallenger(
        modelName: $modelName,
        champion: $versionA,
        challenger: $versionB
    );

    echo " → Configuration:\n";
    echo " {$challengerConfig->getDescription()}\n";

    // Simulate 1000 routing decisions with champion/challenger.
    // Version A is the champion, so the 'A' tally maps to 'Champion'.
    $challengerCounts = $simulateRouting($challengerConfig);
    $challengerResults = [
        'Champion' => $challengerCounts['A'],
        'Challenger' => $challengerCounts['B'],
    ];

    $percentChampion = ($challengerResults['Champion'] / 1000) * 100;
    $percentChallenger = ($challengerResults['Challenger'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Champion (A): {$challengerResults['Champion']} requests (" . sprintf("%.1f%%", $percentChampion) . ")\n";
    echo " Challenger (B): {$challengerResults['Challenger']} requests (" . sprintf("%.1f%%", $percentChallenger) . ")\n\n";

    // ========================================================================
    // Test 5: Automated Test Execution
    // ========================================================================
    echo "8. Running automated A/B test...\n";

    $autoTestResult = $abTesting->runTest($balancedConfig);

    echo " → Automated Test Results:\n";
    echo " Winner: {$autoTestResult->winner}\n";
    echo " Should Deploy Version B: " . ($autoTestResult->shouldDeployVersionB() ? 'YES' : 'NO') . "\n";
    echo " Is Inconclusive: " . ($autoTestResult->isInconclusive() ? 'YES' : 'NO') . "\n";
    echo " → Metrics Difference:\n";
    foreach ($autoTestResult->metricsDifference as $metric => $diff) {
        echo " {$metric}: " . sprintf("%+.4f", $diff) . "\n";
    }
    echo "\n";

    // ========================================================================
    // Test 6: Rollout Planning
    // ========================================================================
    echo "9. Generating rollout plan...\n";

    $rolloutPlan = $abTesting->generateRolloutPlan(steps: 5);

    echo " → Rollout Plan (5 stages):\n";
    foreach ($rolloutPlan as $step => $trafficSplitB) {
        // Round before casting: a bare (int) cast truncates, so a float
        // product such as 0.29 * 100 (28.999...) would be shown as 28%.
        $percentB = (int) round($trafficSplitB * 100);
        $percentA = 100 - $percentB;
        echo " Stage {$step}: Version A {$percentA}%, Version B {$percentB}%\n";
    }
    echo "\n";

    // ========================================================================
    // Test 7: Sample Size Calculation
    // ========================================================================
    echo "10. Calculating required sample size...\n";

    $requiredSamples = $abTesting->calculateRequiredSampleSize(
        confidenceLevel: 0.95, // 95% confidence
        marginOfError: 0.05 // 5% margin of error
    );

    echo " → Sample Size Requirements:\n";
    echo " Confidence Level: 95%\n";
    echo " Margin of Error: 5%\n";
    echo " Required Samples per Version: {$requiredSamples}\n\n";

    // ========================================================================
    // Test Summary
    // ========================================================================
    // NOTE(review): the split figures in "Key Findings" (~50%, ~90/10, ~80/20)
    // are fixed text; nothing in this script asserts them against the routing
    // counts measured above. Reaching this point only proves no exception was
    // thrown.
    echo "=== Test Summary ===\n";
    echo "✓ Balanced 50/50 A/B Test: Working\n";
    echo "✓ Model Performance Comparison: Working\n";
    echo "✓ Gradual Rollout Configuration: Working\n";
    echo "✓ Champion/Challenger Test: Working\n";
    echo "✓ Automated Test Execution: Working\n";
    echo "✓ Rollout Planning: Working\n";
    echo "✓ Sample Size Calculation: Working\n\n";

    echo "Key Findings:\n";
    echo " - Version B shows " . sprintf("%.1f%%", $comparisonResult->getPrimaryMetricImprovementPercent()) . " improvement over Version A\n";
    echo " - Winner: {$comparisonResult->winner} (statistically significant: " . ($comparisonResult->isStatisticallySignificant ? 'YES' : 'NO') . ")\n";
    echo " - Recommendation: {$comparisonResult->recommendation}\n";
    echo " - Balanced 50/50 split achieved ~50% traffic to each version\n";
    echo " - Gradual rollout achieved ~90/10 split for safe deployment\n";
    echo " - Champion/challenger achieved ~80/20 split for validation\n";
    echo " - Automated test execution and rollout planning functional\n\n";

    echo "=== A/B Testing Workflows PASSED ===\n";
} catch (\Throwable $e) {
    // Any failure above lands here: report where it happened and exit
    // non-zero so callers can detect the failed run.
    echo "\n!!! TEST FAILED !!!\n";
    echo "Error: " . $e->getMessage() . "\n";
    echo "File: " . $e->getFile() . ":" . $e->getLine() . "\n";
    echo "\nStack trace:\n" . $e->getTraceAsString() . "\n";
    exit(1);
}