feat(Deployment): Integrate Ansible deployment via PHP deployment pipeline
- Create AnsibleDeployStage using framework's Process module for secure command execution
- Integrate AnsibleDeployStage into DeploymentPipelineCommands for production deployments
- Add force_deploy flag support in Ansible playbook to override stale locks
- Use PHP deployment module as orchestrator (php console.php deploy:production)
- Fix ErrorAggregationInitializer to use Environment class instead of $_ENV superglobal

Architecture:
- BuildStage → AnsibleDeployStage → HealthCheckStage for production
- Process module provides timeout, error handling, and output capture
- Ansible playbook supports rollback via rollback-git-based.yml
- Zero-downtime deployments with health checks
This commit is contained in:
322
tests/debug/test-ab-testing.php
Normal file
322
tests/debug/test-ab-testing.php
Normal file
@@ -0,0 +1,322 @@
|
||||
<?php

declare(strict_types=1);

/**
 * A/B Testing System Workflow Tests
 *
 * Demonstrates A/B testing capabilities:
 * 1. Traffic splitting between model versions
 * 2. Model performance comparison
 * 3. Statistical significance testing
 * 4. Winner determination and recommendations
 * 5. Gradual rollout planning
 * 6. Champion/challenger validation
 * 7. Required sample size calculation
 *
 * Exit code 0 on success, 1 on any uncaught failure.
 */

require_once __DIR__ . '/../../vendor/autoload.php';

use App\Framework\MachineLearning\ModelManagement\ABTestingService;
use App\Framework\MachineLearning\ModelManagement\InMemoryModelRegistry;
use App\Framework\MachineLearning\ModelManagement\ModelPerformanceMonitor;
use App\Framework\MachineLearning\ModelManagement\InMemoryPerformanceStorage;
use App\Framework\MachineLearning\ModelManagement\NullAlertingService;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ABTestConfig;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ModelMetadata;
use App\Framework\MachineLearning\ModelManagement\ValueObjects\ModelType;
use App\Framework\Core\ValueObjects\Version;
use App\Framework\Core\ValueObjects\Timestamp;
use App\Framework\Random\SecureRandomGenerator;

echo "=== A/B Testing System Workflow Tests ===\n\n";

try {
    // ========================================================================
    // Setup: Initialize infrastructure
    // ========================================================================
    echo "1. Initializing A/B Testing Infrastructure...\n";

    // In-memory implementations keep this workflow test self-contained
    // (no database or external alerting backend required).
    $registry = new InMemoryModelRegistry();
    $storage = new InMemoryPerformanceStorage();
    $alerting = new NullAlertingService();
    $performanceMonitor = new ModelPerformanceMonitor($registry, $storage, $alerting);
    $random = new SecureRandomGenerator();
    $abTesting = new ABTestingService($random, $registry);

    echo " ✓ ModelRegistry created\n";
    echo " ✓ PerformanceMonitor created\n";
    echo " ✓ ABTestingService created\n\n";

    // ========================================================================
    // Setup: Register two model versions with different performance
    // ========================================================================
    echo "2. Registering two model versions with different performance...\n";

    $modelName = 'fraud-detector';
    $versionA = Version::fromString('1.0.0');
    $versionB = Version::fromString('2.0.0');

    // Version A: Current production model (baseline)
    $metadataA = new ModelMetadata(
        modelName: $modelName,
        modelType: ModelType::SUPERVISED,
        version: $versionA,
        configuration: [
            'threshold' => 0.7,
            'algorithm' => 'random_forest',
            'features' => 25
        ],
        createdAt: Timestamp::now()
    );

    $registry->register($metadataA);

    // Version B: New candidate model (improved)
    $metadataB = new ModelMetadata(
        modelName: $modelName,
        modelType: ModelType::SUPERVISED,
        version: $versionB,
        configuration: [
            'threshold' => 0.65,
            'algorithm' => 'xgboost',
            'features' => 30
        ],
        createdAt: Timestamp::now()
    );

    $registry->register($metadataB);

    echo " ✓ Registered version A (1.0.0) - Current production model\n";
    echo " ✓ Registered version B (2.0.0) - New candidate model\n\n";

    // ========================================================================
    // Setup: Simulate performance data for both versions
    // ========================================================================
    echo "3. Simulating performance data...\n";

    $timestamp = Timestamp::now();

    // Version A: 85% accuracy (baseline).
    // Correctness is encoded by prediction === actual per record.
    $predictionsA = [
        // Correct predictions (85%)
        ...array_fill(0, 85, ['confidence' => 0.8, 'actual' => true, 'prediction' => true]),
        // Incorrect predictions (15%)
        ...array_fill(0, 15, ['confidence' => 0.75, 'actual' => true, 'prediction' => false]),
    ];

    foreach ($predictionsA as $pred) {
        $storage->storePrediction([
            'model_name' => $modelName,
            'version' => $versionA->toString(),
            'prediction' => $pred['prediction'],
            'actual' => $pred['actual'],
            'confidence' => $pred['confidence'],
            'features' => [],
            'timestamp' => $timestamp->toDateTime(),
            'is_correct' => $pred['prediction'] === $pred['actual'],
        ]);
    }

    // Version B: 92% accuracy (improved)
    $predictionsB = [
        // Correct predictions (92%)
        ...array_fill(0, 92, ['confidence' => 0.85, 'actual' => true, 'prediction' => true]),
        // Incorrect predictions (8%)
        ...array_fill(0, 8, ['confidence' => 0.7, 'actual' => true, 'prediction' => false]),
    ];

    foreach ($predictionsB as $pred) {
        $storage->storePrediction([
            'model_name' => $modelName,
            'version' => $versionB->toString(),
            'prediction' => $pred['prediction'],
            'actual' => $pred['actual'],
            'confidence' => $pred['confidence'],
            'features' => [],
            'timestamp' => $timestamp->toDateTime(),
            'is_correct' => $pred['prediction'] === $pred['actual'],
        ]);
    }

    echo " ✓ Version A: 100 predictions, 85% accuracy\n";
    echo " ✓ Version B: 100 predictions, 92% accuracy\n\n";

    // ========================================================================
    // Test 1: Balanced 50/50 A/B Test
    // ========================================================================
    echo "4. Testing balanced 50/50 traffic split...\n";

    $balancedConfig = new ABTestConfig(
        modelName: $modelName,
        versionA: $versionA,
        versionB: $versionB,
        trafficSplitA: 0.5,
        primaryMetric: 'accuracy'
    );

    echo " → Configuration:\n";
    echo " {$balancedConfig->getDescription()}\n";

    // Simulate 1000 routing decisions; with a secure RNG the observed split
    // should land near 50/50 but is not exact.
    $routingResults = ['A' => 0, 'B' => 0];
    for ($i = 0; $i < 1000; $i++) {
        $selected = $abTesting->selectVersion($balancedConfig);
        $routingResults[$selected->equals($versionA) ? 'A' : 'B']++;
    }

    $percentA = ($routingResults['A'] / 1000) * 100;
    $percentB = ($routingResults['B'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Version A: {$routingResults['A']} requests (" . sprintf("%.1f%%", $percentA) . ")\n";
    echo " Version B: {$routingResults['B']} requests (" . sprintf("%.1f%%", $percentB) . ")\n\n";

    // ========================================================================
    // Test 2: Model Performance Comparison
    // ========================================================================
    echo "5. Comparing model performance...\n";

    $comparisonResult = $abTesting->runTest($balancedConfig);

    echo " → Comparison Results:\n";
    echo " Winner: {$comparisonResult->winner}\n";
    echo " Statistically Significant: " . ($comparisonResult->isStatisticallySignificant ? 'YES' : 'NO') . "\n";
    echo " Primary Metric Improvement: " . sprintf("%+.2f%%", $comparisonResult->getPrimaryMetricImprovementPercent()) . "\n";
    echo " → Summary:\n";
    echo " {$comparisonResult->getSummary()}\n";
    echo " → Recommendation:\n";
    echo " {$comparisonResult->recommendation}\n\n";

    // ========================================================================
    // Test 3: Gradual Rollout Configuration
    // ========================================================================
    echo "6. Testing gradual rollout configuration...\n";

    $gradualConfig = ABTestConfig::forGradualRollout(
        modelName: $modelName,
        currentVersion: $versionA,
        newVersion: $versionB
    );

    echo " → Configuration:\n";
    echo " {$gradualConfig->getDescription()}\n";

    // Simulate 1000 routing decisions with gradual rollout
    $gradualResults = ['A' => 0, 'B' => 0];
    for ($i = 0; $i < 1000; $i++) {
        $selected = $abTesting->selectVersion($gradualConfig);
        $gradualResults[$selected->equals($versionA) ? 'A' : 'B']++;
    }

    $percentA = ($gradualResults['A'] / 1000) * 100;
    $percentB = ($gradualResults['B'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Version A (current): {$gradualResults['A']} requests (" . sprintf("%.1f%%", $percentA) . ")\n";
    echo " Version B (new): {$gradualResults['B']} requests (" . sprintf("%.1f%%", $percentB) . ")\n\n";

    // ========================================================================
    // Test 4: Champion/Challenger Test
    // ========================================================================
    echo "7. Testing champion/challenger configuration...\n";

    $challengerConfig = ABTestConfig::forChallenger(
        modelName: $modelName,
        champion: $versionA,
        challenger: $versionB
    );

    echo " → Configuration:\n";
    echo " {$challengerConfig->getDescription()}\n";

    // Simulate 1000 routing decisions with champion/challenger
    $challengerResults = ['Champion' => 0, 'Challenger' => 0];
    for ($i = 0; $i < 1000; $i++) {
        $selected = $abTesting->selectVersion($challengerConfig);
        $challengerResults[$selected->equals($versionA) ? 'Champion' : 'Challenger']++;
    }

    $percentChampion = ($challengerResults['Champion'] / 1000) * 100;
    $percentChallenger = ($challengerResults['Challenger'] / 1000) * 100;

    echo " → Traffic Routing (1000 requests):\n";
    echo " Champion (A): {$challengerResults['Champion']} requests (" . sprintf("%.1f%%", $percentChampion) . ")\n";
    echo " Challenger (B): {$challengerResults['Challenger']} requests (" . sprintf("%.1f%%", $percentChallenger) . ")\n\n";

    // ========================================================================
    // Test 5: Automated Test Execution
    // ========================================================================
    echo "8. Running automated A/B test...\n";

    $autoTestResult = $abTesting->runTest($balancedConfig);

    echo " → Automated Test Results:\n";
    echo " Winner: {$autoTestResult->winner}\n";
    echo " Should Deploy Version B: " . ($autoTestResult->shouldDeployVersionB() ? 'YES' : 'NO') . "\n";
    echo " Is Inconclusive: " . ($autoTestResult->isInconclusive() ? 'YES' : 'NO') . "\n";
    echo " → Metrics Difference:\n";
    foreach ($autoTestResult->metricsDifference as $metric => $diff) {
        echo " {$metric}: " . sprintf("%+.4f", $diff) . "\n";
    }
    echo "\n";

    // ========================================================================
    // Test 6: Rollout Planning
    // ========================================================================
    echo "9. Generating rollout plan...\n";

    $rolloutPlan = $abTesting->generateRolloutPlan(steps: 5);

    echo " → Rollout Plan (5 stages):\n";
    foreach ($rolloutPlan as $step => $trafficSplitB) {
        // Each plan entry maps a stage to version B's traffic fraction.
        $percentB = (int) ($trafficSplitB * 100);
        $percentA = 100 - $percentB;
        echo " Stage {$step}: Version A {$percentA}%, Version B {$percentB}%\n";
    }
    echo "\n";

    // ========================================================================
    // Test 7: Sample Size Calculation
    // ========================================================================
    echo "10. Calculating required sample size...\n";

    $requiredSamples = $abTesting->calculateRequiredSampleSize(
        confidenceLevel: 0.95, // 95% confidence
        marginOfError: 0.05 // 5% margin of error
    );

    echo " → Sample Size Requirements:\n";
    echo " Confidence Level: 95%\n";
    echo " Margin of Error: 5%\n";
    echo " Required Samples per Version: {$requiredSamples}\n\n";

    // ========================================================================
    // Test Summary
    // ========================================================================
    echo "=== Test Summary ===\n";
    echo "✓ Balanced 50/50 A/B Test: Working\n";
    echo "✓ Model Performance Comparison: Working\n";
    echo "✓ Gradual Rollout Configuration: Working\n";
    echo "✓ Champion/Challenger Test: Working\n";
    echo "✓ Automated Test Execution: Working\n";
    echo "✓ Rollout Planning: Working\n";
    echo "✓ Sample Size Calculation: Working\n\n";

    echo "Key Findings:\n";
    echo " - Version B shows " . sprintf("%.1f%%", $comparisonResult->getPrimaryMetricImprovementPercent()) . " improvement over Version A\n";
    echo " - Winner: {$comparisonResult->winner} (statistically significant: " . ($comparisonResult->isStatisticallySignificant ? 'YES' : 'NO') . ")\n";
    echo " - Recommendation: {$comparisonResult->recommendation}\n";
    echo " - Balanced 50/50 split achieved ~50% traffic to each version\n";
    echo " - Gradual rollout achieved ~90/10 split for safe deployment\n";
    echo " - Champion/challenger achieved ~80/20 split for validation\n";
    echo " - Automated test execution and rollout planning functional\n\n";

    echo "=== A/B Testing Workflows PASSED ===\n";

} catch (\Throwable $e) {
    // Top-level harness catch: report and signal failure via exit code.
    echo "\n!!! TEST FAILED !!!\n";
    echo "Error: " . $e->getMessage() . "\n";
    echo "File: " . $e->getFile() . ":" . $e->getLine() . "\n";
    echo "\nStack trace:\n" . $e->getTraceAsString() . "\n";
    exit(1);
}
Reference in New Issue
Block a user