query->sql;
        $suggestions = [];
        $issues = [];
        $indexRecommendations = [];

        // Basic SQL analysis
        $this->analyzeSelectStatements($sql, $suggestions, $issues);
        // $issues is passed explicitly so WHERE-clause findings reach the report and the score.
        $this->analyzeWhereClause($sql, $suggestions, $indexRecommendations, $issues);
        $this->analyzeJoins($sql, $suggestions, $issues);
        $this->analyzeAggregations($sql, $suggestions);
        $this->analyzeSubqueries($sql, $suggestions, $issues);

        // Performance analysis based on execution metrics
        $this->analyzePerformanceMetrics($profile, $suggestions, $issues);

        // Try to get execution plan if possible
        $executionPlan = $this->getExecutionPlan($sql);

        return new QueryAnalysis(
            profile: $profile,
            suggestions: $suggestions,
            issues: $issues,
            indexRecommendations: $indexRecommendations,
            executionPlan: $executionPlan,
            optimizationScore: $this->calculateOptimizationScore($profile, $issues, $suggestions)
        );
    }

    /**
     * Prepare SQL text for the keyword heuristics below.
     *
     * Collapses every run of ASCII whitespace into a single space, trims the
     * result and upper-cases it, so that literal checks such as 'GROUP BY',
     * ' OR ' or 'SELECT *' also work on multi-line or tab-indented SQL.
     *
     * @param string $sql Raw SQL text.
     *
     * @return string Upper-cased, single-spaced SQL.
     */
    private function normalizeSqlForAnalysis(string $sql): string
    {
        // An explicit ASCII class is used instead of \s so that no non-ASCII
        // byte can ever be treated as whitespace. Falls back to the raw text
        // if PCRE reports an error (preg_replace() returns null).
        $collapsed = preg_replace('/[ \t\r\n\f\x0B]+/', ' ', $sql) ?? $sql;

        return strtoupper(trim($collapsed));
    }

    /**
     * Analyze SELECT statements.
     *
     * Flags `SELECT *`, DISTINCT combined with ORDER BY, and queries that call
     * more than five aggregate functions.
     *
     * @param string $sql         Raw SQL text.
     * @param array  $suggestions Suggestion messages; appended to in place.
     * @param array  $issues      Issue messages; appended to in place.
     */
    private function analyzeSelectStatements(string $sql, array &$suggestions, array &$issues): void
    {
        $upperSql = $this->normalizeSqlForAnalysis($sql);

        // Check for SELECT *
        if (str_contains($upperSql, 'SELECT *')) {
            $issues[] = 'Using SELECT * can fetch unnecessary columns and hurt performance';
            $suggestions[] = 'Specify only the columns you need instead of using SELECT *';
        }

        // Check for DISTINCT usage
        if (str_contains($upperSql, 'DISTINCT') && str_contains($upperSql, 'ORDER BY')) {
            $suggestions[] = 'DISTINCT with ORDER BY can be expensive - consider if both are necessary';
        }

        // Count aggregate *calls* only: the name must start at a word boundary
        // and be followed by "(", so identifiers such as ACCOUNT, ADMIN or
        // SUMMARY no longer inflate the count.
        $functionCount = (int) preg_match_all('/\b(?:COUNT|SUM|AVG|MAX|MIN)\s*\(/', $upperSql);

        if ($functionCount > 5) {
            $suggestions[] = 'Multiple aggregate functions detected - consider if they can be optimized or cached';
        }
    }

    /**
     * Analyze WHERE clauses.
     *
     * Reports a missing WHERE on SELECT statements, a function call wrapped
     * around the first WHERE operand, LIKE patterns with a leading wildcard
     * and OR conditions, and proposes an index on the column that directly
     * follows each WHERE keyword.
     *
     * @param string $sql                  Raw SQL text.
     * @param array  $suggestions          Suggestion messages; appended to in place.
     * @param array  $indexRecommendations Index recommendations; appended to in place.
     * @param array  $issues               Issue messages; appended to in place. Optional so
     *                                     that three-argument calls keep working; when it is
     *                                     omitted the issues found here are discarded.
     */
    private function analyzeWhereClause(
        string $sql,
        array &$suggestions,
        array &$indexRecommendations,
        array &$issues = []
    ): void {
        $upperSql = $this->normalizeSqlForAnalysis($sql);

        if (preg_match('/\bWHERE\b/', $upperSql) !== 1) {
            if (str_starts_with($upperSql, 'SELECT')) {
                $issues[] = 'SELECT without WHERE clause may result in full table scan';
                $suggestions[] = 'Add WHERE clause to limit result set';
            }

            return;
        }

        // Check for functions in WHERE clause
        if (preg_match('/\bWHERE\s+\w+\([^)]+\)\s*(?:<=|>=|=|<|>)/', $upperSql) === 1) {
            $issues[] = 'Functions in WHERE clause prevent index usage';
            $suggestions[] = 'Avoid functions on columns in WHERE clause to enable index usage';
        }

        // Check for LIKE with leading wildcard. Everything up to the closing
        // quote is accepted, so both '%foo' and '%foo%' are detected.
        if (preg_match('/\bLIKE\s+\'%[^\']*\'/', $upperSql) === 1) {
            $issues[] = 'LIKE with leading wildcard prevents index usage';
            $suggestions[] = 'Avoid leading wildcards in LIKE patterns for better performance';
        }

        // Extract potential index candidates from WHERE clause. IN must be a
        // separate word; otherwise backtracking could split an identifier
        // ("ORIGIN BETWEEN ..." would yield the bogus column "ORIG").
        $columnPattern = '/\bWHERE\s+(\w+)(?:\s*(?:<=|>=|=|<|>)|\s+IN\b)/';
        if (preg_match_all($columnPattern, $upperSql, $matches)) {
            foreach ($matches[1] as $column) {
                $indexRecommendations[] = "Consider index on column: {$column}";
            }
        }

        // Check for OR conditions
        if (str_contains($upperSql, ' OR ')) {
            $suggestions[] = 'OR conditions can be slower than UNION - consider rewriting if appropriate';
        }
    }

    /**
     * Analyze JOIN clauses.
     *
     * Flags queries with more than five JOINs, JOINs that outnumber their
     * ON/USING conditions, and the presence of LEFT/RIGHT joins.
     *
     * @param string $sql         Raw SQL text.
     * @param array  $suggestions Suggestion messages; appended to in place.
     * @param array  $issues      Issue messages; appended to in place.
     */
    private function analyzeJoins(string $sql, array &$suggestions, array &$issues): void
    {
        $upperSql = $this->normalizeSqlForAnalysis($sql);

        // Whole-word match so identifiers such as JOINED_AT are not counted.
        $joinCount = (int) preg_match_all('/\bJOIN\b/', $upperSql);

        if ($joinCount === 0) {
            return;
        }

        if ($joinCount > 5) {
            $issues[] = "High number of JOINs ({$joinCount}) may impact performance";
            $suggestions[] = 'Consider denormalization or caching for queries with many JOINs';
        }

        // Check for Cartesian products (JOIN without a join condition).
        // USING counts as a condition, and ON is recognised even when it is
        // directly followed by a parenthesis.
        $conditionCount = (int) preg_match_all('/\b(?:ON|USING)\b/', $upperSql);
        if ($joinCount > $conditionCount) {
            $issues[] = 'Potential Cartesian product detected - missing JOIN conditions';
            $suggestions[] = 'Ensure all JOINs have proper ON conditions';
        }

        // Check for table order in JOINs (also matches LEFT/RIGHT OUTER JOIN)
        if (preg_match('/\b(?:LEFT|RIGHT) (?:OUTER )?JOIN\b/', $upperSql) === 1) {
            $suggestions[] = 'Consider JOIN order - start with the most selective table';
        }
    }

    /**
     * Analyze aggregation functions.
     *
     * Looks at GROUP BY (missing ORDER BY, more than three grouping columns)
     * and at HAVING used together with WHERE.
     *
     * @param string $sql         Raw SQL text.
     * @param array  $suggestions Suggestion messages; appended to in place.
     */
    private function analyzeAggregations(string $sql, array &$suggestions): void
    {
        $upperSql = $this->normalizeSqlForAnalysis($sql);

        if (str_contains($upperSql, 'GROUP BY')) {
            if (! str_contains($upperSql, 'ORDER BY')) {
                $suggestions[] = 'GROUP BY without ORDER BY - consider if ordering is needed';
            }

            // Check for GROUP BY with many columns. The column list ends at
            // the next ORDER / HAVING / LIMIT keyword or at the end of the
            // statement; commas are counted only inside that span.
            $groupByMatches = [];
            $groupByPattern = '/GROUP BY\s+(.+?)(?:\s+(?:ORDER|HAVING|LIMIT)\b|\s*$)/';
            if (preg_match($groupByPattern, $upperSql, $groupByMatches) === 1) {
                $columns = explode(',', $groupByMatches[1]);
                if (count($columns) > 3) {
                    $suggestions[] = 'GROUP BY with many columns can be expensive - verify all are necessary';
                }
            }
        }

        if (str_contains($upperSql, 'HAVING') && str_contains($upperSql, 'WHERE')) {
            $suggestions[] = 'Move non-aggregate conditions from HAVING to WHERE for better performance';
        }
    }

    /**
     * Analyze subqueries.
     *
     * Every SELECT keyword beyond the first is treated as a subquery; more
     * than two of them is reported as an issue. EXISTS / NOT EXISTS directly
     * after WHERE is reported as a likely correlated subquery.
     *
     * @param string $sql         Raw SQL text.
     * @param array  $suggestions Suggestion messages; appended to in place.
     * @param array  $issues      Issue messages; appended to in place.
     */
    private function analyzeSubqueries(string $sql, array &$suggestions, array &$issues): void
    {
        $upperSql = $this->normalizeSqlForAnalysis($sql);

        // Whole-word match so identifiers such as SELECTED are not counted.
        $selectCount = (int) preg_match_all('/\bSELECT\b/', $upperSql);

        if ($selectCount <= 1) {
            return;
        }

        $subqueryCount = $selectCount - 1;

        if ($subqueryCount > 2) {
            $issues[] = "Multiple subqueries ({$subqueryCount}) detected";
            $suggestions[] = 'Consider rewriting subqueries as JOINs for better performance';
        }

        // Check for correlated subqueries (simplified detection)
        if (str_contains($upperSql, 'WHERE EXISTS') || str_contains($upperSql, 'WHERE NOT EXISTS')) {
            $suggestions[] = 'Correlated subqueries can be expensive - consider JOIN alternatives';
        }
    }

    /**
     * Analyze performance metrics.
     *
     * Compares the measured execution time, memory usage and complexity score
     * of the profile against fixed thresholds.
     *
     * @param QueryProfile $profile     Profile supplying executionTime, memoryUsage and
     *                                  getComplexityScore().
     * @param array        $suggestions Suggestion messages; appended to in place.
     * @param array        $issues      Issue messages; appended to in place.
     */
    private function analyzePerformanceMetrics(QueryProfile $profile, array &$suggestions, array &$issues): void
    {
        $executionTimeMs = $profile->executionTime->toMilliseconds();
        // NOTE(review): memoryUsage is presumably a byte count - confirm against QueryProfile.
        $memoryUsageMB = $profile->memoryUsage / (1024 * 1024);

        if ($executionTimeMs > 5000) {
            $issues[] = 'Very slow execution time (>5 seconds)';
            $suggestions[] = 'Consider breaking this query into smaller parts or adding appropriate indexes';
        } elseif ($executionTimeMs > 1000) {
            $issues[] = 'Slow execution time (>1 second)';
            $suggestions[] = 'Review query optimization opportunities';
        }

        if ($memoryUsageMB > 50) {
            $issues[] = 'High memory usage (>50MB)';
            $suggestions[] = 'Consider limiting result set size or using pagination';
        } elseif ($memoryUsageMB > 10) {
            $suggestions[] = 'Moderate memory usage detected - monitor if consistent';
        }

        if ($profile->getComplexityScore() > 15) {
            $issues[] = 'High query complexity score';
            $suggestions[] = 'Consider simplifying the query or breaking it into multiple queries';
        }
    }

    /**
     * Get execution plan (simplified - database specific).
     *
     * Best effort: first runs a plain `EXPLAIN <sql>` and concatenates the
     * print_r() dump of every returned row; if that throws, retries with
     * `EXPLAIN (FORMAT JSON) <sql>` through queryScalar(). Any failure of the
     * second attempt is swallowed on purpose, because a missing plan must not
     * abort the analysis.
     *
     * The SQL text is concatenated into the EXPLAIN statement as-is; it is
     * the profiled statement itself and cannot be bound as a parameter.
     *
     * @param string $sql Raw SQL text of the profiled query.
     *
     * @return string|null The plan text, or null when no plan could be obtained
     *                     (including when the first EXPLAIN returned no rows).
     */
    private function getExecutionPlan(string $sql): ?string
    {
        try {
            // Try MySQL EXPLAIN
            $result = $this->connection->query('EXPLAIN ' . $sql);
            $plan = '';

            while ($row = $result->fetch()) {
                $plan .= print_r($row, true) . "\n";
            }

            return $plan ?: null;
        } catch (\Throwable) {
            // Fall through to the next database-specific attempt.
        }

        try {
            // Try PostgreSQL EXPLAIN
            $result = $this->connection->queryScalar('EXPLAIN (FORMAT JSON) ' . $sql);

            return is_string($result) ? $result : null;
        } catch (\Throwable) {
            // Return null if we can't get execution plan
            return null;
        }
    }

    /**
     * Calculate optimization score (0-100, higher is better).
     *
     * Starts at 100 and subtracts tiered penalties for execution time, memory
     * usage and complexity, then 5 points per issue and 2 per suggestion. The
     * result is clamped to the 0-100 range.
     *
     * @param QueryProfile $profile     Profile supplying the measured metrics.
     * @param array        $issues      Issues collected for the query.
     * @param array        $suggestions Suggestions collected for the query.
     *
     * @return int Score between 0 and 100 inclusive.
     */
    private function calculateOptimizationScore(QueryProfile $profile, array $issues, array $suggestions): int
    {
        $executionTimeMs = $profile->executionTime->toMilliseconds();
        $memoryUsageMB = $profile->memoryUsage / (1024 * 1024);
        $complexityScore = $profile->getComplexityScore();

        $baseScore = 100;

        // Penalize execution time
        $baseScore -= match (true) {
            $executionTimeMs > 5000 => 40,
            $executionTimeMs > 1000 => 25,
            $executionTimeMs > 500 => 15,
            $executionTimeMs > 100 => 10,
            default => 0,
        };

        // Penalize memory usage
        $baseScore -= match (true) {
            $memoryUsageMB > 50 => 20,
            $memoryUsageMB > 10 => 10,
            $memoryUsageMB > 5 => 5,
            default => 0,
        };

        // Penalize complexity
        $baseScore -= match (true) {
            $complexityScore > 15 => 15,
            $complexityScore > 10 => 10,
            $complexityScore > 5 => 5,
            default => 0,
        };

        // Penalize issues and suggestions
        $baseScore -= count($issues) * 5;
        $baseScore -= count($suggestions) * 2;

        return max(0, min(100, $baseScore));
    }

    /**
     * Batch analyze multiple profiles.
     *
     * @param array $profiles Profiles, each handed to analyzeQuery() in iteration order.
     *
     * @return array Sequentially indexed list of the analyzeQuery() results.
     */
    public function batchAnalyze(array $profiles): array
    {
        $analyses = [];

        foreach ($profiles as $profile) {
            $analyses[] = $this->analyzeQuery($profile);
        }

        return $analyses;
    }

    /**
     * Get optimization summary for multiple analyses.
     *
     * @param array $analyses Analysis results exposing optimizationScore, issues and suggestions.
     *
     * @return array Empty array for empty input; otherwise the keys
     *               total_queries_analyzed, average_optimization_score (rounded),
     *               total_issues, total_suggestions, most_common_issues and
     *               most_common_suggestions (top five, message => occurrence count)
     *               and overall_assessment.
     */
    public function getOptimizationSummary(array $analyses): array
    {
        if (empty($analyses)) {
            return [];
        }

        $totalScore = 0;
        $totalIssues = 0;
        $totalSuggestions = 0;
        $commonIssues = [];
        $commonSuggestions = [];

        foreach ($analyses as $analysis) {
            $totalScore += $analysis->optimizationScore;
            $totalIssues += count($analysis->issues);
            $totalSuggestions += count($analysis->suggestions);

            foreach ($analysis->issues as $issue) {
                $commonIssues[$issue] = ($commonIssues[$issue] ?? 0) + 1;
            }

            foreach ($analysis->suggestions as $suggestion) {
                $commonSuggestions[$suggestion] = ($commonSuggestions[$suggestion] ?? 0) + 1;
            }
        }

        // Sort by frequency
        arsort($commonIssues);
        arsort($commonSuggestions);

        $analysisCount = count($analyses);
        $averageScore = $totalScore / $analysisCount;

        return [
            'total_queries_analyzed' => $analysisCount,
            'average_optimization_score' => round($averageScore),
            'total_issues' => $totalIssues,
            'total_suggestions' => $totalSuggestions,
            'most_common_issues' => array_slice($commonIssues, 0, 5, true),
            'most_common_suggestions' => array_slice($commonSuggestions, 0, 5, true),
            'overall_assessment' => $this->getOverallAssessment($averageScore),
        ];
    }

    /**
     * Get overall assessment based on average score.
     *
     * @param float $averageScore Average optimization score.
     *
     * @return string One of 'excellent' (>= 90), 'good' (>= 75), 'fair' (>= 60),
     *                'poor' (>= 40) or 'critical'.
     */
    private function getOverallAssessment(float $averageScore): string
    {
        return match (true) {
            $averageScore >= 90 => 'excellent',
            $averageScore >= 75 => 'good',
            $averageScore >= 60 => 'fair',
            $averageScore >= 40 => 'poor',
            default => 'critical',
        };
    }
}