Enable Discovery debug logging for production troubleshooting

- Add DISCOVERY_LOG_LEVEL=debug
- Add DISCOVERY_SHOW_PROGRESS=true
- Temporary changes for debugging InitializerProcessor fixes on production
This commit is contained in:
2025-08-11 20:13:26 +02:00
parent 59fd3dd3b1
commit 55a330b223
3683 changed files with 2956207 additions and 16948 deletions

View File

@@ -0,0 +1,29 @@
<?php
declare(strict_types=1);
namespace App\Framework\Tokenizer\Contracts;
use App\Framework\Tokenizer\ValueObjects\TokenCollection;
/**
 * Contract for tokenizer implementations.
 *
 * Declares three entry points: full tokenization of a source string, a
 * reduced tokenization intended for discovery, and a generator-based variant
 * that reads from a file path.
 */
interface TokenizerInterface
{
/**
 * Tokenize code into a collection of tokens.
 *
 * @param string $code Source code to tokenize
 * @return TokenCollection Tokens produced for the given code
 */
public function tokenize(string $code): TokenCollection;
/**
 * Tokenize for discovery purposes (lightweight, metadata focused).
 *
 * @param string $code Source code to inspect
 * @return TokenCollection Tokens relevant for discovery
 */
public function tokenizeForDiscovery(string $code): TokenCollection;
/**
 * Stream tokenization for large files.
 *
 * @param string $filePath Path of the file to tokenize
 * @return \Generator Token generator for memory efficiency
 */
public function tokenizeStream(string $filePath): \Generator;
}

View File

@@ -0,0 +1,284 @@
<?php
declare(strict_types=1);
namespace App\Framework\Tokenizer\Discovery;
use App\Framework\Tokenizer\PhpTokenizer;
use App\Framework\Tokenizer\ValueObjects\TokenCollection;
/**
* Specialized tokenizer for discovery operations
* Optimized for extracting structural information from PHP files
*/
final readonly class DiscoveryTokenizer
{
/**
 * @param PhpTokenizer $tokenizer Tokenizer whose tokenizeForDiscovery() output
 *                                feeds every extract*() method; a default
 *                                instance is created when none is passed.
 */
public function __construct(
private PhpTokenizer $tokenizer = new PhpTokenizer()
) {
}
/**
 * Collect the class-like declarations (class, interface, trait, enum) found in the code.
 *
 * Declarations for which no following identifier can be located are skipped.
 *
 * @param string $code PHP source to inspect
 * @return array<int, array{type: string, name: string, namespace: ?string, fqn: string, line: int}>
 */
public function extractClasses(string $code): array
{
    $stream = $this->tokenizer->tokenizeForDiscovery($code);
    $declarations = [];

    foreach ($stream->filterById(T_CLASS, T_INTERFACE, T_TRAIT, T_ENUM) as $keyword) {
        $identifier = $this->findNextIdentifier($stream, $keyword);
        if (! $identifier) {
            continue;
        }

        $namespace = $keyword->context->currentNamespace;

        $kind = match($keyword->id) {
            T_CLASS => 'class',
            T_INTERFACE => 'interface',
            T_TRAIT => 'trait',
            T_ENUM => 'enum',
            default => 'unknown'
        };

        $declarations[] = [
            'type' => $kind,
            'name' => $identifier,
            'namespace' => $namespace,
            // The FQN is namespace + short name only; the enclosing class is deliberately not part of it.
            'fqn' => $namespace ? $namespace . '\\' . $identifier : $identifier,
            'line' => $keyword->line,
        ];
    }

    return $declarations;
}
/**
 * Collect attribute tokens found in the code.
 *
 * A token is reported when it is classified as an attribute token and its
 * context is flagged as being inside an attribute.
 *
 * @param string $code PHP source to inspect
 * @return array<int, array{name: string, context: mixed, line: int, target: string}>
 */
public function extractAttributes(string $code): array
{
    $stream = $this->tokenizer->tokenizeForDiscovery($code);
    $collected = [];

    foreach ($stream as $candidate) {
        if (! $candidate->isAttribute() || ! $candidate->context->isInAttribute) {
            continue;
        }

        $collected[] = [
            'name' => $candidate->value,
            'context' => $candidate->context->getCurrentScopeType(),
            'line' => $candidate->line,
            'target' => $this->determineAttributeTarget($stream, $candidate),
        ];
    }

    return $collected;
}
/**
 * Collect named functions and methods found in the code.
 *
 * Each `function` / `fn` keyword for which a following identifier is found
 * yields one entry describing its name, owning class/namespace and modifiers.
 *
 * @param string $code PHP source to inspect
 * @return array<int, array<string, mixed>>
 */
public function extractFunctions(string $code): array
{
    $stream = $this->tokenizer->tokenizeForDiscovery($code);
    $collected = [];

    foreach ($stream->filterById(T_FUNCTION, T_FN) as $keyword) {
        $identifier = $this->findNextIdentifier($stream, $keyword);
        if (! $identifier) {
            continue;
        }

        $scope = $keyword->context;

        $collected[] = [
            'name' => $identifier,
            'type' => $scope->isInClass ? 'method' : 'function',
            'class' => $scope->currentClass,
            'namespace' => $scope->currentNamespace,
            'line' => $keyword->line,
            'visibility' => $this->extractVisibility($stream, $keyword),
            'isStatic' => $this->hasModifier($stream, $keyword, T_STATIC),
            'isFinal' => $this->hasModifier($stream, $keyword, T_FINAL),
            'isAbstract' => $this->hasModifier($stream, $keyword, T_ABSTRACT),
        ];
    }

    return $collected;
}
/**
 * Collect the import (`use`) statements found outside of class bodies.
 *
 * @param string $code PHP source to inspect
 * @return array<int, array{class: string, alias: ?string, line: int}>
 */
public function extractUseStatements(string $code): array
{
    $stream = $this->tokenizer->tokenizeForDiscovery($code);
    $imports = [];

    foreach ($stream->filterById(T_USE) as $keyword) {
        // A `use` inside a class body pulls in a trait, not an import: ignore it.
        $parsed = $keyword->context->isInClass
            ? null
            : $this->extractUseStatement($stream, $keyword);

        if ($parsed) {
            $imports[] = $parsed;
        }
    }

    return $imports;
}
/**
 * Find the identifier that names the declaration introduced by $startToken.
 *
 * Scans the tokens that follow $startToken and returns the value of the first
 * T_STRING. The scan gives up as soon as another declaration keyword is
 * reached: a nameless construct (anonymous function, arrow function,
 * `Foo::class`, anonymous class) must not borrow the name of whatever
 * declaration happens to come later in the stream.
 *
 * @param TokenCollection $tokens     Token stream to search
 * @param mixed           $startToken Token (compared by identity) after which to search
 * @return string|null Identifier text, or null when the declaration has no name
 */
private function findNextIdentifier(TokenCollection $tokens, $startToken): ?string
{
    $pastStart = false;

    foreach ($tokens as $token) {
        if (! $pastStart) {
            $pastStart = $token === $startToken;

            continue;
        }

        if ($token->id === T_STRING) {
            return $token->value;
        }

        // Reaching the next declaration keyword means $startToken had no name of its own.
        if ($token->is([T_CLASS, T_INTERFACE, T_TRAIT, T_ENUM, T_FUNCTION, T_FN, T_NAMESPACE, T_USE])) {
            return null;
        }
    }

    return null;
}
/**
 * Determine what an attribute token is attached to.
 *
 * Only the single token that directly follows $attributeToken in the stream
 * is inspected and mapped to a target label.
 *
 * NOTE(review): modifiers, further attribute tokens or doc comments between
 * the attribute and its target all map to 'unknown' here — confirm whether
 * the scan should skip over them.
 *
 * @param TokenCollection $tokens         Token stream to search
 * @param mixed           $attributeToken Attribute token (compared by identity)
 * @return string One of 'class', 'method', 'property', 'constant', 'unknown'
 */
private function determineAttributeTarget(TokenCollection $tokens, $attributeToken): string
{
    $attributeSeen = false;

    foreach ($tokens as $candidate) {
        if (! $attributeSeen) {
            $attributeSeen = $candidate === $attributeToken;

            continue;
        }

        // First token after the attribute decides the outcome.
        return match($candidate->id) {
            T_CLASS => 'class',
            T_FUNCTION, T_FN => 'method',
            T_VARIABLE => 'property',
            T_CONST => 'constant',
            default => 'unknown'
        };
    }

    return 'unknown';
}
/**
 * Determine the visibility of the function introduced by $functionToken.
 *
 * Looks at up to five tokens preceding the function keyword. The look-back
 * stops at the previous declaration keyword so that a function without an
 * explicit modifier does not inherit the visibility of the declaration
 * before it (the same boundary rule hasModifier() applies).
 *
 * NOTE(review): modifiers of a preceding property declaration are not
 * separated by any boundary token in the discovery stream and can still be
 * picked up — verify against real inputs.
 *
 * @param TokenCollection $tokens        Token stream containing $functionToken
 * @param mixed           $functionToken Function keyword token (compared by identity)
 * @return string 'public', 'protected' or 'private'; 'public' when none is found
 */
private function extractVisibility(TokenCollection $tokens, $functionToken): string
{
    $sequence = $tokens->toArray();
    $index = array_search($functionToken, $sequence, true);

    // Token not part of this collection: nothing to look back at.
    if (! is_int($index)) {
        return 'public';
    }

    $lowerBound = max(0, $index - 5);

    for ($i = $index - 1; $i >= $lowerBound; $i--) {
        $candidate = $sequence[$i];

        $visibility = match($candidate->id) {
            T_PUBLIC => 'public',
            T_PROTECTED => 'protected',
            T_PRIVATE => 'private',
            default => null
        };

        if ($visibility !== null) {
            return $visibility;
        }

        // Anything before the previous declaration keyword belongs to another declaration.
        if ($candidate->is([T_CLASS, T_INTERFACE, T_TRAIT, T_ENUM, T_FUNCTION, T_FN, T_NAMESPACE])) {
            break;
        }
    }

    return 'public';
}
/**
 * Tell whether a modifier token appears shortly before $targetToken.
 *
 * Walks backwards over at most ten tokens and stops early when a class,
 * function or namespace keyword is reached.
 *
 * @param TokenCollection $tokens      Token stream containing $targetToken
 * @param mixed           $targetToken Token (compared by identity) to look back from
 * @param int             $modifierId  PHP token id of the modifier (e.g. T_STATIC)
 * @return bool True when the modifier is found within the look-back window
 */
private function hasModifier(TokenCollection $tokens, $targetToken, int $modifierId): bool
{
    $sequence = $tokens->toArray();
    $position = array_search($targetToken, $sequence, true);

    if ($position === false) {
        return false;
    }

    $cursor = $position - 1;
    $limit = max(0, $position - 10);

    while ($cursor >= $limit) {
        $candidate = $sequence[$cursor];

        if ($candidate->is($modifierId)) {
            return true;
        }

        // A structural keyword marks the start of a different declaration.
        if ($candidate->is([T_CLASS, T_FUNCTION, T_NAMESPACE])) {
            return false;
        }

        $cursor--;
    }

    return false;
}
/**
 * Extract the imported name and optional alias of a single `use` statement.
 *
 * Collects name tokens that follow $useToken until the statement ends. Since
 * the token stream may not contain the terminating `;`, the scan also stops
 * at the next structural keyword, at a `use` keyword on a later line, and at
 * the start of an attribute or doc comment. Without the attribute stop, an
 * attribute name following the last import would be appended to the imported
 * class name.
 *
 * @param TokenCollection $tokens   Token stream containing $useToken
 * @param mixed           $useToken `use` keyword token (compared by identity)
 * @return array{class: string, alias: ?string, line: int}|null Null when no name was found
 */
private function extractUseStatement(TokenCollection $tokens, $useToken): ?array
{
    $parts = [];
    $alias = null;
    $started = false;
    $inAlias = false;

    foreach ($tokens as $token) {
        if (! $started) {
            $started = $token === $useToken;

            continue;
        }

        if ($token->value === ';') {
            break;
        }

        // Structural keywords, attributes and doc comments all belong to whatever follows the import.
        if ($token->is([
            T_CLASS, T_INTERFACE, T_TRAIT, T_ENUM, T_FUNCTION, T_NAMESPACE,
            T_ATTRIBUTE, T_DOC_COMMENT,
        ])) {
            break;
        }

        // A `use` keyword on a later line starts a different statement.
        if ($token->is(T_USE) && $token->line > $useToken->line) {
            break;
        }

        if ($token->is(T_AS)) {
            $inAlias = true;

            continue;
        }

        if ($inAlias && $token->is(T_STRING)) {
            $alias = $token->value;

            continue;
        }

        if (! $inAlias && $token->is([T_STRING, T_NS_SEPARATOR, T_NAME_QUALIFIED])) {
            $parts[] = $token->value;
        }
    }

    if ($parts === []) {
        return null;
    }

    return [
        'class' => implode('', $parts),
        'alias' => $alias,
        'line' => $useToken->line,
    ];
}
}

View File

@@ -0,0 +1,310 @@
<?php
declare(strict_types=1);
namespace App\Framework\Tokenizer;
use App\Framework\Tokenizer\Contracts\TokenizerInterface;
use App\Framework\Tokenizer\ValueObjects\DiscoveryState;
use App\Framework\Tokenizer\ValueObjects\Token;
use App\Framework\Tokenizer\ValueObjects\TokenCollection;
use App\Framework\Tokenizer\ValueObjects\TokenContext;
use PhpToken;
/**
* Modern PHP tokenizer using PhpToken API
* Can be used for syntax highlighting, code analysis, and discovery
*/
final readonly class PhpTokenizer implements TokenizerInterface
{
/**
 * @param TokenClassifier      $classifier      Determines the TokenType of every token created
 * @param TokenContextAnalyzer $contextAnalyzer Updates the TokenContext for every token processed
 */
public function __construct(
private TokenClassifier $classifier = new TokenClassifier(),
private TokenContextAnalyzer $contextAnalyzer = new TokenContextAnalyzer()
) {
}
/**
 * Tokenize PHP code into a collection of context-aware tokens.
 *
 * Code without an opening tag gets one prepended before it is handed to
 * PhpToken::tokenize(); that injected tag is left out of the result.
 *
 * Note: line and position values are copied from the raw PhpToken, so when a
 * tag was injected they refer to the tagged source (one extra leading line).
 *
 * @param string $code PHP source, with or without an opening tag
 * @return TokenCollection One Token per raw token, in source order
 */
public function tokenize(string $code): TokenCollection
{
    $source = $this->ensurePhpTags($code);
    $tagWasInjected = $source !== $code;

    $rawTokens = PhpToken::tokenize($source);

    $scope = new TokenContext();
    $result = [];

    foreach ($rawTokens as $position => $rawToken) {
        $isInjectedTag = $tagWasInjected && $position === 0 && $rawToken->is(T_OPEN_TAG);

        if (! $isInjectedTag) {
            // The context is threaded through the loop: each token sees the scope left by its predecessors.
            $scope = $this->contextAnalyzer->analyzeContext($rawToken, $rawTokens, $position, $scope);
            $result[] = $this->createToken($rawToken, $scope, $rawTokens, $position);
        }
    }

    return new TokenCollection($result);
}
/**
 * Tokenize for discovery purposes (lightweight, metadata focused).
 *
 * Pure delegate to streamTokenizeForDiscovery(), which tokenizes only a
 * leading portion of the code and returns a filtered subset of its tokens.
 *
 * @param string $code PHP source to inspect
 * @return TokenCollection Filtered tokens relevant for discovery
 */
public function tokenizeForDiscovery(string $code): TokenCollection
{
return $this->streamTokenizeForDiscovery($code);
}
/**
 * Tokenize only the leading part of the code that matters for discovery.
 *
 * The code is scanned line by line with cheap string checks; scanning ends
 * when canStopDiscovery() says so or after 100 lines at the latest. Only the
 * scanned prefix is then tokenized and filtered.
 *
 * @param string $code PHP source to inspect
 * @return TokenCollection Filtered tokens of the scanned prefix
 */
private function streamTokenizeForDiscovery(string $code): TokenCollection
{
    $allLines = explode("\n", $code);
    $state = new DiscoveryState();
    $consumed = 0;

    foreach ($allLines as $offset => $rawLine) {
        $consumed = $offset + 1;

        $this->updateDiscoveryState($state, trim($rawLine), $consumed);

        // Stop early once enough metadata was seen, or at the hard safety limit.
        if ($this->canStopDiscovery($state, $consumed) || $consumed >= 100) {
            break;
        }
    }

    // Every consumed line is terminated with a newline, including the last one.
    $prefix = implode("\n", array_slice($allLines, 0, $consumed)) . "\n";

    return $this->tokenizeRelevantPortion($prefix, $state);
}
/**
 * Fold one (already trimmed) source line into the discovery state.
 *
 * Uses plain string and regex checks to record the first namespace line, the
 * most recent class-like declaration line, whether a `{` was seen once a
 * class was found, and counters for imports, functions, attributes and doc
 * comments.
 *
 * @param DiscoveryState $state      State object that is mutated in place
 * @param string         $line       Trimmed line content
 * @param int            $lineNumber 1-based number of the line
 */
private function updateDiscoveryState(DiscoveryState $state, string $line, int $lineNumber): void
{
    $isFirstNamespace = ! $state->namespaceFound && str_starts_with($line, 'namespace ');
    if ($isFirstNamespace) {
        $state->namespaceFound = true;
        $state->namespaceLineFound = $lineNumber;
    }

    // Only un-aliased imports of namespaced names are counted.
    $isPlainImport = str_starts_with($line, 'use ')
        && ! str_contains($line, ' as ')
        && str_contains($line, '\\');
    $state->useStatementsCount += (int) $isPlainImport;

    $declaresClassLike = preg_match(
        '/^(final\s+)?(abstract\s+)?(readonly\s+)?(class|interface|trait|enum)\s+(\w+)/',
        $line
    ) === 1;
    if ($declaresClassLike) {
        $state->classFound = true;
        $state->classLineFound = $lineNumber;
    }

    $declaresFunction = preg_match(
        '/^\s*(public|protected|private)?\s*(static\s+)?function\s+(\w+)/',
        $line
    ) === 1;
    $state->functionsCount += (int) $declaresFunction;

    $state->attributesCount += (int) str_starts_with($line, '#[');
    $state->docCommentsCount += (int) str_starts_with($line, '/**');

    // Evaluated last so a declaration and its opening brace on the same line are both registered.
    if ($state->classFound && str_contains($line, '{')) {
        $state->classBodyStarted = true;
    }
}
/**
 * Decide whether the line scan may end before the hard limit.
 *
 * @param DiscoveryState $state       Discovery state gathered so far
 * @param int            $currentLine 1-based number of the line just processed
 * @return bool True when scanning further lines is considered unnecessary
 */
private function canStopDiscovery(DiscoveryState $state, int $currentLine): bool
{
    // More than 20 lines past the declaration of a class whose body has started.
    $deepInsideClassBody = $state->classFound
        && $state->classBodyStarted
        && $currentLine > ($state->classLineFound + 20);

    // More than 50 lines in without any namespace or class-like declaration.
    $nothingStructuralYet = $currentLine > 50
        && ! $state->classFound
        && ! $state->namespaceFound;

    return $deepInsideClassBody || $nothingStructuralYet;
}
/**
 * Tokenize the selected code and keep only the tokens discovery cares about.
 *
 * Kept are: structural tokens, attribute tokens, doc comments, every
 * namespace separator, and the first name token that follows a
 * class/interface/trait/enum/function/use keyword.
 *
 * @param string         $code  Code prefix selected by the line scan
 * @param DiscoveryState $state Discovery state (currently not consulted here)
 * @return TokenCollection Filtered tokens in source order
 */
private function tokenizeRelevantPortion(string $code, DiscoveryState $state): TokenCollection
{
    $nameIntroducers = [T_CLASS, T_INTERFACE, T_TRAIT, T_ENUM, T_FUNCTION, T_USE];

    $kept = [];
    $awaitingName = false;

    foreach ($this->tokenize($code) as $candidate) {
        if ($candidate->isStructural() || $candidate->isAttribute() || $candidate->isDocComment()) {
            $kept[] = $candidate;

            // The identifier after these keywords names the declaration / import.
            if ($candidate->is($nameIntroducers)) {
                $awaitingName = true;
            }

            continue;
        }

        // T_NAME_QUALIFIED covers namespaced identifiers emitted as a single token.
        if ($awaitingName && $candidate->is([T_STRING, T_NAME_QUALIFIED])) {
            $kept[] = $candidate;
            $awaitingName = false;

            continue;
        }

        if ($candidate->is(T_NS_SEPARATOR)) {
            $kept[] = $candidate;
        }
    }

    return new TokenCollection($kept);
}
/**
 * Stream tokenization for large files.
 *
 * Reads the file in 8 KB chunks, tokenizes every block that
 * extractCompleteBlock() hands back and yields its tokens, then tokenizes
 * whatever is left in the buffer at end of file. The file handle is always
 * closed, also when the consumer abandons the generator early.
 *
 * Note: tokens are forwarded with `yield from`, so keys restart for every
 * block; do not rely on them being unique.
 *
 * @param string $filePath Path of the file to tokenize
 * @return \Generator Yields Token instances
 * @throws \RuntimeException When the file cannot be opened or read
 */
public function tokenizeStream(string $filePath): \Generator
{
    $handle = fopen($filePath, 'r');
    if ($handle === false) {
        throw new \RuntimeException("Cannot open file: $filePath");
    }

    try {
        $buffer = '';
        $inPhpBlock = false;

        while (! feof($handle)) {
            $chunk = fread($handle, 8192); // 8KB chunks

            // A failed read never reaches EOF; bail out instead of looping forever.
            if ($chunk === false) {
                throw new \RuntimeException("Cannot read file: $filePath");
            }

            $buffer .= $chunk;

            $completeBlock = $this->extractCompleteBlock($buffer, $inPhpBlock);
            if ($completeBlock !== null) {
                yield from $this->tokenize($completeBlock);

                // Keep whatever follows the extracted block for the next round.
                $buffer = $this->getBlockRemainder($buffer, $completeBlock);
            }
        }

        // Compare against '' explicitly: a remainder of "0" is falsy but still content.
        if ($buffer !== '') {
            yield from $this->tokenize($buffer);
        }
    } finally {
        fclose($handle);
    }
}
/**
 * Build the Token value object for one raw PhpToken.
 *
 * The token type comes from the classifier; text, line, position and id are
 * copied from the raw token; the context is stored as a clone.
 *
 * @param PhpToken        $phpToken  Raw token to wrap
 * @param TokenContext    $context   Context in effect for this token
 * @param array<PhpToken> $allTokens Complete raw token list (for neighbour look-ups)
 * @param int             $index     Index of $phpToken within $allTokens
 */
private function createToken(
    PhpToken $phpToken,
    TokenContext $context,
    array $allTokens,
    int $index
): Token {
    return new Token(
        type: $this->classifier->classify($phpToken, $allTokens, $index, $context),
        value: $phpToken->text,
        line: $phpToken->line,
        position: $phpToken->pos ?? 0,
        id: $phpToken->id,
        context: $context->clone()
    );
}
/**
 * Make sure the code starts with a PHP opening tag.
 *
 * @param string $code Source code, possibly a bare snippet
 * @return string $code unchanged when (ignoring surrounding whitespace) it
 *                already starts with `<?php` or `<?=`, otherwise $code
 *                prefixed with "<?php\n"
 */
private function ensurePhpTags(string $code): string
{
    $leading = trim($code);
    $alreadyTagged = str_starts_with($leading, '<?php') || str_starts_with($leading, '<?=');

    return $alreadyTagged ? $code : "<?php\n" . $code;
}
/**
 * Try to cut a processable block off the front of the buffer.
 *
 * - Seeing `<?php` anywhere in the buffer switches $inPhpBlock on.
 * - While inside a PHP block, everything up to and including the first `?>`
 *   is returned and $inPhpBlock is switched off; the buffer is left as is.
 * - Otherwise, once the buffer exceeds 16 KB the entire buffer is returned
 *   and the buffer is emptied.
 *
 * @param string $buffer     Accumulated file content (emptied in the 16 KB case)
 * @param bool   $inPhpBlock Whether an opening tag has been seen (updated in place)
 * @return string|null The extracted block, or null when nothing can be processed yet
 */
private function extractCompleteBlock(string &$buffer, bool &$inPhpBlock): ?string
{
    // Simple detection; does not account for tags inside strings or comments.
    if (str_contains($buffer, '<?php')) {
        $inPhpBlock = true;
    }

    if ($inPhpBlock) {
        $closingTagAt = strpos($buffer, '?>');

        if ($closingTagAt !== false) {
            $inPhpBlock = false;

            return substr($buffer, 0, $closingTagAt + 2);
        }
    }

    // Not enough buffered yet (16KB threshold): wait for more input.
    if (strlen($buffer) <= 16384) {
        return null;
    }

    $whole = $buffer;
    $buffer = '';

    return $whole;
}
/**
 * Return the part of the buffer that follows an extracted block.
 *
 * @param string $buffer         Buffer the block was taken from
 * @param string $extractedBlock Block previously cut off the front of the buffer
 * @return string Buffer content starting at the byte length of $extractedBlock
 */
private function getBlockRemainder(string $buffer, string $extractedBlock): string
{
    return substr($buffer, strlen($extractedBlock));
}
}

View File

@@ -0,0 +1,252 @@
<?php
declare(strict_types=1);
namespace App\Framework\Tokenizer;
use App\Framework\Tokenizer\ValueObjects\TokenContext;
use App\Framework\Tokenizer\ValueObjects\TokenType;
use PhpToken;
/**
* Classifies PHP tokens into semantic token types
*/
final readonly class TokenClassifier
{
/**
 * Classify a raw token into a semantic TokenType.
 *
 * T_STRING tokens are classified from their surroundings; every other token
 * is mapped directly from its token id or text.
 *
 * @param PhpToken        $token     Token to classify
 * @param array<PhpToken> $allTokens Complete raw token list
 * @param int             $index     Index of $token within $allTokens
 * @param TokenContext    $context   Context in effect for the token
 */
public function classify(
    PhpToken $token,
    array $allTokens,
    int $index,
    TokenContext $context
): TokenType {
    return $token->is(T_STRING)
        ? $this->classifyString($token, $allTokens, $index, $context)
        : $this->getBasicTokenType($token);
}
/**
 * Map a token to its TokenType using only the PHP token id.
 *
 * Ids not listed in the table below (which includes all single-character
 * tokens) are handed to classifySingleCharacter(), which decides by token text.
 *
 * @param PhpToken $token Token to classify
 * @return TokenType Type derived from the token id, or from the text as fallback
 */
private function getBasicTokenType(PhpToken $token): TokenType
{
return match($token->id) {
// Keywords
T_ABSTRACT, T_AS, T_BREAK, T_CALLABLE, T_CASE, T_CATCH, T_CLASS,
T_CLONE, T_CONST, T_CONTINUE, T_DECLARE, T_DEFAULT, T_DO, T_ECHO,
T_ELSE, T_ELSEIF, T_EMPTY, T_ENDDECLARE, T_ENDFOR, T_ENDFOREACH,
T_ENDIF, T_ENDSWITCH, T_ENDWHILE, T_ENUM, T_EVAL, T_EXIT, T_EXTENDS,
T_FINAL, T_FINALLY, T_FN, T_FOR, T_FOREACH, T_FUNCTION, T_GLOBAL,
T_GOTO, T_IF, T_IMPLEMENTS, T_INCLUDE, T_INCLUDE_ONCE, T_INSTANCEOF,
T_INSTEADOF, T_INTERFACE, T_ISSET, T_LIST, T_MATCH, T_NAMESPACE,
T_NEW, T_PRINT, T_PRIVATE, T_PROTECTED, T_PUBLIC, T_READONLY,
T_REQUIRE, T_REQUIRE_ONCE, T_RETURN, T_STATIC, T_SWITCH, T_THROW,
T_TRAIT, T_TRY, T_UNSET, T_USE, T_VAR, T_WHILE, T_YIELD,
T_YIELD_FROM => TokenType::KEYWORD,
// Variables
T_VARIABLE => TokenType::VARIABLE,
// String literals
T_CONSTANT_ENCAPSED_STRING, T_ENCAPSED_AND_WHITESPACE => TokenType::STRING_LITERAL,
// Number literals
T_LNUMBER, T_DNUMBER => TokenType::NUMBER_LITERAL,
// Comments
T_COMMENT => TokenType::COMMENT,
T_DOC_COMMENT => TokenType::DOC_COMMENT,
// Multi-character operators (single-character ones are handled by the fallback)
T_AND_EQUAL, T_BOOLEAN_AND, T_BOOLEAN_OR, T_COALESCE, T_COALESCE_EQUAL,
T_CONCAT_EQUAL, T_DEC, T_DIV_EQUAL, T_DOUBLE_ARROW, T_DOUBLE_COLON,
T_ELLIPSIS, T_INC, T_IS_EQUAL, T_IS_GREATER_OR_EQUAL, T_IS_IDENTICAL,
T_IS_NOT_EQUAL, T_IS_NOT_IDENTICAL, T_IS_SMALLER_OR_EQUAL, T_LOGICAL_AND,
T_LOGICAL_OR, T_LOGICAL_XOR, T_MINUS_EQUAL, T_MOD_EQUAL, T_MUL_EQUAL,
T_NULLSAFE_OBJECT_OPERATOR, T_OBJECT_OPERATOR, T_OR_EQUAL, T_PLUS_EQUAL,
T_POW, T_POW_EQUAL, T_SL, T_SL_EQUAL, T_SPACESHIP, T_SR, T_SR_EQUAL,
T_XOR_EQUAL => TokenType::OPERATOR,
// Whitespace
T_WHITESPACE => TokenType::WHITESPACE,
// PHP tags
T_OPEN_TAG, T_OPEN_TAG_WITH_ECHO, T_CLOSE_TAG => TokenType::PHP_TAG,
// Attributes
T_ATTRIBUTE => TokenType::ATTRIBUTE,
// HTML/inline content
T_INLINE_HTML => TokenType::HTML,
// Everything else: decide by the token text
default => $this->classifySingleCharacter($token)
};
}
/**
 * Classify a token purely by its text (used for single-character tokens).
 *
 * @param PhpToken $token Token to classify
 * @return TokenType Operator, bracket, parenthesis, brace, punctuation or
 *                   semicolon type; TokenType::DEFAULT for any other text
 */
private function classifySingleCharacter(PhpToken $token): TokenType
{
    $symbol = $token->text;

    $operators = ['+', '-', '*', '/', '%', '=', '<', '>', '!', '&', '|', '^', '~', '?', ':'];

    return match(true) {
        in_array($symbol, $operators, true) => TokenType::OPERATOR,
        $symbol === '[' || $symbol === ']' => TokenType::BRACKET,
        $symbol === '(' || $symbol === ')' => TokenType::PARENTHESIS,
        $symbol === '{' || $symbol === '}' => TokenType::BRACE,
        $symbol === ',' || $symbol === '.' => TokenType::PUNCTUATION,
        $symbol === ';' => TokenType::SEMICOLON,
        default => TokenType::DEFAULT
    };
}
/**
 * Classify a T_STRING token from its surroundings.
 *
 * The neighbouring tokens used as context clues are the nearest ones that are
 * not whitespace or comments. PhpToken::tokenize() emits whitespace as
 * tokens of its own, so looking at the directly adjacent token would miss
 * `class Foo`, `function bar`, `new Baz`, `extends Qux`, and so on.
 *
 * Rules are evaluated in order; the first match wins.
 *
 * @param PhpToken        $token     T_STRING token to classify
 * @param array<PhpToken> $allTokens Complete raw token list
 * @param int             $index     Index of $token within $allTokens
 * @param TokenContext    $context   Context in effect for the token
 */
private function classifyString(
    PhpToken $token,
    array $allTokens,
    int $index,
    TokenContext $context
): TokenType {
    $prevToken = $this->findSignificantNeighbour($allTokens, $index, -1);
    $nextToken = $this->findSignificantNeighbour($allTokens, $index, 1);
    $isFollowedByParenthesis = $nextToken !== null && $nextToken->text === '(';

    if ($prevToken !== null) {
        // Name of a class-like declaration
        if ($prevToken->is([T_CLASS, T_INTERFACE, T_TRAIT, T_ENUM])) {
            return match($prevToken->id) {
                T_CLASS => TokenType::CLASS_NAME,
                T_INTERFACE => TokenType::INTERFACE_NAME,
                T_TRAIT => TokenType::TRAIT_NAME,
                T_ENUM => TokenType::ENUM_NAME,
                default => TokenType::CLASS_NAME
            };
        }

        // After new/extends/implements
        if ($prevToken->is([T_NEW, T_EXTENDS, T_IMPLEMENTS])) {
            return TokenType::CLASS_NAME;
        }

        // Name of a function / method declaration
        if ($prevToken->is(T_FUNCTION)) {
            return $context->isInClass ? TokenType::METHOD_NAME : TokenType::FUNCTION_NAME;
        }

        // After :: (static method/constant)
        if ($prevToken->is(T_DOUBLE_COLON)) {
            return $isFollowedByParenthesis ? TokenType::METHOD_NAME : TokenType::CONSTANT_NAME;
        }

        // After -> or ?-> (object method/property)
        if ($prevToken->is([T_OBJECT_OPERATOR, T_NULLSAFE_OBJECT_OPERATOR])) {
            return $isFollowedByParenthesis ? TokenType::METHOD_NAME : TokenType::PROPERTY_NAME;
        }
    }

    // In attribute context
    if ($context->isInAttribute) {
        return TokenType::ATTRIBUTE_NAME;
    }

    // Function/method call (followed by parenthesis); `new Foo(` was handled above.
    if ($isFollowedByParenthesis) {
        return $context->isInClass ? TokenType::METHOD_NAME : TokenType::FUNCTION_NAME;
    }

    // Constants: UPPER_SNAKE_CASE of at least two characters (underscores and digits allowed).
    if (strlen($token->text) > 1 && preg_match('/^[A-Z][A-Z0-9_]*$/', $token->text) === 1) {
        return TokenType::CONSTANT_NAME;
    }

    // Type hints (in function parameters or return types)
    if ($this->isTypeHintContext($allTokens, $index)) {
        return TokenType::TYPE_HINT;
    }

    // Class names (PascalCase)
    if (preg_match('/^[A-Z][a-zA-Z0-9]*$/', $token->text)) {
        return TokenType::CLASS_NAME;
    }

    // Keyword-like names the tokenizer reports as T_STRING
    if ($this->isKeyword($token->text)) {
        return TokenType::KEYWORD;
    }

    return TokenType::DEFAULT;
}

/**
 * Find the nearest token in the given direction that is not whitespace or a comment.
 *
 * @param array<PhpToken> $tokens Complete raw token list
 * @param int             $index  Index to start from (exclusive)
 * @param int             $step   -1 to search backwards, 1 to search forwards
 */
private function findSignificantNeighbour(array $tokens, int $index, int $step): ?PhpToken
{
    for ($i = $index + $step; isset($tokens[$i]); $i += $step) {
        if (! $tokens[$i]->is([T_WHITESPACE, T_COMMENT, T_DOC_COMMENT])) {
            return $tokens[$i];
        }
    }

    return null;
}
/**
 * Heuristically decide whether the token at $index sits in a type-hint position.
 *
 * Looks at up to five preceding tokens: a `:` counts as a return-type
 * marker; a `function` / `fn` keyword triggers a parenthesis count between
 * that keyword and $index, and an unclosed `(` means "inside the parameter list".
 *
 * @param array<PhpToken> $tokens Complete raw token list
 * @param int             $index  Index of the token being classified
 */
private function isTypeHintContext(array $tokens, int $index): bool
{
    $earliest = max(0, $index - 5);

    for ($cursor = $index - 1; $cursor >= $earliest; $cursor--) {
        $candidate = $tokens[$cursor];

        if ($candidate->text === ':') {
            return true;
        }

        if (! $candidate->is([T_FUNCTION, T_FN])) {
            continue;
        }

        // Balance of parentheses between the keyword and the token itself.
        $openParentheses = 0;
        for ($scan = $cursor; $scan < $index; $scan++) {
            $openParentheses += match($tokens[$scan]->text) {
                '(' => 1,
                ')' => -1,
                default => 0
            };
        }

        return $openParentheses > 0;
    }

    return false;
}
/**
 * Tell whether a name is one of the keyword-like words (literals, built-in
 * type names, self/parent/static) that this classifier treats as keywords.
 * The comparison is case-insensitive.
 *
 * @param string $text Token text to check
 */
private function isKeyword(string $text): bool
{
    return match(strtolower($text)) {
        'true', 'false', 'null', 'void', 'never', 'mixed',
        'int', 'float', 'string', 'bool', 'array', 'object',
        'callable', 'iterable', 'resource', 'self', 'parent', 'static' => true,
        default => false
    };
}
}

View File

@@ -0,0 +1,150 @@
<?php
declare(strict_types=1);
namespace App\Framework\Tokenizer;
use App\Framework\Tokenizer\ValueObjects\TokenContext;
use PhpToken;
/**
* Analyzes and maintains token context during parsing
*/
final class TokenContextAnalyzer
{
/**
 * Compute the context that applies to a token, given the context before it.
 *
 * Declaration keywords, attributes and doc comments push a scope; `}`, the
 * closing `]` of an attribute and the token after a doc comment pop one.
 *
 * A scope is only pushed for constructs that will later produce the closing
 * token that pops it. No scope is entered for:
 *  - `Foo::class` (the tokenizer reports `class` as T_CLASS there),
 *  - arrow functions (`fn` has no braces),
 *  - functions without a body (abstract/interface methods, `use function`).
 * Pushing for these would leave stale entries on the scope stack.
 *
 * @param PhpToken        $token          Token being processed
 * @param array<PhpToken> $allTokens      Complete raw token list
 * @param int             $index          Index of $token within $allTokens
 * @param TokenContext    $currentContext Context in effect before this token
 * @return TokenContext Context in effect for (and after) this token
 */
public function analyzeContext(
    PhpToken $token,
    array $allTokens,
    int $index,
    TokenContext $currentContext
): TokenContext {
    // Handle scope entering tokens
    $context = match($token->id) {
        T_CLASS => $this->isClassConstantFetch($allTokens, $index)
            ? $currentContext
            : $this->enterClassScope($allTokens, $index, $currentContext, 'class'),
        T_INTERFACE => $this->enterClassScope($allTokens, $index, $currentContext, 'interface'),
        T_TRAIT => $this->enterClassScope($allTokens, $index, $currentContext, 'trait'),
        T_ENUM => $this->enterClassScope($allTokens, $index, $currentContext, 'enum'),
        T_FUNCTION => $this->hasBracedBody($allTokens, $index)
            ? $this->enterFunctionScope($allTokens, $index, $currentContext)
            : $currentContext,
        T_NAMESPACE => $this->enterNamespaceScope($allTokens, $index, $currentContext),
        T_ATTRIBUTE => $currentContext->enterScope('attribute'),
        T_DOC_COMMENT => $currentContext->enterScope('doccomment'),
        // Includes T_FN: arrow functions have no closing brace that could end a scope.
        default => $currentContext
    };

    // Handle scope exiting
    if ($token->text === '}') {
        $context = $this->handleClosingBrace($context);
    } elseif ($token->text === ']' && $context->isInAttribute) {
        $context = $context->exitScope();
    } elseif ($context->isInDocComment && ! $token->is(T_DOC_COMMENT)) {
        // A doc comment scope only covers the comment token itself.
        $context = $context->exitScope();
    }

    return $context;
}

/**
 * Tell whether the T_CLASS token at $index is the `class` of a `Foo::class` fetch.
 *
 * @param array<PhpToken> $tokens Complete raw token list
 * @param int             $index  Index of the T_CLASS token
 */
private function isClassConstantFetch(array $tokens, int $index): bool
{
    for ($i = $index - 1; $i >= 0; $i--) {
        if ($tokens[$i]->is([T_WHITESPACE, T_COMMENT, T_DOC_COMMENT])) {
            continue;
        }

        return $tokens[$i]->is(T_DOUBLE_COLON);
    }

    return false;
}

/**
 * Tell whether the function declared at $index has a `{ ... }` body.
 *
 * Scans forward to the first `{` or `;` outside of parentheses. When the
 * token list ends before either is found, a body is assumed.
 *
 * @param array<PhpToken> $tokens Complete raw token list
 * @param int             $index  Index of the T_FUNCTION token
 */
private function hasBracedBody(array $tokens, int $index): bool
{
    $depth = 0;

    for ($i = $index + 1, $total = count($tokens); $i < $total; $i++) {
        $text = $tokens[$i]->text;

        if ($text === '(') {
            $depth++;
        } elseif ($text === ')') {
            $depth--;
        } elseif ($depth <= 0 && $text === '{') {
            return true;
        } elseif ($depth <= 0 && $text === ';') {
            return false;
        }
    }

    return true;
}
/**
 * Push a class-like scope named after the identifier that follows the keyword.
 *
 * @param array<PhpToken> $tokens  Complete raw token list
 * @param int             $index   Index of the declaration keyword
 * @param TokenContext    $context Context to derive the new one from
 * @param string          $type    Scope type: 'class', 'interface', 'trait' or 'enum'
 */
private function enterClassScope(
    array $tokens,
    int $index,
    TokenContext $context,
    string $type
): TokenContext {
    return $context->enterScope($type, $this->findNextIdentifier($tokens, $index));
}
/**
 * Push a function scope; it is typed 'method' when the current context is
 * inside a class and 'function' otherwise.
 *
 * @param array<PhpToken> $tokens  Complete raw token list
 * @param int             $index   Index of the function keyword
 * @param TokenContext    $context Context to derive the new one from
 */
private function enterFunctionScope(
    array $tokens,
    int $index,
    TokenContext $context
): TokenContext {
    $scopeType = $context->isInClass ? 'method' : 'function';

    return $context->enterScope($scopeType, $this->findNextIdentifier($tokens, $index));
}
/**
 * Push a namespace scope named after the namespace that follows the keyword.
 *
 * @param array<PhpToken> $tokens  Complete raw token list
 * @param int             $index   Index of the namespace keyword
 * @param TokenContext    $context Context to derive the new one from
 */
private function enterNamespaceScope(
    array $tokens,
    int $index,
    TokenContext $context
): TokenContext {
    return $context->enterScope('namespace', $this->extractNamespace($tokens, $index));
}
/**
 * React to a `}`: leave the current scope when it is a structural one.
 *
 * @param TokenContext $context Context in effect at the closing brace
 * @return TokenContext The context after exiting, or $context unchanged when
 *                      the current scope type is not structural
 */
private function handleClosingBrace(TokenContext $context): TokenContext
{
    $structuralScopes = [
        'class', 'interface', 'trait', 'enum',
        'function', 'method', 'namespace',
    ];

    if (in_array($context->getCurrentScopeType(), $structuralScopes, true)) {
        return $context->exitScope();
    }

    return $context;
}
/**
 * Return the text of the first T_STRING after $startIndex.
 *
 * The search is abandoned when a `{` or `;` is reached first.
 *
 * @param array<PhpToken> $tokens     Complete raw token list
 * @param int             $startIndex Index after which to search
 * @return string|null Identifier text, or null when none is found
 */
private function findNextIdentifier(array $tokens, int $startIndex): ?string
{
    $total = count($tokens);
    $cursor = $startIndex + 1;

    while ($cursor < $total) {
        $candidate = $tokens[$cursor];

        if ($candidate->is(T_STRING)) {
            return $candidate->text;
        }

        // A block start or statement end means there is no name to find.
        if (in_array($candidate->text, ['{', ';'], true)) {
            return null;
        }

        $cursor++;
    }

    return null;
}
/**
 * Assemble the namespace name that follows a namespace keyword.
 *
 * Name tokens (T_STRING, T_NS_SEPARATOR, T_NAME_QUALIFIED) are concatenated.
 * Once the name has started, whitespace is skipped and any other token ends
 * the scan; before that, non-name tokens are simply passed over.
 *
 * @param array<PhpToken> $tokens     Complete raw token list
 * @param int             $startIndex Index of the namespace keyword
 * @return string Namespace name; empty string when no name token was found
 */
private function extractNamespace(array $tokens, int $startIndex): string
{
    $segments = [];

    for ($cursor = $startIndex + 1, $total = count($tokens); $cursor < $total; $cursor++) {
        $candidate = $tokens[$cursor];

        if ($candidate->is([T_STRING, T_NS_SEPARATOR, T_NAME_QUALIFIED])) {
            $segments[] = $candidate->text;

            continue;
        }

        // After the name began, anything but whitespace (`;`, `{`, ...) terminates it.
        if ($segments !== [] && ! $candidate->is(T_WHITESPACE)) {
            break;
        }
    }

    return implode('', $segments);
}
}

View File

@@ -0,0 +1,56 @@
<?php
declare(strict_types=1);
namespace App\Framework\Tokenizer\ValueObjects;
/**
 * Mutable scratch-pad used while scanning a file for discovery.
 *
 * Holds flags, line numbers and counters for what has been seen so far, so
 * that the scanner can decide when to stop early.
 */
final class DiscoveryState
{
    /** Whether a namespace declaration has been seen. */
    public bool $namespaceFound = false;

    /** Line number recorded for the namespace declaration (0 = none). */
    public int $namespaceLineFound = 0;

    /** Whether a class-like declaration has been seen. */
    public bool $classFound = false;

    /** Line number recorded for the class-like declaration (0 = none). */
    public int $classLineFound = 0;

    /** Whether the body of the class-like declaration has started. */
    public bool $classBodyStarted = false;

    public int $useStatementsCount = 0;

    public int $functionsCount = 0;

    public int $attributesCount = 0;

    public int $docCommentsCount = 0;

    /**
     * Tell whether anything structural (namespace, class, import or function)
     * has been recorded.
     */
    public function hasSignificantContent(): bool
    {
        if ($this->namespaceFound || $this->classFound) {
            return true;
        }

        return $this->useStatementsCount > 0 || $this->functionsCount > 0;
    }

    /**
     * Summarise the recorded findings.
     *
     * 'lines_processed' is the larger of the recorded namespace and class line numbers.
     *
     * @return array{namespace: bool, class: bool, uses: int, functions: int, attributes: int, lines_processed: int}
     */
    public function getSummary(): array
    {
        $lastStructuralLine = max($this->namespaceLineFound, $this->classLineFound);

        return [
            'namespace' => $this->namespaceFound,
            'class' => $this->classFound,
            'uses' => $this->useStatementsCount,
            'functions' => $this->functionsCount,
            'attributes' => $this->attributesCount,
            'lines_processed' => $lastStructuralLine,
        ];
    }
}

View File

@@ -0,0 +1,122 @@
<?php
declare(strict_types=1);
namespace App\Framework\Tokenizer\ValueObjects;
/**
 * Immutable value object for one token: its semantic type, text, location,
 * PHP token id and the context it was found in.
 */
final readonly class Token
{
    public function __construct(
        public TokenType $type,
        public string $value,
        public int $line,
        public int $position,
        public int $id,
        public TokenContext $context
    ) {
    }

    /**
     * Tell whether the PHP token id equals the given id, or is one of the given ids.
     *
     * @param int|array<int> $tokenId Single id or list of ids
     */
    public function is(int|array $tokenId): bool
    {
        return in_array($this->id, (array) $tokenId, true);
    }

    /**
     * Tell whether the token is a declaration keyword, an inheritance keyword
     * or a declaration modifier.
     */
    public function isStructural(): bool
    {
        $structuralIds = [
            T_CLASS, T_INTERFACE, T_TRAIT, T_ENUM,
            T_FUNCTION, T_NAMESPACE, T_USE,
            T_EXTENDS, T_IMPLEMENTS,
            T_PUBLIC, T_PROTECTED, T_PRIVATE,
            T_STATIC, T_FINAL, T_ABSTRACT, T_READONLY,
        ];

        return in_array($this->id, $structuralIds, true);
    }

    /**
     * Tell whether the token opens an attribute or is a T_STRING inside one.
     */
    public function isAttribute(): bool
    {
        if ($this->id === T_ATTRIBUTE) {
            return true;
        }

        return $this->context->isInAttribute && $this->id === T_STRING;
    }

    /**
     * Tell whether the token is a doc comment.
     */
    public function isDocComment(): bool
    {
        return $this->id === T_DOC_COMMENT;
    }

    /**
     * Tell whether the token was classified as a keyword.
     */
    public function isKeyword(): bool
    {
        return TokenType::KEYWORD === $this->type;
    }

    /**
     * Tell whether the token was classified as a class, function, method,
     * property or constant name.
     */
    public function isIdentifier(): bool
    {
        return match($this->type) {
            TokenType::CLASS_NAME,
            TokenType::FUNCTION_NAME,
            TokenType::METHOD_NAME,
            TokenType::PROPERTY_NAME,
            TokenType::CONSTANT_NAME => true,
            default => false
        };
    }

    /**
     * Length of the token text in bytes.
     */
    public function getLength(): int
    {
        return strlen($this->value);
    }

    /**
     * Position directly after the last byte of the token.
     */
    public function getEndPosition(): int
    {
        return $this->position + strlen($this->value);
    }

    /**
     * Tell whether a position falls inside the token (start inclusive, end exclusive).
     */
    public function containsPosition(int $position): bool
    {
        return $this->position <= $position && $position < $this->getEndPosition();
    }

    /**
     * Token text with type-specific decoration stripped from both ends:
     * quote characters for string literals, `/` and `*` characters for doc
     * comments, whitespace for everything else.
     */
    public function getCleanValue(): string
    {
        if ($this->type === TokenType::STRING_LITERAL) {
            return trim($this->value, '"\'');
        }

        if ($this->type === TokenType::DOC_COMMENT) {
            return trim($this->value, '/*');
        }

        return trim($this->value);
    }
}

View File

@@ -0,0 +1,245 @@
<?php
declare(strict_types=1);
namespace App\Framework\Tokenizer\ValueObjects;
use ArrayIterator;
use Countable;
use IteratorAggregate;
use Traversable;
/**
* Collection of tokens with utility methods
*/
final readonly class TokenCollection implements IteratorAggregate, Countable
{
/**
* @param array<Token> $tokens
*/
public function __construct(
private array $tokens = []
) {
}
/**
* Get iterator for the collection
*/
public function getIterator(): Traversable
{
return new ArrayIterator($this->tokens);
}
/**
* Count tokens in collection
*/
public function count(): int
{
return count($this->tokens);
}
/**
* Get all tokens as array
* @return array<Token>
*/
public function toArray(): array
{
return $this->tokens;
}
/**
* Get token at index
*/
public function get(int $index): ?Token
{
return $this->tokens[$index] ?? null;
}
/**
* Filter tokens by predicate
*/
public function filter(callable $predicate): self
{
return new self(array_values(array_filter($this->tokens, $predicate)));
}
/**
* Filter by token type
*/
public function filterByType(TokenType ...$types): self
{
return $this->filter(fn (Token $token) => in_array($token->type, $types, true));
}
/**
* Filter by PHP token ID
*/
public function filterById(int ...$ids): self
{
return $this->filter(fn (Token $token) => in_array($token->id, $ids, true));
}
/**
* Get only structural tokens
*/
public function getStructural(): self
{
return $this->filter(fn (Token $token) => $token->isStructural());
}
/**
* Get only identifiers
*/
public function getIdentifiers(): self
{
return $this->filter(fn (Token $token) => $token->isIdentifier());
}
/**
* Get tokens in line range
*/
public function getInLineRange(int $startLine, int $endLine): self
{
return $this->filter(
fn (Token $token) =>
$token->line >= $startLine && $token->line <= $endLine
);
}
/**
* Get tokens at specific line
*/
public function getAtLine(int $line): self
{
return $this->filter(fn (Token $token) => $token->line === $line);
}
/**
* Find first token matching predicate
*/
public function findFirst(callable $predicate): ?Token
{
foreach ($this->tokens as $token) {
if ($predicate($token)) {
return $token;
}
}
return null;
}
/**
* Find first token of type
*/
public function findFirstOfType(TokenType $type): ?Token
{
return $this->findFirst(fn (Token $token) => $token->type === $type);
}
/**
* Map tokens to another form
*/
public function map(callable $mapper): array
{
return array_map($mapper, $this->tokens);
}
/**
* Extract all values
*/
public function getValues(): array
{
return $this->map(fn (Token $token) => $token->value);
}
/**
* Extract all clean values
*/
public function getCleanValues(): array
{
return $this->map(fn (Token $token) => $token->getCleanValue());
}
/**
* Get tokens grouped by type
* @return array<string, array<Token>>
*/
public function groupByType(): array
{
$groups = [];
foreach ($this->tokens as $token) {
$type = $token->type->value;
if (! isset($groups[$type])) {
$groups[$type] = [];
}
$groups[$type][] = $token;
}
return $groups;
}
/**
* Get tokens grouped by line
* @return array<int, array<Token>>
*/
public function groupByLine(): array
{
$groups = [];
foreach ($this->tokens as $token) {
if (! isset($groups[$token->line])) {
$groups[$token->line] = [];
}
$groups[$token->line][] = $token;
}
return $groups;
}
/**
* Check if collection is empty
*/
public function isEmpty(): bool
{
return empty($this->tokens);
}
/**
* Get first token
*/
public function first(): ?Token
{
return $this->tokens[0] ?? null;
}
/**
* Get last token
*/
public function last(): ?Token
{
return empty($this->tokens) ? null : $this->tokens[count($this->tokens) - 1];
}
/**
 * Return a new collection built from array_slice() of the tokens.
 * $offset and $length are passed to array_slice() unchanged; a null
 * $length means "up to the end".
 */
public function slice(int $offset, ?int $length = null): self
{
    $portion = array_slice($this->tokens, $offset, $length);

    return new self($portion);
}
/**
 * Return a new collection containing this collection's tokens followed
 * by those of $other. Neither input collection is modified.
 */
public function merge(self $other): self
{
    $combined = array_merge($this->tokens, $other->tokens);

    return new self($combined);
}
/**
 * Join the values of all tokens, in order, without any separator.
 */
public function toString(): string
{
    $values = $this->getValues();

    return implode('', $values);
}
}

View File

@@ -0,0 +1,208 @@
<?php
declare(strict_types=1);
namespace App\Framework\Tokenizer\ValueObjects;
/**
 * Context information for a token
 *
 * Immutable snapshot of the scopes surrounding a token. Every mutator
 * returns a new instance and leaves the receiver untouched.
 */
final class TokenContext
{
    /**
     * @param array<int, array{type: string, name: ?string}> $scopeStack
     *        Entered scopes, outermost first, in the shape pushed by enterScope().
     */
    public function __construct(
        public readonly bool $isInClass = false,
        public readonly bool $isInFunction = false,
        public readonly bool $isInNamespace = false,
        public readonly bool $isInAttribute = false,
        public readonly bool $isInDocComment = false,
        public readonly ?string $currentClass = null,
        public readonly ?string $currentFunction = null,
        public readonly ?string $currentNamespace = null,
        public readonly array $scopeStack = [],
        public readonly int $nestingLevel = 0
    ) {
    }

    /**
     * Create a new context with updated values
     *
     * Keys absent from $updates keep their current value. For the nullable
     * name fields (currentClass, currentFunction, currentNamespace) an
     * explicit null clears the field; a plain `??` fallback would silently
     * keep the old name, which made it impossible to ever reset them.
     *
     * @param array<string, mixed> $updates Property name => new value
     */
    public function with(array $updates): self
    {
        // Distinguishes "key not given" from "key given as null".
        $nullable = static fn (string $key, ?string $current): ?string
            => array_key_exists($key, $updates) ? $updates[$key] : $current;

        return new self(
            isInClass: $updates['isInClass'] ?? $this->isInClass,
            isInFunction: $updates['isInFunction'] ?? $this->isInFunction,
            isInNamespace: $updates['isInNamespace'] ?? $this->isInNamespace,
            isInAttribute: $updates['isInAttribute'] ?? $this->isInAttribute,
            isInDocComment: $updates['isInDocComment'] ?? $this->isInDocComment,
            currentClass: $nullable('currentClass', $this->currentClass),
            currentFunction: $nullable('currentFunction', $this->currentFunction),
            currentNamespace: $nullable('currentNamespace', $this->currentNamespace),
            scopeStack: $updates['scopeStack'] ?? $this->scopeStack,
            nestingLevel: $updates['nestingLevel'] ?? $this->nestingLevel
        );
    }

    /**
     * Enter a new scope
     *
     * Pushes the scope onto the stack, increments the nesting level and
     * sets the flag that belongs to $type. Unknown types are only pushed
     * onto the stack. A scope entered without a name keeps the name that
     * was current before (e.g. the enclosing method for a nameless
     * 'function' scope).
     */
    public function enterScope(string $type, ?string $name = null): self
    {
        $updates = [
            'scopeStack' => [...$this->scopeStack, ['type' => $type, 'name' => $name]],
            'nestingLevel' => $this->nestingLevel + 1,
        ];

        $scopeUpdates = match ($type) {
            'class', 'interface', 'trait', 'enum' => [
                'isInClass' => true,
                'currentClass' => $name ?? $this->currentClass,
            ],
            'function', 'method' => [
                'isInFunction' => true,
                'currentFunction' => $name ?? $this->currentFunction,
            ],
            'namespace' => [
                'isInNamespace' => true,
                'currentNamespace' => $name ?? $this->currentNamespace,
            ],
            'attribute' => [
                'isInAttribute' => true,
            ],
            'doccomment' => [
                'isInDocComment' => true,
            ],
            default => [],
        };

        return $this->with([...$updates, ...$scopeUpdates]);
    }

    /**
     * Exit current scope
     *
     * Pops the innermost scope and recomputes the class/function/attribute/
     * doc-comment state from the scopes that remain. currentClass and
     * currentFunction become the name of the innermost remaining named scope
     * of that kind, or null when none remains. Namespace state is left as it
     * is. Returns the same instance when there is no scope to exit.
     */
    public function exitScope(): self
    {
        if (empty($this->scopeStack)) {
            return $this;
        }

        $remaining = $this->scopeStack;
        array_pop($remaining);

        $isInClass = false;
        $isInFunction = false;
        $isInAttribute = false;
        $isInDocComment = false;
        $currentClass = null;
        $currentFunction = null;

        // Walk outermost to innermost so that inner named scopes win.
        foreach ($remaining as $scope) {
            $scopeName = $scope['name'] ?? null;

            switch ($scope['type']) {
                case 'class':
                case 'interface':
                case 'trait':
                case 'enum':
                    $isInClass = true;
                    $currentClass = $scopeName ?? $currentClass;

                    break;
                case 'function':
                case 'method':
                    $isInFunction = true;
                    $currentFunction = $scopeName ?? $currentFunction;

                    break;
                case 'attribute':
                    $isInAttribute = true;

                    break;
                case 'doccomment':
                    $isInDocComment = true;

                    break;
            }
        }

        return $this->with([
            'scopeStack' => $remaining,
            'nestingLevel' => max(0, $this->nestingLevel - 1),
            'isInClass' => $isInClass,
            'isInFunction' => $isInFunction,
            'isInAttribute' => $isInAttribute,
            'isInDocComment' => $isInDocComment,
            'currentClass' => $currentClass,
            'currentFunction' => $currentFunction,
        ]);
    }

    /**
     * Get the current scope type
     *
     * @return string|null Type of the innermost scope, or null when no scope is open
     */
    public function getCurrentScopeType(): ?string
    {
        if (empty($this->scopeStack)) {
            return null;
        }

        $currentScope = $this->scopeStack[array_key_last($this->scopeStack)];

        return $currentScope['type'] ?? null;
    }

    /**
     * Check if we're in a specific scope type
     *
     * Reads the boolean flags, not the scope stack. Only the five names
     * listed below are recognised; any other string yields false.
     */
    public function isInScopeType(string $type): bool
    {
        return match ($type) {
            'class' => $this->isInClass,
            'function' => $this->isInFunction,
            'namespace' => $this->isInNamespace,
            'attribute' => $this->isInAttribute,
            'doccomment' => $this->isInDocComment,
            default => false,
        };
    }

    /**
     * Get fully qualified name for current context
     *
     * Joins current namespace, current class and $name with backslashes,
     * skipping every part that is null or otherwise empty.
     */
    public function getFullyQualifiedName(?string $name = null): string
    {
        $parts = array_filter([
            $this->currentNamespace,
            $this->currentClass,
            $name,
        ]);

        return implode('\\', $parts);
    }

    /**
     * Clone the context
     *
     * Returns a new instance carrying the same values for every property.
     */
    public function clone(): self
    {
        return new self(
            isInClass: $this->isInClass,
            isInFunction: $this->isInFunction,
            isInNamespace: $this->isInNamespace,
            isInAttribute: $this->isInAttribute,
            isInDocComment: $this->isInDocComment,
            currentClass: $this->currentClass,
            currentFunction: $this->currentFunction,
            currentNamespace: $this->currentNamespace,
            scopeStack: $this->scopeStack,
            nestingLevel: $this->nestingLevel
        );
    }
}

View File

@@ -0,0 +1,132 @@
<?php
declare(strict_types=1);
namespace App\Framework\Tokenizer\ValueObjects;
/**
 * Semantic categories a token can be classified into.
 * Each case is backed by a lowercase snake_case string.
 */
enum TokenType: string
{
    // Structural
    case KEYWORD = 'keyword';
    case CLASS_NAME = 'class_name';
    case INTERFACE_NAME = 'interface_name';
    case TRAIT_NAME = 'trait_name';
    case ENUM_NAME = 'enum_name';
    case NAMESPACE_NAME = 'namespace_name';

    // Identifiers
    case FUNCTION_NAME = 'function_name';
    case METHOD_NAME = 'method_name';
    case PROPERTY_NAME = 'property_name';
    case CONSTANT_NAME = 'constant_name';
    case VARIABLE = 'variable';
    case PARAMETER = 'parameter';

    // Literals
    case STRING_LITERAL = 'string_literal';
    case NUMBER_LITERAL = 'number_literal';
    case BOOLEAN_LITERAL = 'boolean_literal';
    case NULL_LITERAL = 'null_literal';

    // Comments
    case DOC_COMMENT = 'doc_comment';
    case DOC_TAG = 'doc_tag';
    case DOC_TYPE = 'doc_type';
    case DOC_VARIABLE = 'doc_variable';
    case DOC_TEXT = 'doc_text';
    case COMMENT = 'comment';

    // Operators and syntax
    case OPERATOR = 'operator';
    case PUNCTUATION = 'punctuation';
    case BRACKET = 'bracket';
    case PARENTHESIS = 'parenthesis';
    case BRACE = 'brace';
    case SEMICOLON = 'semicolon';

    // Attributes
    case ATTRIBUTE = 'attribute';
    case ATTRIBUTE_NAME = 'attribute_name';
    case ATTRIBUTE_ARGUMENT = 'attribute_argument';

    // Types
    case TYPE_HINT = 'type_hint';
    case RETURN_TYPE = 'return_type';
    case UNION_TYPE = 'union_type';
    case INTERSECTION_TYPE = 'intersection_type';

    // Special
    case WHITESPACE = 'whitespace';
    case PHP_TAG = 'php_tag';
    case HTML = 'html';
    case DEFAULT = 'default';
    case ERROR = 'error';

    /**
     * Build the CSS class used for syntax highlighting: the backing value
     * with underscores turned into hyphens, prefixed with "token-".
     */
    public function getCssClass(): string
    {
        $slug = strtr($this->value, '_', '-');

        return 'token-' . $slug;
    }

    /**
     * Return the ANSI escape sequence that colours this token type in a
     * terminal. Types without a dedicated colour get the reset sequence.
     */
    public function getTerminalColor(): string
    {
        return match ($this) {
            self::OPERATOR => "\033[31m", // Red
            self::STRING_LITERAL => "\033[32m", // Green
            self::FUNCTION_NAME, self::METHOD_NAME => "\033[33m", // Yellow
            self::NUMBER_LITERAL => "\033[34m", // Blue
            self::KEYWORD => "\033[35m", // Magenta
            self::CLASS_NAME, self::INTERFACE_NAME, self::TRAIT_NAME, self::ENUM_NAME => "\033[36m", // Cyan
            self::VARIABLE, self::PARAMETER => "\033[37m", // White
            self::COMMENT, self::DOC_COMMENT => "\033[90m", // Gray
            self::ATTRIBUTE, self::ATTRIBUTE_NAME => "\033[95m", // Light Magenta
            default => "\033[0m", // Reset
        };
    }

    /**
     * Tell whether this type names a declaration: a class-like, a namespace,
     * a function or a method.
     */
    public function isStructural(): bool
    {
        return match ($this) {
            self::CLASS_NAME,
            self::INTERFACE_NAME,
            self::TRAIT_NAME,
            self::ENUM_NAME,
            self::NAMESPACE_NAME,
            self::FUNCTION_NAME,
            self::METHOD_NAME => true,
            default => false,
        };
    }

    /**
     * Tell whether this type is a comment or one of the doc-comment parts.
     */
    public function isComment(): bool
    {
        return match ($this) {
            self::COMMENT,
            self::DOC_COMMENT,
            self::DOC_TAG,
            self::DOC_TYPE,
            self::DOC_VARIABLE,
            self::DOC_TEXT => true,
            default => false,
        };
    }
}