Enable Discovery debug logging for production troubleshooting
- Add DISCOVERY_LOG_LEVEL=debug
- Add DISCOVERY_SHOW_PROGRESS=true
- Temporary changes for debugging InitializerProcessor fixes on production
This commit is contained in:
29
src/Framework/Tokenizer/Contracts/TokenizerInterface.php
Normal file
29
src/Framework/Tokenizer/Contracts/TokenizerInterface.php
Normal file
@@ -0,0 +1,29 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Tokenizer\Contracts;
|
||||
|
||||
use App\Framework\Tokenizer\ValueObjects\TokenCollection;
|
||||
|
||||
/**
 * Contract shared by tokenizer implementations.
 */
interface TokenizerInterface
{
    /**
     * Turns source code into a collection of tokens.
     *
     * @param string $code Source code to tokenize
     */
    public function tokenize(string $code): TokenCollection;

    /**
     * Tokenizes source code for discovery purposes.
     *
     * Intended to be the lightweight, metadata-focused variant of tokenize().
     *
     * @param string $code Source code to tokenize
     */
    public function tokenizeForDiscovery(string $code): TokenCollection;

    /**
     * Tokenizes a file as a stream; intended for large files.
     *
     * @param string $filePath Path of the file to tokenize
     * @return \Generator Generator yielding tokens, for memory efficiency
     */
    public function tokenizeStream(string $filePath): \Generator;
}
|
||||
284
src/Framework/Tokenizer/Discovery/DiscoveryTokenizer.php
Normal file
284
src/Framework/Tokenizer/Discovery/DiscoveryTokenizer.php
Normal file
@@ -0,0 +1,284 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Tokenizer\Discovery;
|
||||
|
||||
use App\Framework\Tokenizer\PhpTokenizer;
|
||||
use App\Framework\Tokenizer\ValueObjects\TokenCollection;
|
||||
|
||||
/**
|
||||
* Specialized tokenizer for discovery operations
|
||||
* Optimized for extracting structural information from PHP files
|
||||
*/
|
||||
final readonly class DiscoveryTokenizer
|
||||
{
|
||||
/**
 * @param PhpTokenizer $tokenizer Tokenizer backing every extraction method;
 *                                a fresh PhpTokenizer is used when omitted
 */
public function __construct(
    private PhpTokenizer $tokenizer = new PhpTokenizer(),
) {
}
|
||||
|
||||
/**
 * Collect the class-like declarations (class, interface, trait, enum) in the code.
 *
 * Every entry contains the keys 'type', 'name', 'namespace', 'fqn' and 'line'.
 * A declaration token for which no following identifier is found is skipped.
 *
 * @param string $code PHP source to inspect
 * @return array List of declaration descriptions
 */
public function extractClasses(string $code): array
{
    $tokens = $this->tokenizer->tokenizeForDiscovery($code);
    $declarations = [];

    foreach ($tokens->filterById(T_CLASS, T_INTERFACE, T_TRAIT, T_ENUM) as $declaration) {
        $name = $this->findNextIdentifier($tokens, $declaration);
        if (! $name) {
            continue;
        }

        $namespace = $declaration->context->currentNamespace;

        $declarations[] = [
            'type' => match ($declaration->id) {
                T_CLASS => 'class',
                T_INTERFACE => 'interface',
                T_TRAIT => 'trait',
                T_ENUM => 'enum',
                default => 'unknown',
            },
            'name' => $name,
            'namespace' => $namespace,
            // The FQN is built from namespace + name only; the context's
            // currentClass is deliberately not part of it.
            'fqn' => $namespace ? $namespace . '\\' . $name : $name,
            'line' => $declaration->line,
        ];
    }

    return $declarations;
}
|
||||
|
||||
/**
 * Collect the attributes found in the code.
 *
 * Only tokens that report isAttribute() and whose context is flagged as
 * "in attribute" are returned. Every entry contains the keys 'name',
 * 'context', 'line' and 'target'.
 *
 * @param string $code PHP source to inspect
 * @return array List of attribute descriptions
 */
public function extractAttributes(string $code): array
{
    $tokens = $this->tokenizer->tokenizeForDiscovery($code);
    $collected = [];

    foreach ($tokens->filter(fn ($candidate) => $candidate->isAttribute()) as $attribute) {
        if (! $attribute->context->isInAttribute) {
            continue;
        }

        $collected[] = [
            'name' => $attribute->value,
            'context' => $attribute->context->getCurrentScopeType(),
            'line' => $attribute->line,
            'target' => $this->determineAttributeTarget($tokens, $attribute),
        ];
    }

    return $collected;
}
|
||||
|
||||
/**
 * Extract named functions and methods from code.
 *
 * Every entry contains the keys 'name', 'type' ('method' when the token's
 * context is inside a class, otherwise 'function'), 'class', 'namespace',
 * 'line', 'visibility', 'isStatic', 'isFinal' and 'isAbstract'.
 *
 * NOTE(review): closures declared with the `function` keyword are anonymous
 * too; whether they still receive a name here depends on which tokens the
 * discovery tokenizer keeps - confirm against the TokenCollection contents.
 *
 * @param string $code PHP source to inspect
 * @return array List of function descriptions
 */
public function extractFunctions(string $code): array
{
    $tokens = $this->tokenizer->tokenizeForDiscovery($code);
    $functions = [];

    // Only T_FUNCTION can introduce a named function. Arrow functions (T_FN)
    // are always anonymous, so resolving "the next identifier" for them would
    // attach the name of an unrelated, later declaration to the arrow function.
    foreach ($tokens->filterById(T_FUNCTION) as $token) {
        $name = $this->findNextIdentifier($tokens, $token);
        if (! $name) {
            continue;
        }

        $context = $token->context;
        $functions[] = [
            'name' => $name,
            'type' => $context->isInClass ? 'method' : 'function',
            'class' => $context->currentClass,
            'namespace' => $context->currentNamespace,
            'line' => $token->line,
            'visibility' => $this->extractVisibility($tokens, $token),
            'isStatic' => $this->hasModifier($tokens, $token, T_STATIC),
            'isFinal' => $this->hasModifier($tokens, $token, T_FINAL),
            'isAbstract' => $this->hasModifier($tokens, $token, T_ABSTRACT),
        ];
    }

    return $functions;
}
|
||||
|
||||
/**
 * Collect the import (`use`) statements found in the code.
 *
 * `use` tokens whose context is inside a class are treated as trait uses and
 * skipped. Statements that cannot be resolved are left out.
 *
 * @param string $code PHP source to inspect
 * @return array List of use-statement descriptions as built by extractUseStatement()
 */
public function extractUseStatements(string $code): array
{
    $tokens = $this->tokenizer->tokenizeForDiscovery($code);
    $imports = [];

    foreach ($tokens->filterById(T_USE) as $useToken) {
        // Inside a class body `use` pulls in a trait; that is not an import.
        if ($useToken->context->isInClass) {
            continue;
        }

        $statement = $this->extractUseStatement($tokens, $useToken);
        if ($statement) {
            $imports[] = $statement;
        }
    }

    return $imports;
}
|
||||
|
||||
/**
 * Return the value of the first T_STRING token located after $startToken.
 *
 * The start token is matched by identity (===). Null is returned when the
 * start token is not part of the collection or no T_STRING follows it.
 *
 * @param TokenCollection $tokens     Collection to scan
 * @param mixed           $startToken Token after which the search begins
 */
private function findNextIdentifier(TokenCollection $tokens, $startToken): ?string
{
    $pastStart = false;

    foreach ($tokens as $candidate) {
        if (! $pastStart) {
            // Still looking for the start token; it is never returned itself.
            $pastStart = $candidate === $startToken;

            continue;
        }

        if ($candidate->id === T_STRING) {
            return $candidate->value;
        }
    }

    return null;
}
|
||||
|
||||
/**
 * Name the kind of element an attribute token is attached to.
 *
 * Only the token that directly follows $attributeToken in the collection is
 * inspected. Returns 'class', 'method', 'property' or 'constant', and
 * 'unknown' for any other token or when nothing follows the attribute.
 *
 * @param TokenCollection $tokens         Collection containing the attribute
 * @param mixed           $attributeToken Attribute token, matched by identity
 */
private function determineAttributeTarget(TokenCollection $tokens, $attributeToken): string
{
    $previousWasAttribute = false;

    foreach ($tokens as $current) {
        if ($previousWasAttribute) {
            return match ($current->id) {
                T_CLASS => 'class',
                T_FUNCTION, T_FN => 'method',
                T_VARIABLE => 'property',
                T_CONST => 'constant',
                default => 'unknown',
            };
        }

        $previousWasAttribute = $current === $attributeToken;
    }

    return 'unknown';
}
|
||||
|
||||
/**
 * Determine the visibility modifier written in front of a function token.
 *
 * Looks back at most five tokens. The search stops as soon as it reaches a
 * token that cannot sit between a modifier and its `function` keyword
 * (class/function/namespace keywords, `;`, `{`, `}`), so that a modifier
 * belonging to a preceding member is not picked up by mistake.
 *
 * @param TokenCollection $tokens        Collection containing the function token
 * @param mixed           $functionToken Function token, matched by identity
 * @return string 'public', 'protected' or 'private'; 'public' when none is found
 */
private function extractVisibility(TokenCollection $tokens, $functionToken): string
{
    $tokensArray = $tokens->toArray();
    $index = array_search($functionToken, $tokensArray, true);

    // Token not in the collection (or non-list keys): nothing to look back at.
    if (! is_int($index)) {
        return 'public';
    }

    for ($i = $index - 1; $i >= max(0, $index - 5); $i--) {
        $token = $tokensArray[$i];

        $visibility = match ($token->id) {
            T_PUBLIC => 'public',
            T_PROTECTED => 'protected',
            T_PRIVATE => 'private',
            default => null,
        };
        if ($visibility !== null) {
            return $visibility;
        }

        // Same structural boundaries as hasModifier(), plus statement/body
        // delimiters: everything before them belongs to another declaration.
        if ($token->is([T_CLASS, T_FUNCTION, T_NAMESPACE]) || in_array($token->value, [';', '{', '}'], true)) {
            break;
        }
    }

    return 'public';
}
|
||||
|
||||
/**
 * Check whether a modifier token precedes the target token.
 *
 * Looks back at most ten tokens and stops at structural boundaries
 * (class/function/namespace keywords) as well as at `;`, `{` and `}`.
 * None of these can appear between a modifier and the keyword it modifies,
 * so stopping there prevents a modifier of the previous member
 * (e.g. `private static $x;`) from being attributed to the target.
 *
 * @param TokenCollection $tokens      Collection containing the target token
 * @param mixed           $targetToken Token to look back from, matched by identity
 * @param int             $modifierId  Token id of the modifier (e.g. T_STATIC)
 */
private function hasModifier(TokenCollection $tokens, $targetToken, int $modifierId): bool
{
    $tokensArray = $tokens->toArray();
    $index = array_search($targetToken, $tokensArray, true);

    // Token not in the collection (or non-list keys): no modifier can be found.
    if (! is_int($index)) {
        return false;
    }

    for ($i = $index - 1; $i >= max(0, $index - 10); $i--) {
        $token = $tokensArray[$i];

        if ($token->is($modifierId)) {
            return true;
        }

        if ($token->is([T_CLASS, T_FUNCTION, T_NAMESPACE]) || in_array($token->value, [';', '{', '}'], true)) {
            break;
        }
    }

    return false;
}
|
||||
|
||||
/**
 * Resolve the imported name, alias and line of a single `use` statement.
 *
 * Scanning starts after $useToken and ends at `;`, at a structural keyword,
 * at the start of an attribute (`#[`), or at a `use` token on a later line.
 * The attribute boundary matters when the collection carries no `;` tokens:
 * without it, namespace separators of a following `#[\Foo\Bar]` would be
 * appended to the imported name.
 *
 * @param TokenCollection $tokens   Collection containing the use token
 * @param mixed           $useToken The `use` token, matched by identity
 * @return array|null Array with keys 'class', 'alias' and 'line', or null
 *                    when no name parts were found
 */
private function extractUseStatement(TokenCollection $tokens, $useToken): ?array
{
    $parts = [];
    $alias = null;
    $found = false;
    $inAlias = false;

    foreach ($tokens as $token) {
        if (! $found) {
            $found = $token === $useToken;

            continue;
        }

        if ($token->value === ';') {
            break;
        }

        // Tokens that can never be part of an import statement end the scan.
        if ($token->is([T_CLASS, T_INTERFACE, T_TRAIT, T_ENUM, T_FUNCTION, T_NAMESPACE, T_ATTRIBUTE])) {
            break;
        }

        // A `use` on a later line is a different statement.
        if ($token->is(T_USE) && $token->line > $useToken->line) {
            break;
        }

        if ($token->is(T_AS)) {
            $inAlias = true;

            continue;
        }

        if ($inAlias) {
            if ($token->is(T_STRING)) {
                $alias = $token->value;
            }

            continue;
        }

        if ($token->is([T_STRING, T_NS_SEPARATOR, T_NAME_QUALIFIED, T_NAME_FULLY_QUALIFIED])) {
            $parts[] = $token->value;
        }
    }

    if ($parts === []) {
        return null;
    }

    // `use \Foo\Bar;` imports the same symbol as `use Foo\Bar;`, so the
    // optional leading backslash is dropped.
    $className = ltrim(implode('', $parts), '\\');

    return [
        'class' => $className,
        'alias' => $alias,
        'line' => $useToken->line,
    ];
}
|
||||
}
|
||||
310
src/Framework/Tokenizer/PhpTokenizer.php
Normal file
310
src/Framework/Tokenizer/PhpTokenizer.php
Normal file
@@ -0,0 +1,310 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Tokenizer;
|
||||
|
||||
use App\Framework\Tokenizer\Contracts\TokenizerInterface;
|
||||
use App\Framework\Tokenizer\ValueObjects\DiscoveryState;
|
||||
use App\Framework\Tokenizer\ValueObjects\Token;
|
||||
use App\Framework\Tokenizer\ValueObjects\TokenCollection;
|
||||
use App\Framework\Tokenizer\ValueObjects\TokenContext;
|
||||
use PhpToken;
|
||||
|
||||
/**
|
||||
* Modern PHP tokenizer using PhpToken API
|
||||
* Can be used for syntax highlighting, code analysis, and discovery
|
||||
*/
|
||||
final readonly class PhpTokenizer implements TokenizerInterface
|
||||
{
|
||||
/**
 * @param TokenClassifier      $classifier      Assigns a token type to each PHP token
 * @param TokenContextAnalyzer $contextAnalyzer Maintains the context while tokens are walked
 */
public function __construct(
    private TokenClassifier $classifier = new TokenClassifier(),
    private TokenContextAnalyzer $contextAnalyzer = new TokenContextAnalyzer(),
) {
}
|
||||
|
||||
/**
 * Tokenize PHP code into a collection of context-aware tokens.
 *
 * Code without an opening tag gets one added before it is handed to
 * PhpToken::tokenize(); that synthetic tag token is left out of the result.
 *
 * @param string $code PHP source, with or without an opening tag
 */
public function tokenize(string $code): TokenCollection
{
    $source = $this->ensurePhpTags($code);
    $tagWasAdded = $source !== $code;

    $phpTokens = PhpToken::tokenize($source);

    $context = new TokenContext();
    $collected = [];

    foreach ($phpTokens as $position => $phpToken) {
        // The opening tag we prepended ourselves is not part of the input.
        $isSyntheticTag = $tagWasAdded && $position === 0 && $phpToken->is(T_OPEN_TAG);
        if ($isSyntheticTag) {
            continue;
        }

        // The context is advanced first so the token is created with the
        // context that already reflects it.
        $context = $this->contextAnalyzer->analyzeContext($phpToken, $phpTokens, $position, $context);
        $collected[] = $this->createToken($phpToken, $context, $phpTokens, $position);
    }

    return new TokenCollection($collected);
}
|
||||
|
||||
/**
 * Tokenize code for discovery purposes.
 *
 * Delegates to streamTokenizeForDiscovery(), which scans the code line by
 * line and tokenizes only the portion it read.
 *
 * @param string $code PHP source to inspect
 */
public function tokenizeForDiscovery(string $code): TokenCollection
{
    $discoveryTokens = $this->streamTokenizeForDiscovery($code);

    return $discoveryTokens;
}
|
||||
|
||||
/**
 * Read the code line by line and tokenize only the leading portion that was read.
 *
 * Reading ends when canStopDiscovery() reports that enough was seen, or
 * after 100 lines at the latest. Each line read is appended to the portion
 * followed by "\n".
 *
 * @param string $code PHP source to inspect
 */
private function streamTokenizeForDiscovery(string $code): TokenCollection
{
    $state = new DiscoveryState();
    $portion = '';

    foreach (explode("\n", $code) as $offset => $rawLine) {
        $lineNumber = $offset + 1;
        $portion .= $rawLine . "\n";

        // The line-based detection works on the trimmed line.
        $this->updateDiscoveryState($state, trim($rawLine), $lineNumber);

        // Early stop once enough metadata was seen; hard cap at 100 lines.
        if ($this->canStopDiscovery($state, $lineNumber) || $lineNumber >= 100) {
            break;
        }
    }

    return $this->tokenizeRelevantPortion($portion, $state);
}
|
||||
|
||||
/**
 * Update the discovery state from the content of one source line.
 *
 * All checks are anchored at the start of the line, so the caller is
 * expected to pass the line already trimmed.
 *
 * @param DiscoveryState $state      State object, mutated in place
 * @param string         $line       Trimmed content of the line
 * @param int            $lineNumber 1-based number of the line
 */
private function updateDiscoveryState(DiscoveryState $state, string $line, int $lineNumber): void
{
    // Namespace detection (only the first namespace line is recorded)
    if (! $state->namespaceFound && str_starts_with($line, 'namespace ')) {
        $state->namespaceFound = true;
        $state->namespaceLineFound = $lineNumber;
    }

    // Use statement detection: counts un-aliased imports of namespaced names
    if (str_starts_with($line, 'use ') && ! str_contains($line, ' as ') && str_contains($line, '\\')) {
        $state->useStatementsCount++;
    }

    // Class/Interface/Trait/Enum detection. PHP accepts the modifiers in any
    // order (e.g. `readonly final class`), so any sequence of them is allowed.
    if (preg_match('/^(?:(?:final|abstract|readonly)\s+)*(?:class|interface|trait|enum)\s+\w+/', $line)) {
        $state->classFound = true;
        $state->classLineFound = $lineNumber;
    }

    // Function detection; modifiers may likewise appear in any order
    // (e.g. `final public static function`).
    if (preg_match('/^(?:(?:public|protected|private|static|final|abstract)\s+)*function\s+\w+/', $line)) {
        $state->functionsCount++;
    }

    // Attribute detection
    if (str_starts_with($line, '#[')) {
        $state->attributesCount++;
    }

    // Doc comment detection
    if (str_starts_with($line, '/**')) {
        $state->docCommentsCount++;
    }

    // Class body start: first `{` seen on or after the class line
    if ($state->classFound && str_contains($line, '{')) {
        $state->classBodyStarted = true;
    }
}
|
||||
|
||||
/**
 * Decide whether line-by-line discovery may end at the current line.
 *
 * @param DiscoveryState $state       State gathered so far
 * @param int            $currentLine 1-based number of the line just processed
 * @return bool True when reading further lines is not needed
 */
private function canStopDiscovery(DiscoveryState $state, int $currentLine): bool
{
    // A class was found, its body has started, and we are more than
    // 20 lines past the class line.
    $wellIntoClassBody = $state->classFound
        && $state->classBodyStarted
        && $currentLine > ($state->classLineFound + 20);

    // More than 50 lines read without seeing a class or a namespace.
    $nothingStructuralSoFar = $currentLine > 50
        && ! $state->classFound
        && ! $state->namespaceFound;

    return $wellIntoClassBody || $nothingStructuralSoFar;
}
|
||||
|
||||
/**
 * Tokenize the given code portion and keep only discovery-relevant tokens.
 *
 * Kept are: structural tokens, attribute tokens, doc comments, the first
 * T_STRING / T_NAME_QUALIFIED following a class, interface, trait, enum,
 * function or use keyword, and every namespace separator.
 *
 * @param string         $code  Code portion to tokenize
 * @param DiscoveryState $state Discovery state (not read by this method)
 */
private function tokenizeRelevantPortion(string $code, DiscoveryState $state): TokenCollection
{
    $kept = [];
    $awaitingName = false;

    foreach ($this->tokenize($code) as $token) {
        if ($token->isStructural() || $token->isAttribute() || $token->isDocComment()) {
            $kept[] = $token;

            // These keywords are followed by a name we want to keep as well.
            if ($token->is([T_CLASS, T_INTERFACE, T_TRAIT, T_ENUM, T_FUNCTION, T_USE])) {
                $awaitingName = true;
            }

            continue;
        }

        if ($awaitingName && ($token->is(T_STRING) || $token->is(T_NAME_QUALIFIED))) {
            // Plain and namespaced identifiers both count as the awaited name.
            $kept[] = $token;
            $awaitingName = false;

            continue;
        }

        if ($token->is(T_NS_SEPARATOR)) {
            // Namespace separators are always kept.
            $kept[] = $token;
        }
    }

    return new TokenCollection($kept);
}
|
||||
|
||||
/**
 * Stream tokenization for large files.
 *
 * Reads the file in 8KB chunks, tokenizes each block that
 * extractCompleteBlock() hands back and yields its tokens; whatever is
 * left in the buffer at the end of the file is tokenized last.
 *
 * @param string $filePath Path of the file to tokenize
 * @return \Generator Generator yielding the tokens of each tokenized block
 * @throws \RuntimeException When the file cannot be opened or read
 */
public function tokenizeStream(string $filePath): \Generator
{
    $handle = fopen($filePath, 'r');
    if ($handle === false) {
        throw new \RuntimeException("Cannot open file: $filePath");
    }

    try {
        $buffer = '';
        $inPhpBlock = false;

        while (! feof($handle)) {
            $chunk = fread($handle, 8192); // 8KB chunks
            if ($chunk === false) {
                // A failed read never reaches EOF; continuing would loop forever.
                throw new \RuntimeException("Cannot read file: $filePath");
            }
            $buffer .= $chunk;

            // Find a complete PHP block in the buffer
            $completeBlock = $this->extractCompleteBlock($buffer, $inPhpBlock);
            if ($completeBlock !== null) {
                yield from $this->tokenize($completeBlock);

                // Keep remainder in buffer
                $buffer = $this->getBlockRemainder($buffer, $completeBlock);
            }
        }

        // Process remaining buffer. Compared against '' explicitly because a
        // buffer consisting of "0" is falsy but still has content.
        if ($buffer !== '') {
            yield from $this->tokenize($buffer);
        }
    } finally {
        fclose($handle);
    }
}
|
||||
|
||||
/**
 * Build a Token from a PhpToken and the context that applies to it.
 *
 * The token type comes from the classifier; the context is cloned so the
 * token keeps its own copy.
 *
 * @param PhpToken     $phpToken  Raw token to wrap
 * @param TokenContext $context   Context in effect for this token
 * @param array        $allTokens All raw tokens of the current run
 * @param int          $index     Index of $phpToken within $allTokens
 */
private function createToken(
    PhpToken $phpToken,
    TokenContext $context,
    array $allTokens,
    int $index
): Token {
    return new Token(
        type: $this->classifier->classify($phpToken, $allTokens, $index, $context),
        value: $phpToken->text,
        line: $phpToken->line,
        position: $phpToken->pos ?? 0,
        id: $phpToken->id,
        context: $context->clone()
    );
}
|
||||
|
||||
/**
 * Ensure code starts with a PHP opening tag so it can be tokenized.
 *
 * Code that (ignoring leading whitespace) already starts with `<?php` or
 * `<?=` is returned unchanged. Otherwise `<?php ` is prepended. The tag is
 * followed by a space rather than a newline so that the prepended tag does
 * not push every token one line further down: line numbers reported by the
 * tokenizer stay those of the original code.
 *
 * @param string $code PHP source, with or without an opening tag
 * @return string Code that starts with an opening tag
 */
private function ensurePhpTags(string $code): string
{
    $leading = ltrim($code);

    if (str_starts_with($leading, '<?php') || str_starts_with($leading, '<?=')) {
        return $code;
    }

    return '<?php ' . $code;
}
|
||||
|
||||
/**
 * Take the next block that is ready for tokenization out of the buffer.
 *
 * - When a PHP block is open and the buffer contains `?>`, everything up to
 *   and including the first `?>` is returned; the buffer itself is left
 *   untouched and $inPhpBlock is reset.
 * - Otherwise, when the buffer holds more than 16KB, the whole buffer is
 *   returned and the buffer is emptied.
 * - Otherwise null is returned.
 *
 * @param string $buffer     Pending data; emptied only in the 16KB case
 * @param bool   $inPhpBlock Set to true once `<?php` is seen in the buffer
 */
private function extractCompleteBlock(string &$buffer, bool &$inPhpBlock): ?string
{
    if (str_contains($buffer, '<?php')) {
        $inPhpBlock = true;
    }

    $closingTagAt = $inPhpBlock ? strpos($buffer, '?>') : false;
    if ($closingTagAt !== false) {
        $inPhpBlock = false;

        // +2 keeps the closing tag itself inside the block.
        return substr($buffer, 0, $closingTagAt + 2);
    }

    // Below the 16KB threshold we keep collecting.
    if (strlen($buffer) <= 16384) {
        return null;
    }

    $flushed = $buffer;
    $buffer = '';

    return $flushed;
}
|
||||
|
||||
/**
 * Return the part of the buffer that follows an extracted block.
 *
 * The block is assumed to be a prefix of the buffer; only its byte length
 * is used to cut the buffer.
 *
 * @param string $buffer         Buffer the block was taken from
 * @param string $extractedBlock Block returned by extractCompleteBlock()
 */
private function getBlockRemainder(string $buffer, string $extractedBlock): string
{
    return substr($buffer, strlen($extractedBlock));
}
|
||||
}
|
||||
252
src/Framework/Tokenizer/TokenClassifier.php
Normal file
252
src/Framework/Tokenizer/TokenClassifier.php
Normal file
@@ -0,0 +1,252 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Tokenizer;
|
||||
|
||||
use App\Framework\Tokenizer\ValueObjects\TokenContext;
|
||||
use App\Framework\Tokenizer\ValueObjects\TokenType;
|
||||
use PhpToken;
|
||||
|
||||
/**
|
||||
* Classifies PHP tokens into semantic token types
|
||||
*/
|
||||
final readonly class TokenClassifier
|
||||
{
|
||||
/**
 * Determine the semantic type of a token.
 *
 * T_STRING tokens are classified from their surroundings; every other token
 * is classified from its id (or its text for single-character tokens).
 *
 * @param PhpToken     $token     Token to classify
 * @param array        $allTokens All raw tokens of the current run
 * @param int          $index     Index of $token within $allTokens
 * @param TokenContext $context   Context in effect for the token
 */
public function classify(
    PhpToken $token,
    array $allTokens,
    int $index,
    TokenContext $context
): TokenType {
    if ($token->is(T_STRING)) {
        return $this->classifyString($token, $allTokens, $index, $context);
    }

    return $this->getBasicTokenType($token);
}
|
||||
|
||||
/**
 * Map a token id to its basic token type.
 *
 * Ids without an explicit mapping are handed to classifySingleCharacter(),
 * which decides from the token text.
 *
 * @param PhpToken $token Token to classify
 */
private function getBasicTokenType(PhpToken $token): TokenType
{
    static $keywordIds = [
        T_ABSTRACT, T_AS, T_BREAK, T_CALLABLE, T_CASE, T_CATCH, T_CLASS,
        T_CLONE, T_CONST, T_CONTINUE, T_DECLARE, T_DEFAULT, T_DO, T_ECHO,
        T_ELSE, T_ELSEIF, T_EMPTY, T_ENDDECLARE, T_ENDFOR, T_ENDFOREACH,
        T_ENDIF, T_ENDSWITCH, T_ENDWHILE, T_ENUM, T_EVAL, T_EXIT, T_EXTENDS,
        T_FINAL, T_FINALLY, T_FN, T_FOR, T_FOREACH, T_FUNCTION, T_GLOBAL,
        T_GOTO, T_IF, T_IMPLEMENTS, T_INCLUDE, T_INCLUDE_ONCE, T_INSTANCEOF,
        T_INSTEADOF, T_INTERFACE, T_ISSET, T_LIST, T_MATCH, T_NAMESPACE,
        T_NEW, T_PRINT, T_PRIVATE, T_PROTECTED, T_PUBLIC, T_READONLY,
        T_REQUIRE, T_REQUIRE_ONCE, T_RETURN, T_STATIC, T_SWITCH, T_THROW,
        T_TRAIT, T_TRY, T_UNSET, T_USE, T_VAR, T_WHILE, T_YIELD,
        T_YIELD_FROM,
    ];

    static $operatorIds = [
        T_AND_EQUAL, T_BOOLEAN_AND, T_BOOLEAN_OR, T_COALESCE, T_COALESCE_EQUAL,
        T_CONCAT_EQUAL, T_DEC, T_DIV_EQUAL, T_DOUBLE_ARROW, T_DOUBLE_COLON,
        T_ELLIPSIS, T_INC, T_IS_EQUAL, T_IS_GREATER_OR_EQUAL, T_IS_IDENTICAL,
        T_IS_NOT_EQUAL, T_IS_NOT_IDENTICAL, T_IS_SMALLER_OR_EQUAL, T_LOGICAL_AND,
        T_LOGICAL_OR, T_LOGICAL_XOR, T_MINUS_EQUAL, T_MOD_EQUAL, T_MUL_EQUAL,
        T_NULLSAFE_OBJECT_OPERATOR, T_OBJECT_OPERATOR, T_OR_EQUAL, T_PLUS_EQUAL,
        T_POW, T_POW_EQUAL, T_SL, T_SL_EQUAL, T_SPACESHIP, T_SR, T_SR_EQUAL,
        T_XOR_EQUAL,
    ];

    $id = $token->id;

    return match (true) {
        in_array($id, $keywordIds, true) => TokenType::KEYWORD,
        in_array($id, $operatorIds, true) => TokenType::OPERATOR,
        $id === T_VARIABLE => TokenType::VARIABLE,
        $id === T_CONSTANT_ENCAPSED_STRING,
        $id === T_ENCAPSED_AND_WHITESPACE => TokenType::STRING_LITERAL,
        $id === T_LNUMBER,
        $id === T_DNUMBER => TokenType::NUMBER_LITERAL,
        $id === T_COMMENT => TokenType::COMMENT,
        $id === T_DOC_COMMENT => TokenType::DOC_COMMENT,
        $id === T_WHITESPACE => TokenType::WHITESPACE,
        $id === T_OPEN_TAG,
        $id === T_OPEN_TAG_WITH_ECHO,
        $id === T_CLOSE_TAG => TokenType::PHP_TAG,
        $id === T_ATTRIBUTE => TokenType::ATTRIBUTE,
        $id === T_INLINE_HTML => TokenType::HTML,
        // Everything else is decided from the token text.
        default => $this->classifySingleCharacter($token),
    };
}
|
||||
|
||||
/**
 * Classify a token from its text; meant for single-character tokens.
 *
 * Any text that is not one of the listed characters yields TokenType::DEFAULT.
 *
 * @param PhpToken $token Token to classify
 */
private function classifySingleCharacter(PhpToken $token): TokenType
{
    $text = $token->text;

    $operators = ['+', '-', '*', '/', '%', '=', '<', '>', '!', '&', '|', '^', '~', '?', ':'];
    if (in_array($text, $operators, true)) {
        return TokenType::OPERATOR;
    }

    if ($text === '[' || $text === ']') {
        return TokenType::BRACKET;
    }

    if ($text === '(' || $text === ')') {
        return TokenType::PARENTHESIS;
    }

    if ($text === '{' || $text === '}') {
        return TokenType::BRACE;
    }

    if ($text === ',' || $text === '.') {
        return TokenType::PUNCTUATION;
    }

    if ($text === ';') {
        return TokenType::SEMICOLON;
    }

    return TokenType::DEFAULT;
}
|
||||
|
||||
/**
 * Classify a T_STRING token from its surroundings.
 *
 * The neighbouring tokens used for the decision are the nearest tokens that
 * are not whitespace or comments. Using the raw neighbours would make the
 * keyword-based rules useless: in `class Foo`, `new Foo`, `extends Foo` or
 * `function foo` the token directly before the name is always whitespace.
 *
 * Rules are applied in this order: preceding keyword/operator, attribute
 * context, call syntax, constant naming, type-hint position, PascalCase
 * class name, known type keyword.
 *
 * @param PhpToken     $token     The T_STRING token to classify
 * @param array        $allTokens All raw tokens of the current run
 * @param int          $index     Index of $token within $allTokens
 * @param TokenContext $context   Context in effect for the token
 */
private function classifyString(
    PhpToken $token,
    array $allTokens,
    int $index,
    TokenContext $context
): TokenType {
    $prevToken = $this->findSignificantNeighbour($allTokens, $index, -1);
    $nextToken = $this->findSignificantNeighbour($allTokens, $index, 1);
    $isFollowedByCall = $nextToken !== null && $nextToken->text === '(';

    if ($prevToken !== null) {
        $typeFromPrevious = match ($prevToken->id) {
            // Declarations and references introduced by a keyword
            T_CLASS, T_NEW, T_EXTENDS, T_IMPLEMENTS => TokenType::CLASS_NAME,
            T_INTERFACE => TokenType::INTERFACE_NAME,
            T_TRAIT => TokenType::TRAIT_NAME,
            T_ENUM => TokenType::ENUM_NAME,
            T_FUNCTION => $context->isInClass ? TokenType::METHOD_NAME : TokenType::FUNCTION_NAME,
            // After :: (static method/constant)
            T_DOUBLE_COLON => $isFollowedByCall ? TokenType::METHOD_NAME : TokenType::CONSTANT_NAME,
            // After -> / ?-> (object method/property)
            T_OBJECT_OPERATOR,
            T_NULLSAFE_OBJECT_OPERATOR => $isFollowedByCall ? TokenType::METHOD_NAME : TokenType::PROPERTY_NAME,
            default => null,
        };

        if ($typeFromPrevious !== null) {
            return $typeFromPrevious;
        }
    }

    // In attribute context
    if ($context->isInAttribute) {
        return TokenType::ATTRIBUTE_NAME;
    }

    // Function/method call (followed by parenthesis)
    if ($isFollowedByCall) {
        return $context->isInClass ? TokenType::METHOD_NAME : TokenType::FUNCTION_NAME;
    }

    // Constants: upper-case names, digits and underscores allowed (MAX_SIZE)
    if (strlen($token->text) > 1 && preg_match('/^[A-Z][A-Z0-9_]*$/', $token->text) === 1) {
        return TokenType::CONSTANT_NAME;
    }

    // Type hints (in function parameters or return types)
    if ($this->isTypeHintContext($allTokens, $index)) {
        return TokenType::TYPE_HINT;
    }

    // Class names (PascalCase)
    if (preg_match('/^[A-Z][a-zA-Z0-9]*$/', $token->text)) {
        return TokenType::CLASS_NAME;
    }

    // Default to keyword if it's a known keyword
    if ($this->isKeyword($token->text)) {
        return TokenType::KEYWORD;
    }

    return TokenType::DEFAULT;
}

/**
 * Find the nearest token in the given direction that is not whitespace or a comment.
 *
 * @param array $tokens List of PhpToken instances
 * @param int   $index  Index to start from (exclusive)
 * @param int   $step   -1 to look backwards, 1 to look forwards
 */
private function findSignificantNeighbour(array $tokens, int $index, int $step): ?PhpToken
{
    for ($i = $index + $step; isset($tokens[$i]); $i += $step) {
        if (! $tokens[$i]->is([T_WHITESPACE, T_COMMENT, T_DOC_COMMENT])) {
            return $tokens[$i];
        }
    }

    return null;
}
|
||||
|
||||
/**
 * Heuristically decide whether the token at $index sits in a type-hint position.
 *
 * Looks back at most five tokens. A `:` in that window counts as a type-hint
 * position. When a function/fn keyword is reached first, the answer is
 * whether more parentheses were opened than closed between that keyword
 * and the token.
 *
 * @param array $tokens List of PhpToken instances
 * @param int   $index  Index of the token in question
 */
private function isTypeHintContext(array $tokens, int $index): bool
{
    $earliest = max(0, $index - 5);

    for ($cursor = $index - 1; $cursor >= $earliest; --$cursor) {
        $candidate = $tokens[$cursor];

        if ($candidate->text === ':') {
            return true;
        }

        if (! $candidate->is([T_FUNCTION, T_FN])) {
            continue;
        }

        // Positive depth means we are inside the still-open parameter list.
        $depth = 0;
        foreach (array_slice($tokens, $cursor, $index - $cursor) as $between) {
            $depth += match ($between->text) {
                '(' => 1,
                ')' => -1,
                default => 0,
            };
        }

        return $depth > 0;
    }

    return false;
}
|
||||
|
||||
/**
 * Tell whether the text is one of the literal or type keywords that PHP
 * tokenizes as a plain T_STRING (compared case-insensitively).
 *
 * @param string $text Token text to check
 */
private function isKeyword(string $text): bool
{
    return match (strtolower($text)) {
        'true', 'false', 'null', 'void', 'never', 'mixed',
        'int', 'float', 'string', 'bool', 'array', 'object',
        'callable', 'iterable', 'resource', 'self', 'parent', 'static' => true,
        default => false,
    };
}
|
||||
}
|
||||
150
src/Framework/Tokenizer/TokenContextAnalyzer.php
Normal file
150
src/Framework/Tokenizer/TokenContextAnalyzer.php
Normal file
@@ -0,0 +1,150 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Tokenizer;
|
||||
|
||||
use App\Framework\Tokenizer\ValueObjects\TokenContext;
|
||||
use PhpToken;
|
||||
|
||||
/**
|
||||
* Analyzes and maintains token context during parsing
|
||||
*/
|
||||
final class TokenContextAnalyzer
|
||||
{
|
||||
/**
 * Compute the context that applies to a token.
 *
 * Declaration keywords, attributes and doc comments enter a scope; `}`,
 * the `]` closing an attribute and the first token after a doc comment
 * leave one.
 *
 * Two keyword uses are not declarations and therefore leave the context
 * untouched: `class` in a `Foo::class` constant fetch and `function` in a
 * `use function ...` import. Treating them as declarations would push a
 * scope that no closing brace belongs to.
 *
 * @param PhpToken     $token          Token being processed
 * @param array        $allTokens      All raw tokens of the current run
 * @param int          $index          Index of $token within $allTokens
 * @param TokenContext $currentContext Context before this token
 * @return TokenContext Context that applies to this token
 */
public function analyzeContext(
    PhpToken $token,
    array $allTokens,
    int $index,
    TokenContext $currentContext
): TokenContext {
    $isDeclaration = match ($token->id) {
        T_CLASS => ! $this->isPrecededBy($allTokens, $index, T_DOUBLE_COLON),
        T_FUNCTION => ! $this->isPrecededBy($allTokens, $index, T_USE),
        default => true,
    };

    // Handle scope entering tokens
    $context = ! $isDeclaration ? $currentContext : match ($token->id) {
        T_CLASS => $this->enterClassScope($allTokens, $index, $currentContext, 'class'),
        T_INTERFACE => $this->enterClassScope($allTokens, $index, $currentContext, 'interface'),
        T_TRAIT => $this->enterClassScope($allTokens, $index, $currentContext, 'trait'),
        T_ENUM => $this->enterClassScope($allTokens, $index, $currentContext, 'enum'),
        T_FUNCTION, T_FN => $this->enterFunctionScope($allTokens, $index, $currentContext),
        T_NAMESPACE => $this->enterNamespaceScope($allTokens, $index, $currentContext),
        T_ATTRIBUTE => $currentContext->enterScope('attribute'),
        T_DOC_COMMENT => $currentContext->enterScope('doccomment'),
        default => $currentContext,
    };

    // Handle scope exiting
    if ($token->text === '}') {
        $context = $this->handleClosingBrace($context);
    } elseif ($token->text === ']' && $context->isInAttribute) {
        $context = $context->exitScope();
    } elseif ($context->isInDocComment && ! $token->is(T_DOC_COMMENT)) {
        $context = $context->exitScope();
    }

    return $context;
}

/**
 * Check whether the nearest preceding token that is not whitespace or a
 * comment has the given id.
 *
 * @param array $tokens  List of PhpToken instances
 * @param int   $index   Index to look back from (exclusive)
 * @param int   $tokenId Token id to compare against
 */
private function isPrecededBy(array $tokens, int $index, int $tokenId): bool
{
    for ($i = $index - 1; $i >= 0; $i--) {
        if ($tokens[$i]->is([T_WHITESPACE, T_COMMENT, T_DOC_COMMENT])) {
            continue;
        }

        return $tokens[$i]->is($tokenId);
    }

    return false;
}
|
||||
|
||||
/**
 * Enter a class-like scope named after the identifier that follows the keyword.
 *
 * @param array        $tokens  All raw tokens of the current run
 * @param int          $index   Index of the declaration keyword
 * @param TokenContext $context Context to derive the new scope from
 * @param string       $type    Scope type: 'class', 'interface', 'trait' or 'enum'
 */
private function enterClassScope(
    array $tokens,
    int $index,
    TokenContext $context,
    string $type
): TokenContext {
    return $context->enterScope($type, $this->findNextIdentifier($tokens, $index));
}
|
||||
|
||||
/**
 * Enter a function scope named after the identifier that follows the keyword.
 *
 * The scope type is 'method' when the current context is inside a class and
 * 'function' otherwise.
 *
 * @param array        $tokens  All raw tokens of the current run
 * @param int          $index   Index of the function/fn keyword
 * @param TokenContext $context Context to derive the new scope from
 */
private function enterFunctionScope(
    array $tokens,
    int $index,
    TokenContext $context
): TokenContext {
    $identifier = $this->findNextIdentifier($tokens, $index);

    if ($context->isInClass) {
        return $context->enterScope('method', $identifier);
    }

    return $context->enterScope('function', $identifier);
}
|
||||
|
||||
/**
 * Enter a namespace scope carrying the name that follows the keyword.
 *
 * @param array        $tokens  All raw tokens of the current run
 * @param int          $index   Index of the namespace keyword
 * @param TokenContext $context Context to derive the new scope from
 */
private function enterNamespaceScope(
    array $tokens,
    int $index,
    TokenContext $context
): TokenContext {
    return $context->enterScope('namespace', $this->extractNamespace($tokens, $index));
}
|
||||
|
||||
/**
 * React to a closing brace by leaving the current scope where appropriate.
 *
 * The scope is left only when the current scope type is a structural one
 * (class-like, function/method or namespace); otherwise the context is
 * returned unchanged.
 *
 * @param TokenContext $context Context in effect at the closing brace
 */
private function handleClosingBrace(TokenContext $context): TokenContext
{
    $structuralScopes = ['class', 'interface', 'trait', 'enum', 'function', 'method', 'namespace'];

    if (in_array($context->getCurrentScopeType(), $structuralScopes, true)) {
        return $context->exitScope();
    }

    return $context;
}
|
||||
|
||||
/**
 * Return the text of the first T_STRING token after $startIndex.
 *
 * The search gives up at the first `{` or `;`, so only identifiers that
 * belong to the declaration header are considered.
 *
 * @param array $tokens     List of PhpToken instances
 * @param int   $startIndex Index after which the search begins
 */
private function findNextIdentifier(array $tokens, int $startIndex): ?string
{
    $total = count($tokens);

    for ($cursor = $startIndex + 1; $cursor < $total; ++$cursor) {
        $candidate = $tokens[$cursor];

        if ($candidate->is(T_STRING)) {
            return $candidate->text;
        }

        // Reaching the body or the end of the statement ends the search.
        if (in_array($candidate->text, ['{', ';'], true)) {
            return null;
        }
    }

    return null;
}
|
||||
|
||||
/**
 * Extract the namespace name that follows a namespace keyword.
 *
 * Name tokens are concatenated until `;` or `{` is reached. Those two
 * delimiters end the scan even when no name was seen yet, so the unnamed
 * global form `namespace { ... }` yields an empty string instead of picking
 * up the first identifier inside the block.
 *
 * @param array $tokens     List of PhpToken instances
 * @param int   $startIndex Index of the namespace keyword
 * @return string Namespace name, or '' when none is given
 */
private function extractNamespace(array $tokens, int $startIndex): string
{
    $parts = [];
    $total = count($tokens);

    for ($i = $startIndex + 1; $i < $total; $i++) {
        $token = $tokens[$i];

        if ($token->is([T_STRING, T_NS_SEPARATOR, T_NAME_QUALIFIED])) {
            $parts[] = $token->text;

            continue;
        }

        // End of the namespace statement / start of the namespace block.
        if ($token->text === ';' || $token->text === '{') {
            break;
        }

        if ($token->is(T_WHITESPACE)) {
            continue;
        }

        // Any other token after the name has started ends the name.
        if ($parts !== []) {
            break;
        }
    }

    return implode('', $parts);
}
|
||||
}
|
||||
56
src/Framework/Tokenizer/ValueObjects/DiscoveryState.php
Normal file
56
src/Framework/Tokenizer/ValueObjects/DiscoveryState.php
Normal file
@@ -0,0 +1,56 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Tokenizer\ValueObjects;
|
||||
|
||||
/**
 * Mutable scratchpad used while tokenizing for discovery.
 *
 * Records which structural elements have been seen so far so that a caller
 * can decide whether it may stop early.
 */
final class DiscoveryState
{
    // Namespace tracking
    public bool $namespaceFound = false;

    public int $namespaceLineFound = 0;

    // Class tracking
    public bool $classFound = false;

    public int $classLineFound = 0;

    public bool $classBodyStarted = false;

    // Running counters
    public int $useStatementsCount = 0;

    public int $functionsCount = 0;

    public int $attributesCount = 0;

    public int $docCommentsCount = 0;

    /**
     * Tell whether anything structurally relevant has been recorded.
     *
     * True when a namespace or class was found, or when at least one use
     * statement or function was counted. Attribute and doc-comment counters
     * do not influence the result.
     */
    public function hasSignificantContent(): bool
    {
        if ($this->namespaceFound || $this->classFound) {
            return true;
        }

        if ($this->useStatementsCount > 0) {
            return true;
        }

        return $this->functionsCount > 0;
    }

    /**
     * Build a summary of the recorded findings.
     *
     * 'lines_processed' is the larger of the two recorded line numbers
     * (namespace and class).
     *
     * @return array{namespace: bool, class: bool, uses: int, functions: int, attributes: int, lines_processed: int}
     */
    public function getSummary(): array
    {
        $furthestLine = max($this->namespaceLineFound, $this->classLineFound);

        $summary = [];
        $summary['namespace'] = $this->namespaceFound;
        $summary['class'] = $this->classFound;
        $summary['uses'] = $this->useStatementsCount;
        $summary['functions'] = $this->functionsCount;
        $summary['attributes'] = $this->attributesCount;
        $summary['lines_processed'] = $furthestLine;

        return $summary;
    }
}
|
||||
122
src/Framework/Tokenizer/ValueObjects/Token.php
Normal file
122
src/Framework/Tokenizer/ValueObjects/Token.php
Normal file
@@ -0,0 +1,122 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Tokenizer\ValueObjects;
|
||||
|
||||
/**
 * Represents a single token with full context
 *
 * Immutable value object: the semantic type, the raw text, where the text
 * was found (line and position), the numeric PHP token id, and the context
 * that was active when the token was produced.
 */
final readonly class Token
{
    /**
     * @param TokenType    $type     Semantic classification of the token
     * @param string       $value    Raw token text
     * @param int          $line     Line the token was found on
     * @param int          $position Start position of the token (same unit as getLength(), i.e. bytes)
     * @param int          $id       Numeric token id compared by is() (e.g. T_STRING)
     * @param TokenContext $context  Context active when the token was produced
     */
    public function __construct(
        public TokenType $type,
        public string $value,
        public int $line,
        public int $position,
        public int $id,
        public TokenContext $context
    ) {
    }

    /**
     * Check if token is a specific PHP token type
     *
     * @param int|array $tokenId A single token id, or a list of ids of which any may match
     */
    public function is(int|array $tokenId): bool
    {
        if (is_array($tokenId)) {
            return in_array($this->id, $tokenId, true);
        }

        return $this->id === $tokenId;
    }

    /**
     * Check if token is structural (class, function, namespace, etc.)
     *
     * Covers declaration keywords, inheritance keywords and modifiers.
     */
    public function isStructural(): bool
    {
        return $this->is([
            T_CLASS, T_INTERFACE, T_TRAIT, T_ENUM,
            T_FUNCTION, T_NAMESPACE, T_USE,
            T_EXTENDS, T_IMPLEMENTS,
            T_PUBLIC, T_PROTECTED, T_PRIVATE,
            T_STATIC, T_FINAL, T_ABSTRACT, T_READONLY,
        ]);
    }

    /**
     * Check if token is an attribute
     *
     * True for the attribute opener itself, and for T_STRING tokens seen
     * while the context is flagged as being inside an attribute.
     */
    public function isAttribute(): bool
    {
        return $this->is(T_ATTRIBUTE) ||
               ($this->context->isInAttribute && $this->is(T_STRING));
    }

    /**
     * Check if token is a doc comment
     */
    public function isDocComment(): bool
    {
        return $this->is(T_DOC_COMMENT);
    }

    /**
     * Check if token is a keyword
     */
    public function isKeyword(): bool
    {
        return $this->type === TokenType::KEYWORD;
    }

    /**
     * Check if token is an identifier (class name, function name, etc.)
     *
     * Only the class, function, method, property and constant name types
     * count as identifiers here.
     */
    public function isIdentifier(): bool
    {
        return in_array($this->type, [
            TokenType::CLASS_NAME,
            TokenType::FUNCTION_NAME,
            TokenType::METHOD_NAME,
            TokenType::PROPERTY_NAME,
            TokenType::CONSTANT_NAME,
        ], true);
    }

    /**
     * Get token length
     *
     * Measured with strlen(), i.e. in bytes rather than characters.
     */
    public function getLength(): int
    {
        return strlen($this->value);
    }

    /**
     * Get end position
     *
     * The returned position is exclusive: it is the first position after the token.
     */
    public function getEndPosition(): int
    {
        return $this->position + $this->getLength();
    }

    /**
     * Check if this token contains a specific position
     *
     * Uses the half-open interval [position, end position).
     */
    public function containsPosition(int $position): bool
    {
        return $position >= $this->position && $position < $this->getEndPosition();
    }

    /**
     * Get a clean version of the value (trimmed, no quotes, etc.)
     *
     * - String literals: the enclosing pair of quotes is removed.
     * - Doc comments: leading/trailing '/' and '*' characters are removed
     *   (surrounding whitespace inside the comment is kept).
     * - Everything else: surrounding whitespace is trimmed.
     */
    public function getCleanValue(): string
    {
        return match ($this->type) {
            TokenType::STRING_LITERAL => $this->stripEnclosingQuotes(),
            TokenType::DOC_COMMENT => trim($this->value, '/*'),
            default => trim($this->value)
        };
    }

    /**
     * Remove exactly one pair of matching delimiters from a quoted literal.
     *
     * A plain trim() on both quote characters also eats quotes that belong
     * to the content (e.g. the closing quote of a trailing \" escape, or a
     * literal consisting only of the other quote character). Values that are
     * not wrapped in a matching pair fall back to the previous trim behavior.
     */
    private function stripEnclosingQuotes(): string
    {
        $raw = $this->value;
        $opener = $raw[0] ?? '';

        $isQuote = $opener === '"' || $opener === "'";
        if ($isQuote && strlen($raw) >= 2 && str_ends_with($raw, $opener)) {
            return substr($raw, 1, -1);
        }

        return trim($raw, '"\'');
    }
}
|
||||
245
src/Framework/Tokenizer/ValueObjects/TokenCollection.php
Normal file
245
src/Framework/Tokenizer/ValueObjects/TokenCollection.php
Normal file
@@ -0,0 +1,245 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Tokenizer\ValueObjects;
|
||||
|
||||
use ArrayIterator;
|
||||
use Countable;
|
||||
use IteratorAggregate;
|
||||
use Traversable;
|
||||
|
||||
/**
 * Immutable, countable and iterable list of tokens with query helpers.
 *
 * Every method that narrows or combines the list returns a new collection;
 * the wrapped array itself is never modified.
 */
final readonly class TokenCollection implements IteratorAggregate, Countable
{
    /**
     * @param array<Token> $tokens
     */
    public function __construct(
        private array $tokens = []
    ) {
    }

    /**
     * Iterate over the wrapped tokens.
     */
    public function getIterator(): Traversable
    {
        return new ArrayIterator($this->tokens);
    }

    /**
     * Number of tokens held.
     */
    public function count(): int
    {
        return \count($this->tokens);
    }

    /**
     * Expose the wrapped tokens as a plain array.
     *
     * @return array<Token>
     */
    public function toArray(): array
    {
        return $this->tokens;
    }

    /**
     * Token stored under $index, or null when there is none.
     */
    public function get(int $index): ?Token
    {
        if (! isset($this->tokens[$index])) {
            return null;
        }

        return $this->tokens[$index];
    }

    /**
     * New collection holding the tokens for which $predicate is truthy.
     *
     * The result is re-indexed from zero.
     */
    public function filter(callable $predicate): self
    {
        $kept = [];
        foreach ($this->tokens as $candidate) {
            if ($predicate($candidate)) {
                $kept[] = $candidate;
            }
        }

        return new self($kept);
    }

    /**
     * Keep tokens whose semantic type is one of $types.
     */
    public function filterByType(TokenType ...$types): self
    {
        return $this->filter(
            static fn (Token $candidate): bool => in_array($candidate->type, $types, true)
        );
    }

    /**
     * Keep tokens whose numeric token id is one of $ids.
     */
    public function filterById(int ...$ids): self
    {
        return $this->filter(
            static fn (Token $candidate): bool => in_array($candidate->id, $ids, true)
        );
    }

    /**
     * Keep tokens that report themselves as structural.
     */
    public function getStructural(): self
    {
        return $this->filter(static fn (Token $candidate) => $candidate->isStructural());
    }

    /**
     * Keep tokens that report themselves as identifiers.
     */
    public function getIdentifiers(): self
    {
        return $this->filter(static fn (Token $candidate) => $candidate->isIdentifier());
    }

    /**
     * Keep tokens located on lines $startLine through $endLine (both inclusive).
     */
    public function getInLineRange(int $startLine, int $endLine): self
    {
        return $this->filter(
            static fn (Token $candidate): bool =>
                ! ($candidate->line < $startLine || $candidate->line > $endLine)
        );
    }

    /**
     * Keep tokens located exactly on $line.
     */
    public function getAtLine(int $line): self
    {
        return $this->filter(static fn (Token $candidate): bool => $candidate->line === $line);
    }

    /**
     * First token for which $predicate is truthy, or null when none matches.
     *
     * Evaluation stops at the first match.
     */
    public function findFirst(callable $predicate): ?Token
    {
        foreach ($this->tokens as $candidate) {
            if (! $predicate($candidate)) {
                continue;
            }

            return $candidate;
        }

        return null;
    }

    /**
     * First token with the given semantic type, or null.
     */
    public function findFirstOfType(TokenType $type): ?Token
    {
        return $this->findFirst(
            static fn (Token $candidate): bool => $candidate->type === $type
        );
    }

    /**
     * Apply $mapper to every token and return the results as a plain array.
     */
    public function map(callable $mapper): array
    {
        return array_map($mapper, $this->tokens);
    }

    /**
     * Raw values of all tokens, in order.
     */
    public function getValues(): array
    {
        return array_map(
            static fn (Token $candidate) => $candidate->value,
            $this->tokens
        );
    }

    /**
     * Clean values of all tokens, in order.
     */
    public function getCleanValues(): array
    {
        return array_map(
            static fn (Token $candidate) => $candidate->getCleanValue(),
            $this->tokens
        );
    }

    /**
     * Bucket the tokens by the string value of their semantic type.
     *
     * @return array<string, array<Token>>
     */
    public function groupByType(): array
    {
        $buckets = [];
        foreach ($this->tokens as $candidate) {
            // Appending to a missing key creates the bucket on the fly.
            $buckets[$candidate->type->value][] = $candidate;
        }

        return $buckets;
    }

    /**
     * Bucket the tokens by line number.
     *
     * @return array<int, array<Token>>
     */
    public function groupByLine(): array
    {
        $buckets = [];
        foreach ($this->tokens as $candidate) {
            $buckets[$candidate->line][] = $candidate;
        }

        return $buckets;
    }

    /**
     * Whether the collection holds no tokens.
     */
    public function isEmpty(): bool
    {
        return $this->tokens === [];
    }

    /**
     * Token at index 0, or null when absent.
     */
    public function first(): ?Token
    {
        return $this->get(0);
    }

    /**
     * Token at the last positional index, or null for an empty collection.
     */
    public function last(): ?Token
    {
        if ($this->isEmpty()) {
            return null;
        }

        return $this->tokens[$this->count() - 1];
    }

    /**
     * New collection holding a slice of the tokens (array_slice semantics).
     */
    public function slice(int $offset, ?int $length = null): self
    {
        $portion = array_slice($this->tokens, $offset, $length);

        return new self($portion);
    }

    /**
     * New collection holding this collection's tokens followed by $other's.
     */
    public function merge(self $other): self
    {
        return new self([...$this->tokens, ...$other->tokens]);
    }

    /**
     * Concatenate the raw values of all tokens into one string.
     */
    public function toString(): string
    {
        $pieces = $this->getValues();

        return implode('', $pieces);
    }
}
|
||||
208
src/Framework/Tokenizer/ValueObjects/TokenContext.php
Normal file
208
src/Framework/Tokenizer/ValueObjects/TokenContext.php
Normal file
@@ -0,0 +1,208 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Tokenizer\ValueObjects;
|
||||
|
||||
/**
 * Context information for a token
 *
 * Immutable snapshot of the scopes that are open at a given point: a stack
 * of ['type' => ..., 'name' => ...] entries plus flags and names derived
 * from it. Every mutator returns a new instance.
 */
final class TokenContext
{
    /** Scope types that set the "in class" flag and the current class name. */
    private const CLASS_LIKE_SCOPES = ['class', 'interface', 'trait', 'enum'];

    /** Scope types that set the "in function" flag and the current function name. */
    private const CALLABLE_SCOPES = ['function', 'method'];

    /**
     * @param array<int, array{type: string, name: ?string}> $scopeStack Open scopes, outermost first
     */
    public function __construct(
        public readonly bool $isInClass = false,
        public readonly bool $isInFunction = false,
        public readonly bool $isInNamespace = false,
        public readonly bool $isInAttribute = false,
        public readonly bool $isInDocComment = false,
        public readonly ?string $currentClass = null,
        public readonly ?string $currentFunction = null,
        public readonly ?string $currentNamespace = null,
        public readonly array $scopeStack = [],
        public readonly int $nestingLevel = 0
    ) {
    }

    /**
     * Create a new context with updated values
     *
     * Keys missing from $updates keep their current value. For the nullable
     * name fields an explicit null IS applied (key presence is checked, not
     * the value); a plain `??` here would silently ignore the null and make
     * it impossible to ever clear a name again.
     *
     * @param array<string, mixed> $updates Property name => new value
     */
    public function with(array $updates): self
    {
        return new self(
            isInClass: $updates['isInClass'] ?? $this->isInClass,
            isInFunction: $updates['isInFunction'] ?? $this->isInFunction,
            isInNamespace: $updates['isInNamespace'] ?? $this->isInNamespace,
            isInAttribute: $updates['isInAttribute'] ?? $this->isInAttribute,
            isInDocComment: $updates['isInDocComment'] ?? $this->isInDocComment,
            currentClass: array_key_exists('currentClass', $updates)
                ? $updates['currentClass']
                : $this->currentClass,
            currentFunction: array_key_exists('currentFunction', $updates)
                ? $updates['currentFunction']
                : $this->currentFunction,
            currentNamespace: array_key_exists('currentNamespace', $updates)
                ? $updates['currentNamespace']
                : $this->currentNamespace,
            scopeStack: $updates['scopeStack'] ?? $this->scopeStack,
            nestingLevel: $updates['nestingLevel'] ?? $this->nestingLevel
        );
    }

    /**
     * Enter a new scope
     *
     * Pushes the scope onto the stack, increments the nesting level and sets
     * the flag that belongs to $type. When $name is null (anonymous scope)
     * the enclosing class / function / namespace name is kept.
     *
     * @param string      $type Scope type, e.g. 'class', 'method', 'namespace', 'attribute'
     * @param string|null $name Declared name, or null for anonymous scopes
     */
    public function enterScope(string $type, ?string $name = null): self
    {
        $newStack = $this->scopeStack;
        $newStack[] = ['type' => $type, 'name' => $name];

        $updates = [
            'scopeStack' => $newStack,
            'nestingLevel' => $this->nestingLevel + 1,
        ];

        $scopeUpdates = match ($type) {
            'class', 'interface', 'trait', 'enum' => [
                'isInClass' => true,
                'currentClass' => $name ?? $this->currentClass,
            ],
            'function', 'method' => [
                'isInFunction' => true,
                'currentFunction' => $name ?? $this->currentFunction,
            ],
            'namespace' => [
                'isInNamespace' => true,
                'currentNamespace' => $name ?? $this->currentNamespace,
            ],
            'attribute' => [
                'isInAttribute' => true,
            ],
            'doccomment' => [
                'isInDocComment' => true,
            ],
            default => []
        };

        return $this->with([...$updates, ...$scopeUpdates]);
    }

    /**
     * Exit current scope
     *
     * Pops the innermost scope and recomputes the class / function /
     * attribute / doc-comment state from what is left on the stack. The
     * current class and function become the innermost remaining named scope
     * of that kind, or null when none remains. Namespace state is not
     * touched by this method. Calling it with an empty stack is a no-op.
     */
    public function exitScope(): self
    {
        if (empty($this->scopeStack)) {
            return $this;
        }

        $newStack = $this->scopeStack;
        array_pop($newStack);

        $stillInClass = false;
        $stillInFunction = false;
        $stillInAttribute = false;
        $stillInDocComment = false;
        $currentClass = null;
        $currentFunction = null;

        // Walk outermost -> innermost so the innermost named scope wins.
        foreach ($newStack as $scope) {
            $scopeType = $scope['type'] ?? null;
            $scopeName = $scope['name'] ?? null;

            if (in_array($scopeType, self::CLASS_LIKE_SCOPES, true)) {
                $stillInClass = true;
                $currentClass = $scopeName ?? $currentClass;
            } elseif (in_array($scopeType, self::CALLABLE_SCOPES, true)) {
                $stillInFunction = true;
                $currentFunction = $scopeName ?? $currentFunction;
            } elseif ($scopeType === 'attribute') {
                $stillInAttribute = true;
            } elseif ($scopeType === 'doccomment') {
                $stillInDocComment = true;
            }
        }

        return $this->with([
            'scopeStack' => $newStack,
            'nestingLevel' => max(0, $this->nestingLevel - 1),
            'isInClass' => $stillInClass,
            'isInFunction' => $stillInFunction,
            'isInAttribute' => $stillInAttribute,
            'isInDocComment' => $stillInDocComment,
            'currentClass' => $currentClass,
            'currentFunction' => $currentFunction,
        ]);
    }

    /**
     * Get the current scope type
     *
     * @return string|null Type of the innermost open scope, or null when no scope is open
     */
    public function getCurrentScopeType(): ?string
    {
        if (empty($this->scopeStack)) {
            return null;
        }

        $currentScope = $this->scopeStack[array_key_last($this->scopeStack)];

        return $currentScope['type'] ?? null;
    }

    /**
     * Check if we're in a specific scope type
     *
     * Answers from the derived flags; unknown type names yield false.
     */
    public function isInScopeType(string $type): bool
    {
        return match ($type) {
            'class' => $this->isInClass,
            'function' => $this->isInFunction,
            'namespace' => $this->isInNamespace,
            'attribute' => $this->isInAttribute,
            'doccomment' => $this->isInDocComment,
            default => false
        };
    }

    /**
     * Get fully qualified name for current context
     *
     * Joins the current namespace, the current class and $name with a
     * backslash, leaving out parts that are empty.
     */
    public function getFullyQualifiedName(?string $name = null): string
    {
        $parts = array_filter([
            $this->currentNamespace,
            $this->currentClass,
            $name,
        ]);

        return implode('\\', $parts);
    }

    /**
     * Clone the context
     *
     * Returns a new instance carrying the same values.
     */
    public function clone(): self
    {
        return new self(
            isInClass: $this->isInClass,
            isInFunction: $this->isInFunction,
            isInNamespace: $this->isInNamespace,
            isInAttribute: $this->isInAttribute,
            isInDocComment: $this->isInDocComment,
            currentClass: $this->currentClass,
            currentFunction: $this->currentFunction,
            currentNamespace: $this->currentNamespace,
            scopeStack: $this->scopeStack,
            nestingLevel: $this->nestingLevel
        );
    }
}
|
||||
132
src/Framework/Tokenizer/ValueObjects/TokenType.php
Normal file
132
src/Framework/Tokenizer/ValueObjects/TokenType.php
Normal file
@@ -0,0 +1,132 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Framework\Tokenizer\ValueObjects;
|
||||
|
||||
/**
 * Semantic categories a token can be classified into.
 *
 * The backing string of each case is also the basis for its CSS class name.
 */
enum TokenType: string
{
    // Structural
    case KEYWORD = 'keyword';
    case CLASS_NAME = 'class_name';
    case INTERFACE_NAME = 'interface_name';
    case TRAIT_NAME = 'trait_name';
    case ENUM_NAME = 'enum_name';
    case NAMESPACE_NAME = 'namespace_name';

    // Identifiers
    case FUNCTION_NAME = 'function_name';
    case METHOD_NAME = 'method_name';
    case PROPERTY_NAME = 'property_name';
    case CONSTANT_NAME = 'constant_name';
    case VARIABLE = 'variable';
    case PARAMETER = 'parameter';

    // Literals
    case STRING_LITERAL = 'string_literal';
    case NUMBER_LITERAL = 'number_literal';
    case BOOLEAN_LITERAL = 'boolean_literal';
    case NULL_LITERAL = 'null_literal';

    // Comments
    case DOC_COMMENT = 'doc_comment';
    case DOC_TAG = 'doc_tag';
    case DOC_TYPE = 'doc_type';
    case DOC_VARIABLE = 'doc_variable';
    case DOC_TEXT = 'doc_text';
    case COMMENT = 'comment';

    // Operators and syntax
    case OPERATOR = 'operator';
    case PUNCTUATION = 'punctuation';
    case BRACKET = 'bracket';
    case PARENTHESIS = 'parenthesis';
    case BRACE = 'brace';
    case SEMICOLON = 'semicolon';

    // Attributes
    case ATTRIBUTE = 'attribute';
    case ATTRIBUTE_NAME = 'attribute_name';
    case ATTRIBUTE_ARGUMENT = 'attribute_argument';

    // Types
    case TYPE_HINT = 'type_hint';
    case RETURN_TYPE = 'return_type';
    case UNION_TYPE = 'union_type';
    case INTERSECTION_TYPE = 'intersection_type';

    // Special
    case WHITESPACE = 'whitespace';
    case PHP_TAG = 'php_tag';
    case HTML = 'html';
    case DEFAULT = 'default';
    case ERROR = 'error';

    /**
     * CSS class used for syntax highlighting.
     *
     * Built as "token-" followed by the backing value with underscores
     * turned into hyphens.
     */
    public function getCssClass(): string
    {
        $slug = strtr($this->value, '_', '-');

        return 'token-' . $slug;
    }

    /**
     * ANSI escape sequence used to colour this type in a terminal.
     *
     * Types without a dedicated colour get the reset sequence.
     */
    public function getTerminalColor(): string
    {
        return match ($this) {
            self::KEYWORD => "\033[35m", // Magenta
            self::CLASS_NAME, self::INTERFACE_NAME, self::TRAIT_NAME, self::ENUM_NAME
                => "\033[36m", // Cyan
            self::FUNCTION_NAME, self::METHOD_NAME
                => "\033[33m", // Yellow
            self::VARIABLE, self::PARAMETER
                => "\033[37m", // White
            self::STRING_LITERAL => "\033[32m", // Green
            self::NUMBER_LITERAL => "\033[34m", // Blue
            self::COMMENT, self::DOC_COMMENT
                => "\033[90m", // Gray
            self::ATTRIBUTE, self::ATTRIBUTE_NAME
                => "\033[95m", // Light Magenta
            self::OPERATOR => "\033[31m", // Red
            default => "\033[0m", // Reset
        };
    }

    /**
     * Whether this type names a structural element (class-like, namespace,
     * function or method name).
     */
    public function isStructural(): bool
    {
        return match ($this) {
            self::CLASS_NAME,
            self::INTERFACE_NAME,
            self::TRAIT_NAME,
            self::ENUM_NAME,
            self::NAMESPACE_NAME,
            self::FUNCTION_NAME,
            self::METHOD_NAME => true,
            default => false,
        };
    }

    /**
     * Whether this type belongs to the comment family (plain comments, doc
     * comments and the doc-comment sub-parts).
     */
    public function isComment(): bool
    {
        return match ($this) {
            self::COMMENT,
            self::DOC_COMMENT,
            self::DOC_TAG,
            self::DOC_TYPE,
            self::DOC_VARIABLE,
            self::DOC_TEXT => true,
            default => false,
        };
    }
}
|
||||
Reference in New Issue
Block a user