317 lines
7.7 KiB
PHP
317 lines
7.7 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Framework\Tokenizer\ValueObjects;
|
|
|
|
use ArrayIterator;
|
|
use Countable;
|
|
use IteratorAggregate;
|
|
use Traversable;
|
|
|
|
/**
 * Immutable, iterable and countable collection of Token objects.
 *
 * Filtering, slicing and merging never modify the receiver; they return a new
 * collection instead.
 *
 * @implements IteratorAggregate<int, Token>
 */
final readonly class TokenCollection implements IteratorAggregate, Countable
{
    /**
     * @param array<Token> $tokens Tokens to wrap; stored as given (keys are not normalised)
     */
    public function __construct(
        private array $tokens = [],
    ) {
    }

    /**
     * Iterate over the tokens in stored order.
     *
     * @return Traversable<int, Token>
     */
    public function getIterator(): Traversable
    {
        return new ArrayIterator($this->tokens);
    }

    /**
     * Number of tokens in the collection.
     */
    public function count(): int
    {
        return count($this->tokens);
    }

    /**
     * Return the underlying tokens as a plain array.
     *
     * @return array<Token>
     */
    public function toArray(): array
    {
        return $this->tokens;
    }

    /**
     * Return the token stored under the given array key, or null if there is none.
     */
    public function get(int $index): ?Token
    {
        return $this->tokens[$index] ?? null;
    }

    /**
     * Return a new, re-indexed collection with the tokens accepted by the predicate.
     *
     * @param callable(Token): bool $predicate
     */
    public function filter(callable $predicate): self
    {
        return new self(array_values(array_filter($this->tokens, $predicate)));
    }

    /**
     * Keep only tokens whose type is one of the given types.
     */
    public function filterByType(TokenType ...$types): self
    {
        return $this->filter(
            static fn (Token $token): bool => in_array($token->type, $types, true),
        );
    }

    /**
     * Keep only tokens whose id is one of the given PHP token IDs.
     */
    public function filterById(int ...$ids): self
    {
        return $this->filter(
            static fn (Token $token): bool => in_array($token->id, $ids, true),
        );
    }

    /**
     * Keep only tokens for which Token::isStructural() is truthy.
     */
    public function getStructural(): self
    {
        return $this->filter(static fn (Token $token) => $token->isStructural());
    }

    /**
     * Keep only tokens for which Token::isIdentifier() is truthy.
     */
    public function getIdentifiers(): self
    {
        return $this->filter(static fn (Token $token) => $token->isIdentifier());
    }

    /**
     * Keep only tokens whose line lies within [$startLine, $endLine], both ends inclusive.
     */
    public function getInLineRange(int $startLine, int $endLine): self
    {
        return $this->filter(
            static fn (Token $token): bool => $token->line >= $startLine && $token->line <= $endLine,
        );
    }

    /**
     * Keep only tokens whose line equals the given line.
     */
    public function getAtLine(int $line): self
    {
        return $this->filter(static fn (Token $token): bool => $token->line === $line);
    }

    /**
     * Return the first token accepted by the predicate, or null if none matches.
     *
     * @param callable(Token): bool $predicate
     */
    public function findFirst(callable $predicate): ?Token
    {
        foreach ($this->tokens as $token) {
            if ($predicate($token)) {
                return $token;
            }
        }

        return null;
    }

    /**
     * Return the first token of the given type, or null if there is none.
     */
    public function findFirstOfType(TokenType $type): ?Token
    {
        return $this->findFirst(static fn (Token $token): bool => $token->type === $type);
    }

    /**
     * Apply the mapper to every token and return the results as a plain array.
     *
     * @param callable(Token): mixed $mapper
     * @return array<mixed>
     */
    public function map(callable $mapper): array
    {
        return array_map($mapper, $this->tokens);
    }

    /**
     * Collect the `value` of every token.
     *
     * @return array<mixed>
     */
    public function getValues(): array
    {
        return $this->map(static fn (Token $token) => $token->value);
    }

    /**
     * Collect the result of Token::getCleanValue() for every token.
     *
     * @return array<mixed>
     */
    public function getCleanValues(): array
    {
        return $this->map(static fn (Token $token) => $token->getCleanValue());
    }

    /**
     * Group tokens by the backing value of their type, preserving encounter order.
     *
     * @return array<string, array<Token>>
     */
    public function groupByType(): array
    {
        $groups = [];

        foreach ($this->tokens as $token) {
            // Appending with [] creates the bucket on first use.
            $groups[$token->type->value][] = $token;
        }

        return $groups;
    }

    /**
     * Group tokens by line number.
     *
     * Whitespace tokens containing "\n" are first split at their line breaks so
     * that each piece is filed under the line it belongs to.
     *
     * @return array<int, array<Token>>
     */
    public function groupByLine(): array
    {
        $groups = [];

        foreach ($this->tokens as $token) {
            $spansLines = $token->type === TokenType::WHITESPACE
                && str_contains($token->value, "\n");

            $pieces = $spansLines ? $this->splitWhitespaceAtNewlines($token) : [$token];

            foreach ($pieces as $piece) {
                $groups[$piece->line][] = $piece;
            }
        }

        return $groups;
    }

    /**
     * Split a whitespace token at line-break boundaries.
     *
     * Each line break ("\r\n", "\n" or "\r") becomes its own token on the line it
     * terminates; whitespace following a break belongs to the NEXT line.
     *
     * The token's `line` is treated as the line on which the token ENDS.
     * Example: a token with value "\n    " and line 14 yields
     *   - "\n"   on line 13 (ends that line)
     *   - "    " on line 14 (starts the next line)
     *
     * NOTE(review): every piece keeps the original token's `position`, `id` and
     * `context`; whether `position` should advance per piece depends on Token's
     * semantics, which are not visible here.
     *
     * @return Token[]
     */
    private function splitWhitespaceAtNewlines(Token $token): array
    {
        $parts = preg_split(
            '/(\r\n|\n|\r)/',
            $token->value,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY,
        );

        if ($parts === false) {
            // PCRE failure: hand back the token unsplit rather than crash on count(false).
            return [$token];
        }

        // Non-delimiter parts can never contain a line-break character, so an
        // exact match identifies a captured delimiter.
        $isLineBreak = static fn (string $part): bool => $part === "\r\n" || $part === "\n" || $part === "\r";

        // Walk back from the end line by the number of breaks that will advance
        // the counter below, so the final piece lands exactly on $token->line.
        $currentLine = $token->line - count(array_filter($parts, $isLineBreak));

        $result = [];

        foreach ($parts as $part) {
            $result[] = new Token(
                type: TokenType::WHITESPACE,
                value: $part,
                line: $currentLine,
                position: $token->position,
                id: $token->id,
                context: $token->context
            );

            if ($isLineBreak($part)) {
                // The break closes the current line; what follows is on the next one.
                $currentLine++;
            }
        }

        return $result;
    }

    /**
     * Whether the collection holds no tokens.
     */
    public function isEmpty(): bool
    {
        return $this->tokens === [];
    }

    /**
     * Return the first token in stored order, or null when the collection is empty.
     */
    public function first(): ?Token
    {
        // Key-agnostic lookup: also correct when the array was not built as a list.
        $key = array_key_first($this->tokens);

        return $key === null ? null : $this->tokens[$key];
    }

    /**
     * Return the last token in stored order, or null when the collection is empty.
     */
    public function last(): ?Token
    {
        // Key-agnostic lookup: avoids an undefined-key warning on sparse arrays.
        $key = array_key_last($this->tokens);

        return $key === null ? null : $this->tokens[$key];
    }

    /**
     * Return a new collection containing a slice of the tokens (array_slice semantics).
     */
    public function slice(int $offset, ?int $length = null): self
    {
        return new self(array_slice($this->tokens, $offset, $length));
    }

    /**
     * Return a new collection with the other collection's tokens appended.
     */
    public function merge(self $other): self
    {
        return new self(array_merge($this->tokens, $other->tokens));
    }

    /**
     * Concatenate all token values into a single string.
     */
    public function toString(): string
    {
        return implode('', $this->getValues());
    }
}
|