Files
michaelschiemer/src/Framework/Tokenizer/ValueObjects/TokenCollection.php

317 lines
7.7 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Framework\Tokenizer\ValueObjects;
use ArrayIterator;
use Countable;
use IteratorAggregate;
use Traversable;
/**
 * Immutable, list-like collection of tokens with filtering, lookup and grouping helpers.
 *
 * Tokens are held as a zero-based list, so get(), first() and last() are positional.
 * Every operation that "changes" the collection returns a new instance instead.
 *
 * @implements IteratorAggregate<int, Token>
 */
final readonly class TokenCollection implements IteratorAggregate, Countable
{
    /** Pattern used to split whitespace at line breaks; the capture group keeps the breaks. */
    private const LINE_BREAK_PATTERN = '/(\r\n|\n|\r)/';

    /** The exact line-break sequences LINE_BREAK_PATTERN can capture. */
    private const LINE_BREAKS = ["\r\n", "\n", "\r"];

    /** @var list<Token> */
    private array $tokens;

    /**
     * @param array<Token> $tokens Tokens in the order they should be iterated. Keys are
     *                             discarded and the values re-indexed from 0, because
     *                             get(), first() and last() address tokens by position.
     */
    public function __construct(array $tokens = [])
    {
        $this->tokens = array_values($tokens);
    }

    /**
     * Iterate over the tokens in collection order.
     *
     * @return Traversable<int, Token>
     */
    public function getIterator(): Traversable
    {
        return new ArrayIterator($this->tokens);
    }

    /**
     * Number of tokens in the collection.
     */
    public function count(): int
    {
        return count($this->tokens);
    }

    /**
     * Get all tokens as a zero-based list.
     *
     * @return array<Token>
     */
    public function toArray(): array
    {
        return $this->tokens;
    }

    /**
     * Get the token at a zero-based position, or null when the position does not exist.
     */
    public function get(int $index): ?Token
    {
        return $this->tokens[$index] ?? null;
    }

    /**
     * Keep only the tokens for which the predicate returns a truthy value.
     *
     * @param callable(Token): bool $predicate
     */
    public function filter(callable $predicate): self
    {
        // The constructor re-indexes, so the gaps left by array_filter() disappear.
        return new self(array_filter($this->tokens, $predicate));
    }

    /**
     * Keep only tokens whose type is one of the given types.
     */
    public function filterByType(TokenType ...$types): self
    {
        return $this->filter(
            static fn (Token $token): bool => in_array($token->type, $types, true),
        );
    }

    /**
     * Keep only tokens whose id is one of the given ids.
     */
    public function filterById(int ...$ids): self
    {
        return $this->filter(
            static fn (Token $token): bool => in_array($token->id, $ids, true),
        );
    }

    /**
     * Keep only tokens that report themselves as structural (Token::isStructural()).
     */
    public function getStructural(): self
    {
        return $this->filter(static fn (Token $token) => $token->isStructural());
    }

    /**
     * Keep only tokens that report themselves as identifiers (Token::isIdentifier()).
     */
    public function getIdentifiers(): self
    {
        return $this->filter(static fn (Token $token) => $token->isIdentifier());
    }

    /**
     * Keep only tokens whose line lies within [$startLine, $endLine], both inclusive.
     */
    public function getInLineRange(int $startLine, int $endLine): self
    {
        return $this->filter(
            static fn (Token $token): bool => $token->line >= $startLine && $token->line <= $endLine,
        );
    }

    /**
     * Keep only tokens located on exactly the given line.
     */
    public function getAtLine(int $line): self
    {
        return $this->filter(static fn (Token $token): bool => $token->line === $line);
    }

    /**
     * Return the first token accepted by the predicate, or null when none matches.
     *
     * @param callable(Token): bool $predicate
     */
    public function findFirst(callable $predicate): ?Token
    {
        foreach ($this->tokens as $token) {
            if ($predicate($token)) {
                return $token;
            }
        }

        return null;
    }

    /**
     * Return the first token of the given type, or null when there is none.
     */
    public function findFirstOfType(TokenType $type): ?Token
    {
        return $this->findFirst(static fn (Token $token): bool => $token->type === $type);
    }

    /**
     * Apply a mapper to every token and return the results in collection order.
     *
     * @param callable(Token): mixed $mapper
     * @return array<int, mixed>
     */
    public function map(callable $mapper): array
    {
        return array_map($mapper, $this->tokens);
    }

    /**
     * Extract the value of every token.
     */
    public function getValues(): array
    {
        return $this->map(static fn (Token $token) => $token->value);
    }

    /**
     * Extract the result of Token::getCleanValue() for every token.
     */
    public function getCleanValues(): array
    {
        return $this->map(static fn (Token $token) => $token->getCleanValue());
    }

    /**
     * Group tokens by the backing value of their type, keeping collection order per group.
     *
     * @return array<string, array<Token>>
     */
    public function groupByType(): array
    {
        $groups = [];
        foreach ($this->tokens as $token) {
            // Appending to a missing key creates the group on first use.
            $groups[$token->type->value][] = $token;
        }

        return $groups;
    }

    /**
     * Group tokens by line number.
     *
     * A whitespace token containing "\n" is first split at its line breaks, so each
     * fragment is filed under the line it belongs to; the returned groups then hold
     * those fragments instead of the original token.
     *
     * @return array<int, array<Token>>
     */
    public function groupByLine(): array
    {
        $groups = [];
        foreach ($this->tokens as $token) {
            $spansLines = $token->type === TokenType::WHITESPACE
                && str_contains($token->value, "\n");
            $fragments = $spansLines ? $this->splitWhitespaceAtNewlines($token) : [$token];

            foreach ($fragments as $fragment) {
                $groups[$fragment->line][] = $fragment;
            }
        }

        return $groups;
    }

    /**
     * Split a whitespace token into one fragment per line break and per run of
     * non-break whitespace, assigning each fragment its own line.
     *
     * $token->line is treated as the line on which the token ENDS: the first fragment
     * is placed that many line breaks earlier, and the line advances after each break.
     * A break therefore belongs to the line it terminates, and the whitespace after it
     * belongs to the following line.
     *
     * Example: a token with value "\n    " whose line is 14:
     * - "\n"   is assigned to line 13 (it ends that line)
     * - "    " is assigned to line 14 (it starts the next line)
     *
     * Every fragment keeps the original token's position, id and context.
     *
     * NOTE(review): this relies on Token::$line being the END line of a multi-line
     * token - confirm against the tokenizer that creates the tokens.
     *
     * @return Token[]
     */
    private function splitWhitespaceAtNewlines(Token $token): array
    {
        $parts = preg_split(
            self::LINE_BREAK_PATTERN,
            $token->value,
            -1,
            PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY,
        );

        if ($parts === false) {
            // PCRE failure: keep the token whole rather than drop it from the grouping.
            return [$token];
        }

        // Count exactly the breaks the loop below will advance on ("\r\n", "\n" and
        // lone "\r"), so that the last fragment always lands on $token->line.
        $lineBreaks = 0;
        foreach ($parts as $part) {
            if (in_array($part, self::LINE_BREAKS, true)) {
                $lineBreaks++;
            }
        }

        $currentLine = $token->line - $lineBreaks;
        $fragments = [];
        foreach ($parts as $part) {
            $fragments[] = new Token(
                type: TokenType::WHITESPACE,
                value: $part,
                line: $currentLine,
                position: $token->position,
                id: $token->id,
                context: $token->context,
            );

            // A break ends the current line; whatever follows is on the next one.
            if (in_array($part, self::LINE_BREAKS, true)) {
                $currentLine++;
            }
        }

        return $fragments;
    }

    /**
     * Whether the collection contains no tokens.
     */
    public function isEmpty(): bool
    {
        return $this->tokens === [];
    }

    /**
     * Get the first token, or null when the collection is empty.
     */
    public function first(): ?Token
    {
        return $this->tokens[0] ?? null;
    }

    /**
     * Get the last token, or null when the collection is empty.
     */
    public function last(): ?Token
    {
        return $this->tokens[count($this->tokens) - 1] ?? null;
    }

    /**
     * Return a new collection holding a slice of this one (array_slice() semantics
     * for $offset and $length).
     */
    public function slice(int $offset, ?int $length = null): self
    {
        return new self(array_slice($this->tokens, $offset, $length));
    }

    /**
     * Return a new collection with the other collection's tokens appended to this one's.
     */
    public function merge(self $other): self
    {
        return new self(array_merge($this->tokens, $other->tokens));
    }

    /**
     * Concatenate the values of all tokens, in order, into a single string.
     */
    public function toString(): string
    {
        return implode('', $this->getValues());
    }
}