0 follower

Final Class Yiisoft\Db\Sqlite\SqlTokenizer

Inheritance: Yiisoft\Db\Sqlite\SqlTokenizer » Yiisoft\Db\Sqlite\AbstractTokenizer

Splits SQLite queries into individual SQL tokens.

It's used to obtain CHECK constraint information from the SQL code of a CREATE TABLE statement.

Protected Properties

Hide inherited properties

Property Type Description Defined By
$length integer SQL code string length. Yiisoft\Db\Sqlite\AbstractTokenizer
$offset integer SQL code string current offset. Yiisoft\Db\Sqlite\AbstractTokenizer

Public Methods

Hide inherited methods

Method Description Defined By
__construct() Yiisoft\Db\Sqlite\AbstractTokenizer
tokenize() Tokenizes and returns a code type token. Yiisoft\Db\Sqlite\AbstractTokenizer

Protected Methods

Hide inherited methods

Method Description Defined By
indexAfter() Returns an index after the given string in the SQL code starting with the specified offset. Yiisoft\Db\Sqlite\AbstractTokenizer
isComment() Returns whether there's a comment at the current offset. Yiisoft\Db\Sqlite\SqlTokenizer
isIdentifier() Returns whether there's an identifier at the current offset. Yiisoft\Db\Sqlite\SqlTokenizer
isKeyword() Returns whether the given string is a keyword. Yiisoft\Db\Sqlite\SqlTokenizer
isOperator() Returns whether there's an operator at the current offset. Yiisoft\Db\Sqlite\SqlTokenizer
isStringLiteral() Returns whether there's a string literal at the current offset. Yiisoft\Db\Sqlite\SqlTokenizer
isWhitespace() Returns whether there's a whitespace character at the current offset. Yiisoft\Db\Sqlite\SqlTokenizer
startsWithAnyLongest() Returns whether the SQL code at the current offset starts with any of the given strings, preferring the longest match. Yiisoft\Db\Sqlite\AbstractTokenizer
substring() Returns a string of the given length starting with the specified offset. Yiisoft\Db\Sqlite\AbstractTokenizer

Method Details

Hide inherited methods

__construct() public method
public mixed __construct ( string $sql )
$sql string

                /**
                 * Stores the SQL code that {@see tokenize()} will split into tokens.
                 *
                 * @param string $sql SQL code to tokenize.
                 */
                public function __construct(
                    private string $sql,
                ) {
                }

            
indexAfter() protected method

Defined in: Yiisoft\Db\Sqlite\AbstractTokenizer::indexAfter()

Returns an index after the given string in the SQL code starting with the specified offset.

protected integer indexAfter ( string $string, integer|null $offset null )
$string string

String to find.

$offset integer|null

SQL code offset, defaults to current if null is passed.

return integer

Index after the given string or end of string index.

                /**
                 * Finds the given string in the SQL code and returns the index right after it.
                 *
                 * All positions and lengths are counted in UTF-8 characters, not bytes.
                 *
                 * @param string $string String to find.
                 * @param int|null $offset SQL code offset to start searching from; the current offset is used when null.
                 *
                 * @return int Index of the first character after the found string, or the SQL code length when the
                 * string can't fit into the remaining code or isn't found.
                 */
                protected function indexAfter(string $string, ?int $offset = null): int
{
    $start = $offset ?? $this->offset;
    $needleLength = mb_strlen($string, 'UTF-8');

    // The needle can't fit into what is left of the SQL code, so don't search at all.
    if ($start + $needleLength > $this->length) {
        return $this->length;
    }

    $position = mb_strpos($this->sql, $string, $start, 'UTF-8');

    return $position === false ? $this->length : $position + $needleLength;
}

            
isComment() protected method

Returns whether there's a comment at the current offset.

If this method returns true, it has to set the $length parameter to the length of the matched string.

protected boolean isComment ( integer &$length )
$length integer

The length of the matched string.

return boolean

Whether there's a comment at the current offset.

                /**
                 * Returns whether there's a comment at the current offset.
                 *
                 * Two comment forms are recognised: `--`, which runs up to and including the next line feed, and
                 * `/*`, which runs up to and including the next closing `*` + `/` pair. A comment that is never
                 * terminated extends to the end of the SQL code.
                 *
                 * @param int $length Set to the length of the matched comment when the method returns true. When it
                 * returns false the value is left at 2 (the length of the probed opener).
                 *
                 * @return bool Whether there's a comment at the current offset.
                 */
                protected function isComment(int &$length): bool
{
    // Comment opener => the string that terminates that kind of comment.
    $terminators = ['--' => "\n", '/*' => '*/'];

    $length = 2;
    $opener = $this->substring($length);
    if (!isset($terminators[$opener])) {
        return false;
    }

    // Look for the terminator only after the two-character opener. Searching from the current offset
    // would let the closing pair overlap the opener, so "/*/ text */" would be cut short after "/*/".
    $length = $this->indexAfter($terminators[$opener], $this->offset + $length) - $this->offset;

    return true;
}

            
isIdentifier() protected method

Returns whether there's an identifier at the current offset.

If this method returns true, it has to set the $length parameter to the length of the matched string.

It may also set $content to a string that will be used as a token content.

protected boolean isIdentifier ( integer &$length, string|null &$content )
$length integer

The length of the matched string.

$content string|null

Optional content instead of the matched string.

return boolean

Whether there's an identifier at the current offset.

                /**
                 * Returns whether there's a delimited identifier at the current offset.
                 *
                 * Identifiers may be wrapped in double quotes, backticks or square brackets. Inside quotes and
                 * backticks a doubled delimiter stands for a single literal one; square brackets have no escape.
                 *
                 * @param int $length Set to the length of the matched string, delimiters included, on success.
                 * @param string|null $content Set to the identifier without its delimiters and with doubled
                 * delimiters collapsed, on success.
                 *
                 * @return bool Whether there's an identifier at the current offset.
                 */
                protected function isIdentifier(int &$length, ?string &$content): bool
{
    // Opening delimiter => matching closing delimiter.
    $closingFor = ['"' => '"', '[' => ']', '`' => '`'];

    $opening = $this->substring(1);
    if (!isset($closingFor[$opening])) {
        return false;
    }

    $closing = $closingFor[$opening];
    $isBracket = $closing === ']';

    // Advance past closing delimiters until one is not immediately followed by another one
    // (a doubled delimiter is an escape). Brackets stop at the very first "]".
    $end = $this->offset;
    do {
        $end = $this->indexAfter($closing, $end + 1);
    } while (!$isBracket && $this->substring(1, true, $end) === $closing);

    $length = $end - $this->offset;
    $content = $this->substring($length - 2, true, $this->offset + 1);
    if (!$isBracket) {
        $content = strtr($content, [$closing . $closing => $closing]);
    }

    return true;
}

            
isKeyword() protected method

Returns whether the given string is a keyword.

The method may set $content to a string that will be used as a token content.

protected boolean isKeyword ( string $string, string|null &$content )
$string string

The string to match.

$content string|null

Optional content instead of the matched string.

return boolean

Whether the given string is a keyword.

                /**
                 * Returns whether the given string is an SQLite keyword.
                 *
                 * The comparison is case-insensitive: the string is upper-cased before the lookup.
                 *
                 * @param string $string The string to match.
                 * @param string|null $content Set to the upper-cased keyword when the method returns true; left
                 * untouched otherwise.
                 *
                 * @return bool Whether the given string is a keyword.
                 */
                protected function isKeyword(string $string, ?string &$content): bool
{
    // Lookup map of recognised keywords, grouped by initial letter.
    $keywords = [
        'ABORT' => true, 'ACTION' => true, 'ADD' => true, 'AFTER' => true, 'ALL' => true, 'ALTER' => true,
        'ANALYZE' => true, 'AND' => true, 'AS' => true, 'ASC' => true, 'ATTACH' => true,
        'AUTOINCREMENT' => true,
        'BEFORE' => true, 'BEGIN' => true, 'BETWEEN' => true, 'BY' => true,
        'CASCADE' => true, 'CASE' => true, 'CAST' => true, 'CHECK' => true, 'COLLATE' => true,
        'COLUMN' => true, 'COMMIT' => true, 'CONFLICT' => true, 'CONSTRAINT' => true, 'CREATE' => true,
        'CROSS' => true, 'CURRENT_DATE' => true, 'CURRENT_TIME' => true, 'CURRENT_TIMESTAMP' => true,
        'DATABASE' => true, 'DEFAULT' => true, 'DEFERRABLE' => true, 'DEFERRED' => true, 'DELETE' => true,
        'DESC' => true, 'DETACH' => true, 'DISTINCT' => true, 'DROP' => true,
        'EACH' => true, 'ELSE' => true, 'END' => true, 'ESCAPE' => true, 'EXCEPT' => true,
        'EXCLUSIVE' => true, 'EXISTS' => true, 'EXPLAIN' => true,
        'FAIL' => true, 'FOR' => true, 'FOREIGN' => true, 'FROM' => true, 'FULL' => true,
        'GLOB' => true, 'GROUP' => true,
        'HAVING' => true,
        'IF' => true, 'IGNORE' => true, 'IMMEDIATE' => true, 'IN' => true, 'INDEX' => true,
        'INDEXED' => true, 'INITIALLY' => true, 'INNER' => true, 'INSERT' => true, 'INSTEAD' => true,
        'INTERSECT' => true, 'INTO' => true, 'IS' => true, 'ISNULL' => true,
        'JOIN' => true,
        'KEY' => true,
        'LEFT' => true, 'LIKE' => true, 'LIMIT' => true,
        'MATCH' => true,
        'NATURAL' => true, 'NO' => true, 'NOT' => true, 'NOTNULL' => true, 'NULL' => true,
        'OF' => true, 'OFFSET' => true, 'ON' => true, 'OR' => true, 'ORDER' => true, 'OUTER' => true,
        'PLAN' => true, 'PRAGMA' => true, 'PRIMARY' => true,
        'QUERY' => true,
        'RAISE' => true, 'RECURSIVE' => true, 'REFERENCES' => true, 'REGEXP' => true, 'REINDEX' => true,
        'RELEASE' => true, 'RENAME' => true, 'REPLACE' => true, 'RESTRICT' => true, 'RIGHT' => true,
        'ROLLBACK' => true, 'ROW' => true,
        'SAVEPOINT' => true, 'SELECT' => true, 'SET' => true,
        'TABLE' => true, 'TEMP' => true, 'TEMPORARY' => true, 'THEN' => true, 'TO' => true,
        'TRANSACTION' => true, 'TRIGGER' => true,
        'UNION' => true, 'UNIQUE' => true, 'UPDATE' => true, 'USING' => true,
        'VACUUM' => true, 'VALUES' => true, 'VIEW' => true, 'VIRTUAL' => true,
        'WHEN' => true, 'WHERE' => true, 'WITH' => true, 'WITHOUT' => true,
    ];

    $normalized = mb_strtoupper($string, 'UTF-8');
    if (isset($keywords[$normalized])) {
        $content = $normalized;

        return true;
    }

    return false;
}

            
isOperator() protected method

Returns whether there's an operator at the current offset.

If this method returns true, it has to set the $length parameter to the length of the matched string.

It may also set $content to a string that will be used as a token content.

protected boolean isOperator ( integer &$length, string|null &$content )
$length integer

The length of the matched string.

$content string|null

Optional content instead of the matched string.

return boolean

Whether there's an operator at the current offset.

                /**
                 * Returns whether there's an operator at the current offset.
                 *
                 * The lookup is delegated to {@see startsWithAnyLongest()}, so a two-character operator such as
                 * `<=` wins over its one-character prefix `<`.
                 *
                 * @param int $length Set to the length of the matched operator when the method returns true.
                 * @param string|null $content Optional content instead of the matched string; this implementation
                 * doesn't assign it.
                 *
                 * @return bool Whether there's an operator at the current offset.
                 */
                protected function isOperator(int &$length, ?string &$content): bool
{
    $candidates = [
        // Two-character operators.
        '!=', '<<', '<=', '<>', '==', '>=', '>>', '||',
        // One-character operators.
        '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', ';', '<', '=', '>', '|', '~',
    ];

    return $this->startsWithAnyLongest($candidates, true, $length);
}

            
isStringLiteral() protected method

Returns whether there's a string literal at the current offset.

If this method returns true, it has to set the $length parameter to the length of the matched string.

It may also set $content to a string that will be used as a token content.

protected boolean isStringLiteral ( integer &$length, string|null &$content )
$length integer

The length of the matched string.

$content string|null

Optional content instead of the matched string.

return boolean

Whether there's a string literal at the current offset.

                /**
                 * Returns whether there's a single-quoted string literal at the current offset.
                 *
                 * Inside the literal, two consecutive single quotes stand for one literal quote.
                 *
                 * @param int $length Set to the length of the matched string, quotes included, on success.
                 * @param string|null $content Set to the literal without its surrounding quotes and with doubled
                 * quotes collapsed, on success.
                 *
                 * @return bool Whether there's a string literal at the current offset.
                 */
                protected function isStringLiteral(int &$length, ?string &$content): bool
{
    if ($this->substring(1) !== "'") {
        return false;
    }

    // Advance past quotes until one is not immediately followed by another quote,
    // because a doubled quote is an escape rather than the end of the literal.
    $end = $this->offset;
    do {
        $end = $this->indexAfter("'", $end + 1);
    } while ($this->substring(1, true, $end) === "'");

    $length = $end - $this->offset;
    $inner = $this->substring($length - 2, true, $this->offset + 1);
    $content = strtr($inner, ["''" => "'"]);

    return true;
}

            
isWhitespace() protected method

Returns whether there's a whitespace character at the current offset.

If this method returns true, it has to set the $length parameter to the length of the matched string.

protected boolean isWhitespace ( integer &$length )
$length integer

The length of the matched string.

return boolean

Whether there's a whitespace character at the current offset.

                /**
                 * Returns whether there's a whitespace character at the current offset.
                 *
                 * Form feed, line feed, carriage return, tab and space count as whitespace.
                 *
                 * @param int $length Always set to 1, the length of a single whitespace character.
                 *
                 * @return bool Whether there's a whitespace character at the current offset.
                 */
                protected function isWhitespace(int &$length): bool
{
    $length = 1;
    $char = $this->substring($length);

    return $char === "\f"
        || $char === "\n"
        || $char === "\r"
        || $char === "\t"
        || $char === ' ';
}

            
startsWithAnyLongest() protected method

Defined in: Yiisoft\Db\Sqlite\AbstractTokenizer::startsWithAnyLongest()

Returns whether the SQL code at the current offset starts with any of the given strings, preferring the longest match.

protected boolean startsWithAnyLongest ( array $with, boolean $caseSensitive, integer &$length, string|null &$content null )
$with array

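Strings to test. A plain list is converted internally into a map indexed by string length to speed up lookups; an already built map of that shape is also accepted. The caller's array isn't modified.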

$caseSensitive boolean

Whether to perform a case-sensitive comparison.

$length integer

Length of the matched string.

$content string|null

Matched string.

return boolean

Whether there is a match.

                /**
                 * Returns whether the SQL code at the current offset starts with any of the given strings,
                 * preferring the longest one.
                 *
                 * @param array $with Strings to test, either as a plain list or as a map of the form
                 * `[length => [string => true]]`. A plain list is converted to that map, longest strings first.
                 * @param bool $caseSensitive Whether to perform a case-sensitive comparison. When false, both the
                 * candidates and the SQL code are upper-cased before comparing.
                 * @param int $length Set to the length of the matched string when the method returns true.
                 * @param string|null $content Set to the SQL substring that was tested last: the matched string on
                 * success, the last attempted substring otherwise.
                 *
                 * @return bool Whether there is a match.
                 */
                protected function startsWithAnyLongest(
    array $with,
    bool $caseSensitive,
    int &$length,
    ?string &$content = null,
): bool {
    if ($with === []) {
        return false;
    }

    // A plain list of strings is grouped by length, longest first, so that the longest candidate wins.
    if (!is_array(reset($with))) {
        usort(
            $with,
            static fn(string $left, string $right) => mb_strlen($right, 'UTF-8') <=> mb_strlen($left, 'UTF-8'),
        );

        $byLength = [];
        foreach ($with as $candidate) {
            $key = $caseSensitive ? $candidate : mb_strtoupper($candidate, 'UTF-8');
            $byLength[mb_strlen($candidate, 'UTF-8')][$key] = true;
        }

        $with = $byLength;
    }

    /** @psalm-var array<int, array> $with */
    foreach ($with as $candidateLength => $candidates) {
        $content = $this->substring($candidateLength, $caseSensitive);
        if (isset($candidates[$content])) {
            $length = $candidateLength;

            return true;
        }
    }

    return false;
}

            
substring() protected method

Defined in: Yiisoft\Db\Sqlite\AbstractTokenizer::substring()

Returns a string of the given length starting with the specified offset.

protected string substring ( integer $length, boolean $caseSensitive true, integer|null $offset null )
$length integer

String length to return.

$caseSensitive boolean

If it's false, the string will be uppercase.

$offset integer|null

SQL code offset, defaults to current if null is passed.

return string

Result string, it may be empty if there's nothing to return.

                /**
                 * Returns a part of the SQL code of the given length starting at the specified offset.
                 *
                 * Results are memoised per offset/length pair, separately for the original and the upper-cased
                 * form. Offsets and lengths are counted in UTF-8 characters.
                 *
                 * @param int $length String length to return.
                 * @param bool $caseSensitive If it's false, the returned string is upper-cased.
                 * @param int|null $offset SQL code offset; the current offset is used when null.
                 *
                 * @return string Result string; it's empty when the requested range runs past the end of the SQL code.
                 */
                protected function substring(int $length, bool $caseSensitive = true, ?int $offset = null): string
{
    $offset ??= $this->offset;

    if ($offset + $length > $this->length) {
        return '';
    }

    // The suffix ",1" marks the original-case entry and ",0" the upper-cased one.
    $exactKey = "{$offset},{$length},1";
    $this->substrings[$exactKey] ??= mb_substr($this->sql, $offset, $length, 'UTF-8');

    if ($caseSensitive) {
        return $this->substrings[$exactKey];
    }

    $upperKey = "{$offset},{$length},0";
    $this->substrings[$upperKey] ??= mb_strtoupper($this->substrings[$exactKey], 'UTF-8');

    return $this->substrings[$upperKey];
}

            
tokenize() public method

Defined in: Yiisoft\Db\Sqlite\AbstractTokenizer::tokenize()

Tokenizes and returns a code type token.

public Yiisoft\Db\Sqlite\SqlToken tokenize ( )
return Yiisoft\Db\Sqlite\SqlToken

Code type token.

throws InvalidArgumentException

If the SQL code is invalid.

                /**
                 * Tokenizes the SQL code given to the constructor and returns the root token.
                 *
                 * The scanning state (length, offset, substring cache, buffer, token stack) is reset on every
                 * call. The root token has the type `SqlToken::TYPE_CODE` and the whole SQL code as its content;
                 * a first token of the type `SqlToken::TYPE_STATEMENT` is added to it before scanning starts.
                 *
                 * @return SqlToken Code type token.
                 *
                 * NOTE(review): the accompanying API documentation lists an `InvalidArgumentException` for invalid
                 * SQL code. Nothing in this method throws it directly, so it presumably comes from one of the
                 * helpers called here; confirm against their implementations.
                 */
                public function tokenize(): SqlToken
{
    // Reset the scanning state so that the SQL code is read from its first character.
    $this->length = mb_strlen($this->sql, 'UTF-8');
    $this->offset = 0;
    $this->substrings = [];
    $this->buffer = '';
    // Root token that carries the whole SQL code; it's the first entry pushed on the token stack.
    $token = (new SqlToken())->type(SqlToken::TYPE_CODE)->content($this->sql);
    $this->tokenStack = new SplStack();
    $this->tokenStack->push($token);
    // Add the first statement token to the root (presumably as a child, through SqlToken's array access;
    // confirm against SqlToken) and push it on the stack, which makes it the current token.
    $token[] = (new SqlToken())->type(SqlToken::TYPE_STATEMENT);
    $this->tokenStack->push($token[0]);
    $this->currentToken = $this->tokenStack->top();
    // Receives the length of whatever the matcher methods below recognise.
    $length = 0;
    while (!$this->isEof()) {
        // Whitespace and comments end the pending buffer and are skipped. addTokenFromBuffer() presumably
        // turns the accumulated buffer into a token; its implementation isn't visible here.
        if ($this->isWhitespace($length) || $this->isComment($length)) {
            $this->addTokenFromBuffer();
            $this->advance($length);
            continue;
        }
        /** @psalm-suppress ConflictingReferenceConstraint */
        if ($this->tokenizeOperator($length) || $this->tokenizeDelimitedString($length)) {
            $this->advance($length);
            continue;
        }
        // Nothing matched: collect the current character in the buffer and move on by one character.
        $this->buffer .= $this->substring(1);
        $this->advance(1);
    }
    // Handle whatever is still in the buffer once the end of the SQL code is reached.
    $this->addTokenFromBuffer();
    // Remove the root's last entry when it's an SqlToken without children (presumably an empty
    // trailing statement, e.g. after a final ";"; verify against tokenizeOperator()).
    if (
        $token->getHasChildren()
        && $token[-1] instanceof SqlToken
        && !$token[-1]->getHasChildren()
    ) {
        unset($token[-1]);
    }
    return $token;
}