├── .editorconfig ├── .gitignore ├── .scrutinizer.yml ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── composer.json ├── docs └── en │ └── index.md ├── easy-coding-standard.yml ├── phpstan.neon ├── phpunit.xml ├── src ├── Contract │ ├── Parser │ │ └── ParserInterface.php │ └── Tokenizer │ │ └── TokenizerInterface.php ├── Generator │ ├── Common │ │ ├── AbstractVisitor.php │ │ └── Aggregate.php │ ├── Native.php │ ├── Native │ │ ├── BinaryOperator.php │ │ ├── Group.php │ │ ├── Phrase.php │ │ ├── Query.php │ │ ├── Range.php │ │ ├── Tag.php │ │ ├── UnaryOperator.php │ │ ├── User.php │ │ └── Word.php │ └── SQL │ │ ├── BinaryOperator.php │ │ ├── Group.php │ │ ├── Phrase.php │ │ ├── Query.php │ │ ├── Range.php │ │ ├── Resolver │ │ └── AbstractFilterResolver.php │ │ ├── UnaryOperator.php │ │ └── Word.php ├── Token │ ├── Node │ │ ├── Group.php │ │ ├── LogicalAnd.php │ │ ├── LogicalNot.php │ │ ├── LogicalOr.php │ │ ├── Mandatory.php │ │ ├── Prohibited.php │ │ ├── Query.php │ │ └── Term.php │ └── Token │ │ ├── Flags.php │ │ ├── GroupBegin.php │ │ ├── Phrase.php │ │ ├── Range.php │ │ ├── Tag.php │ │ ├── User.php │ │ └── Word.php ├── Tokenizer │ ├── AbstractTokenExtractor.php │ ├── Full.php │ ├── Parser.php │ ├── Text.php │ └── Tokenizer.php └── Value │ ├── AbstractNode.php │ ├── Correction.php │ ├── SyntaxTree.php │ ├── Token.php │ └── TokenSequence.php └── tests ├── Generator ├── Common │ └── AggregateTest.php ├── Native │ └── RangeTest.php └── SQL │ └── FilterParserTest.php ├── Integration ├── FilterParser.php └── Generator │ └── SQL │ └── Resolver │ └── ItemFilterResolver.php ├── IntegrationTest.php ├── Token └── Token │ └── RangeTest.php ├── Tokenizer ├── FullTest.php ├── TextTest.php └── TokenizerTest.php ├── Value └── NodeTraversalTest.php └── bootstrap.php /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | charset = utf-8 
7 | trim_trailing_whitespace = true 8 | 9 | [*.{php,php}] 10 | indent_style = space 11 | indent_size = 4 12 | 13 | [*.{yml,yaml}] 14 | indent_style = space 15 | indent_size = 4 16 | 17 | [composer.json] 18 | indent_style = tab 19 | indent_size = 4 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vendor 2 | !.gitignore 3 | composer.lock 4 | 5 | -------------------------------------------------------------------------------- /.scrutinizer.yml: -------------------------------------------------------------------------------- 1 | build: 2 | nodes: 3 | analysis: 4 | tests: 5 | override: 6 | - php-scrutinizer-run 7 | 8 | tools: 9 | external_code_coverage: true 10 | 11 | checks: 12 | php: 13 | code_rating: true 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: php 2 | 3 | matrix: 4 | include: 5 | - php: 7.1 6 | env: 7 | - PHPUNIT_FLAGS="--coverage-clover coverage.xml" 8 | - COMPOSER_FLAGS="" 9 | - php: 7.1 10 | env: 11 | - PHPSTAN=true 12 | - COMPOSER_FLAGS="" 13 | - php: 7.2 14 | env: 15 | - PHPUNIT_FLAGS="--coverage-clover coverage.xml" 16 | - COMPOSER_FLAGS="" 17 | - php: 7.2 18 | env: 19 | - PHPSTAN=true 20 | - COMPOSER_FLAGS="" 21 | - php: 7.3 22 | env: 23 | - PHPUNIT_FLAGS="--coverage-clover coverage.xml" 24 | - COMPOSER_FLAGS="--ignore-platform-reqs" 25 | - php: 7.3 26 | env: 27 | - CODING_STANDARD=true 28 | - COMPOSER_FLAGS="--ignore-platform-reqs" 29 | - php: 7.3 30 | env: 31 | - PHPSTAN=true 32 | - COMPOSER_FLAGS="--ignore-platform-reqs" 33 | 34 | install: 35 | - composer install $COMPOSER_FLAGS 36 | 37 | script: 38 | - vendor/bin/phpunit $PHPUNIT_FLAGS 39 | - if [[ $CODING_STANDARD != "" ]]; then vendor/bin/ecs check src tests; fi 40 | - if [[ $PHPSTAN != "" ]]; then vendor/bin/phpstan 
analyse src tests --level max --configuration phpstan.neon; fi 41 | 42 | after_script: 43 | # upload coverage.xml file to Scrutinizer to analyze it 44 | - | 45 | if [[ "$PHPUNIT_FLAGS" != "" ]]; then 46 | wget https://scrutinizer-ci.com/ocular.phar 47 | php ocular.phar code-coverage:upload --format=php-clover coverage.xml 48 | fi 49 | 50 | notifications: 51 | email: never 52 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at portiny@tomaspilar.cz. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017-2019 Tomáš Pilař 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
(?.*?)(?:(?(?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*))?)/Aus' 39 | => Tokenizer::TOKEN_TERM, 40 | '/(?(?:(? (?:[a-zA-Z_\-.\[\]\*%][a-zA-Z0-9_\-.\[\]\*%]*|\'[^\']+\')):)?(? [\[\{])' . 41 | '(? ([a-zA-Z0-9\,\._-]+|\*)|(? (?([a-zA-Z0-9\,\._-]+|\*)|(? (?[\]\}])(?:(? (?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*))?)/Aus' 44 | => Tokenizer::TOKEN_TERM, 45 | '/(? (?:(? (?:[a-zA-Z_\-.\[\]\*%][a-zA-Z0-9_\-.\[\]\*%]*|\'[^\']+\')):)?' . 46 | '(? (?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))' . 47 | '(?:(? (?[a-zA-Z0-9_][a-zA-Z0-9_\-.]*))?(?:(? Tokenizer::TOKEN_TERM, 49 | ]; 50 | 51 | 52 | protected function getExpressionTypeMap(): array 53 | { 54 | return self::$expressionTypeMap; 55 | } 56 | 57 | 58 | protected function createTermToken(int $position, array $data): Token 59 | { 60 | $lexeme = $data['lexeme']; 61 | switch (true) { 62 | case isset($data['rangeStartSymbol']) && isset($data['rangeEndSymbol']): 63 | $startValue = str_replace(',', '.', str_replace('"', '', $data['rangeFrom'])); 64 | $endValue = str_replace(',', '.', str_replace('"', '', $data['rangeTo'])); 65 | 66 | return new Range( 67 | $lexeme, 68 | $position, 69 | $data['domain'], 70 | is_array($startValue) ? reset($startValue) : $startValue, 71 | is_array($endValue) ? reset($endValue) : $endValue, 72 | $this->getRangeTypeBySymbol($data['rangeStartSymbol']), 73 | $this->getRangeTypeBySymbol($data['rangeEndSymbol']), 74 | isset($data['marker'], $data['flags']) ? new Flags($data['marker'], $data['flags']) : null 75 | ); 76 | 77 | case isset($data['word']): 78 | return new Word( 79 | $lexeme, 80 | $position, 81 | $data['domain'], 82 | // un-backslash special characters 83 | preg_replace('/(?:\\\\(\\\\|(["+\-!():#@ ])))/', '$1', $data['word']), 84 | isset($data['marker'], $data['flags']) ? 
new Flags($data['marker'], $data['flags']) : null 85 | ); 86 | 87 | case isset($data['phrase']): 88 | $quote = $data['quote']; 89 | return new Phrase( 90 | $lexeme, 91 | $position, 92 | $data['domain'], 93 | $quote, 94 | // un-backslash quote 95 | preg_replace('/(?:\\\\([' . $quote . ']))/', '$1', $data['phrase']), 96 | isset($data['marker'], $data['flags']) ? new Flags($data['marker'], $data['flags']) : null 97 | ); 98 | 99 | case isset($data['tag']): 100 | return new Tag($lexeme, $position, $data['marker'], $data['tag']); 101 | 102 | case isset($data['user']): 103 | return new User($lexeme, $position, $data['marker'], $data['user']); 104 | } 105 | throw new RuntimeException('Could not extract term token from the given data'); 106 | } 107 | 108 | 109 | protected function getRangeTypeBySymbol(string $symbol): string 110 | { 111 | if (in_array($symbol, ['{', '}'], true)) { 112 | return Range::TYPE_EXCLUSIVE; 113 | } 114 | return Range::TYPE_INCLUSIVE; 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /src/Tokenizer/Parser.php: -------------------------------------------------------------------------------- 1 | ['reduceGroup', 'reducePreference', 'reduceLogicalNot', 'reduceLogicalAnd', 'reduceLogicalOr'], 81 | 'unaryOperator' => ['reduceLogicalNot', 'reduceLogicalAnd', 'reduceLogicalOr'], 82 | 'logicalOr' => [], 83 | 'logicalAnd' => ['reduceLogicalOr'], 84 | 'term' => ['reducePreference', 'reduceLogicalNot', 'reduceLogicalAnd', 'reduceLogicalOr'], 85 | ]; 86 | 87 | /** 88 | * @var int[] 89 | */ 90 | private static $tokenShortcuts = [ 91 | 'operatorNot' => Tokenizer::TOKEN_LOGICAL_NOT | Tokenizer::TOKEN_LOGICAL_NOT_2, 92 | 'operatorPreference' => Tokenizer::TOKEN_MANDATORY | Tokenizer::TOKEN_PROHIBITED, 93 | 'operatorPrefix' => Tokenizer::TOKEN_MANDATORY | Tokenizer::TOKEN_PROHIBITED | Tokenizer::TOKEN_LOGICAL_NOT_2, 94 | 'operatorUnary' => Tokenizer::TOKEN_MANDATORY | Tokenizer::TOKEN_PROHIBITED | 
Tokenizer::TOKEN_LOGICAL_NOT 95 | | Tokenizer::TOKEN_LOGICAL_NOT_2, 96 | 'operatorBinary' => Tokenizer::TOKEN_LOGICAL_AND | Tokenizer::TOKEN_LOGICAL_OR, 97 | 'operator' => Tokenizer::TOKEN_LOGICAL_AND | Tokenizer::TOKEN_LOGICAL_OR | Tokenizer::TOKEN_MANDATORY 98 | | Tokenizer::TOKEN_PROHIBITED | Tokenizer::TOKEN_LOGICAL_NOT | Tokenizer::TOKEN_LOGICAL_NOT_2, 99 | 'groupDelimiter' => Tokenizer::TOKEN_GROUP_BEGIN | Tokenizer::TOKEN_GROUP_END, 100 | 'binaryOperatorAndWhitespace' => Tokenizer::TOKEN_LOGICAL_AND | Tokenizer::TOKEN_LOGICAL_OR 101 | | Tokenizer::TOKEN_WHITESPACE, 102 | ]; 103 | 104 | /** 105 | * @var string[] 106 | */ 107 | private static $shifts = [ 108 | Tokenizer::TOKEN_WHITESPACE => 'shiftWhitespace', 109 | Tokenizer::TOKEN_TERM => 'shiftTerm', 110 | Tokenizer::TOKEN_GROUP_BEGIN => 'shiftGroupBegin', 111 | Tokenizer::TOKEN_GROUP_END => 'shiftGroupEnd', 112 | Tokenizer::TOKEN_LOGICAL_AND => 'shiftBinaryOperator', 113 | Tokenizer::TOKEN_LOGICAL_OR => 'shiftBinaryOperator', 114 | Tokenizer::TOKEN_LOGICAL_NOT => 'shiftLogicalNot', 115 | Tokenizer::TOKEN_LOGICAL_NOT_2 => 'shiftLogicalNot2', 116 | Tokenizer::TOKEN_MANDATORY => 'shiftPreference', 117 | Tokenizer::TOKEN_PROHIBITED => 'shiftPreference', 118 | Tokenizer::TOKEN_BAILOUT => 'shiftBailout', 119 | ]; 120 | 121 | /** 122 | * @var string[] 123 | */ 124 | private static $nodeToReductionGroup = [ 125 | Group::class => 'group', 126 | LogicalAnd::class => 'logicalAnd', 127 | LogicalOr::class => 'logicalOr', 128 | LogicalNot::class => 'unaryOperator', 129 | Mandatory::class => 'unaryOperator', 130 | Prohibited::class => 'unaryOperator', 131 | Term::class => 'term', 132 | ]; 133 | 134 | /** 135 | * Input tokens. 136 | * 137 | * @var Token[] 138 | */ 139 | private $tokens = []; 140 | 141 | /** 142 | * An array of applied corrections. 143 | * 144 | * @var Correction[] 145 | */ 146 | private $corrections = []; 147 | 148 | /** 149 | * Query stack. 
*
* @var SplStack
*/
private $stack;


/**
 * Parse the given token sequence into a syntax tree.
 *
 * Tokens are consumed one at a time by shift(); whenever a shift produces
 * a node, reduce() tries to combine it with what is already on the stack.
 * Once the input is exhausted, reduceQuery() wraps the remaining stack
 * content into a single Query node, which becomes the root of the tree.
 *
 * @return SyntaxTree Root node, the original token sequence and all corrections applied while parsing
 */
public function parse(TokenSequence $tokenSequence): SyntaxTree
{
    $this->init($tokenSequence->getTokens());

    while ($this->tokens !== []) {
        $node = $this->shift();

        // Only nodes are reduced; shift handlers that return nothing have
        // already pushed (or discarded) their token themselves.
        if ($node instanceof AbstractNode) {
            $this->reduce($node);
        }
    }

    $this->reduceQuery();

    return new SyntaxTree($this->stack->top(), $tokenSequence, $this->corrections);
}


/**
 * Drop logical NOT operator tokens from the top of the stack.
 *
 * All consecutive tokens matching the 'operatorNot' mask are popped; if any
 * were removed, one correction listing all of them is recorded.
 */
public function ignoreLogicalNotOperatorsPrecedingPreferenceOperator(): void
{
    /** @var Token[] $precedingOperators */
    $precedingOperators = $this->ignorePrecedingOperators(self::$tokenShortcuts['operatorNot']);

    if ($precedingOperators !== []) {
        $this->addCorrection(
            self::CORRECTION_LOGICAL_NOT_OPERATORS_PRECEDING_PREFERENCE_IGNORED,
            ...$precedingOperators
        );
    }
}


/**
 * Shift a whitespace token.
 *
 * The whitespace itself is never stored. If a prefix operator (the
 * 'operatorPrefix' mask) is on top of the stack, it is popped and recorded
 * as a unary operator that is missing its operand.
 */
private function shiftWhitespace(): void
{
    if ($this->isTopStackToken(self::$tokenShortcuts['operatorPrefix'])) {
        $this->addCorrection(self::CORRECTION_UNARY_OPERATOR_MISSING_OPERAND_IGNORED, $this->stack->pop());
    }
}


/**
 * Shift a mandatory/prohibited operator token.
 *
 * The token is ignored when it is directly followed by any other operator.
 */
private function shiftPreference(Token $token): void
{
    $this->shiftAdjacentUnaryOperator($token, self::$tokenShortcuts['operator']);
}


/**
 * Push a unary operator token unless the next input token matches $tokenMask.
 *
 * @param Token $token The unary operator token being shifted
 * @param int|null $tokenMask Bitmask of token types that, when found next in the input,
 *                            cause $token to be dropped; null matches any Token
 */
private function shiftAdjacentUnaryOperator(Token $token, ?int $tokenMask): void
{
    // reset() peeks at the next unread token without consuming it
    // (it yields false on an empty array, which isToken() rejects).
    if ($this->isToken(reset($this->tokens), $tokenMask)) {
        $this->addCorrection(self::CORRECTION_ADJACENT_UNARY_OPERATOR_PRECEDING_OPERATOR_IGNORED, $token);

        return;
    }

    $this->stack->push($token);
}


/**
 * Shift a TOKEN_LOGICAL_NOT token by pushing it onto the stack unconditionally.
 */
private function shiftLogicalNot(Token $token): void
{
    $this->stack->push($token);
}


/**
 * Shift a TOKEN_LOGICAL_NOT_2 token.
 *
 * Uses the 'operator' mask with TOKEN_LOGICAL_NOT_2 cleared, so the token is
 * dropped when followed by any other operator, but a run of consecutive
 * TOKEN_LOGICAL_NOT_2 tokens is kept.
 */
private function shiftLogicalNot2(Token $token): void
{
    $tokenMask = self::$tokenShortcuts['operator'] & ~Tokenizer::TOKEN_LOGICAL_NOT_2;

    $this->shiftAdjacentUnaryOperator($token, $tokenMask);
}


/**
 * Shift a binary operator (logical AND / logical OR) token.
 *
 * The operator is ignored with a correction when it has no left operand
 * (empty stack or directly after a group begin) or when it follows another
 * operator; otherwise it is pushed onto the stack.
 */
private function shiftBinaryOperator(Token $token): void
{
    if ($this->stack->isEmpty() || $this->isTopStackToken(Tokenizer::TOKEN_GROUP_BEGIN)) {
        $this->addCorrection(self::CORRECTION_BINARY_OPERATOR_MISSING_LEFT_OPERAND_IGNORED, $token);

        return;
    }

    if ($this->isTopStackToken(self::$tokenShortcuts['operator'])) {
        $this->ignoreBinaryOperatorFollowingOperator($token);

        return;
    }

    $this->stack->push($token);
}


/**
 * Shift a term token by wrapping it into a Term node.
 *
 * The node is returned instead of pushed so that parse() passes it to reduce().
 */
private function shiftTerm(Token $token): Term
{
    return new Term($token);
}


/**
 * Shift a group begin token by pushing it onto the stack.
 */
private function shiftGroupBegin(Token $token): void
{
    $this->stack->push($token);
}


/**
 * Shift a group end token.
 *
 * The delimiter is pushed onto the stack and an empty Group node is returned;
 * reduceGroup() later pops the delimiter and fills the group.
 */
private function shiftGroupEnd(Token $token): Group
{
    $this->stack->push($token);

    return new Group;
}


/**
 * Shift a bailout token: it is discarded and recorded as a correction.
 */
private function shiftBailout(Token $token): void
{
    $this->addCorrection(self::CORRECTION_BAILOUT_TOKEN_IGNORED, $token);
}


/**
 * Wrap $node into Mandatory or Prohibited if a preference operator is on top of the stack.
 *
 * @return AbstractNode The wrapping node, or $node unchanged when no preference operator is on top
 */
private function reducePreference(AbstractNode $node): AbstractNode
{
    if (! $this->isTopStackToken(self::$tokenShortcuts['operatorPreference'])) {
        return $node;
    }

    $token = $this->stack->pop();

    if ($this->isToken($token, Tokenizer::TOKEN_MANDATORY)) {
        return new Mandatory($node, $token);
    }

    return new Prohibited($node, $token);
}


/**
 * Wrap $node into LogicalNot if a NOT operator is on top of the stack.
 *
 * When $node is already a Mandatory or Prohibited node, the NOT operators
 * on the stack are dropped with a correction and $node is returned as is.
 *
 * @return AbstractNode The LogicalNot node, or $node unchanged
 */
private function reduceLogicalNot(AbstractNode $node): AbstractNode
{
    if (! $this->isTopStackToken(self::$tokenShortcuts['operatorNot'])) {
        return $node;
    }

    if ($node instanceof Mandatory || $node instanceof Prohibited) {
        $this->ignoreLogicalNotOperatorsPrecedingPreferenceOperator();

        return $node;
    }

    return new LogicalNot($node, $this->stack->pop());
}


/**
 * Combine $node with the stack entry below a logical AND operator token.
 *
 * Applies only when the stack holds at least two entries and its top is a
 * logical AND token; the operator and the entry beneath it (used as the
 * left operand) are popped.
 *
 * @return AbstractNode The LogicalAnd node, or $node unchanged
 */
private function reduceLogicalAnd(AbstractNode $node): AbstractNode
{
    if ($this->stack->count() <= 1 || ! $this->isTopStackToken(Tokenizer::TOKEN_LOGICAL_AND)) {
        return $node;
    }

    $token = $this->stack->pop();
    $leftOperand = $this->stack->pop();

    return new LogicalAnd($leftOperand, $node, $token);
}


/**
 * Reduce logical OR.
 *
 * Applies only when the stack holds at least two entries and its top is a
 * logical OR token. Outside of a group, leading whitespace is removed from
 * the remaining input and, if a logical AND follows, $node is pushed back
 * and the reduction is postponed (null is returned).
 *
 * @param bool $inGroup Reduce inside a group
 * @return LogicalOr|AbstractNode|null
 */
private function reduceLogicalOr(AbstractNode $node, bool $inGroup = false)
{
    if ($this->stack->count() <= 1 || ! $this->isTopStackToken(Tokenizer::TOKEN_LOGICAL_OR)) {
        return $node;
    }

    // If inside a group don't look for following logical AND
    if (! $inGroup) {
        $this->popWhitespace();
        // If the next token is logical AND, put the node on stack
        // as that has precedence over logical OR
        if ($this->isToken(reset($this->tokens), Tokenizer::TOKEN_LOGICAL_AND)) {
            $this->stack->push($node);

            return null;
        }
    }

    $token = $this->stack->pop();
    $leftOperand = $this->stack->pop();

    return new LogicalOr($leftOperand, $node, $token);
}


/**
 * Fill the given Group with the nodes collected since the matching group begin token.
 *
 * @return Group|null The populated group, or null when the group turned out
 *                    to be empty and was dropped with a correction
 */
private function reduceGroup(Group $group): ?Group
{
    // The group end token was pushed by shiftGroupEnd() just before this reduction
    $rightDelimiter = $this->stack->pop();

    // Pop dangling tokens
    $this->popTokens(~Tokenizer::TOKEN_GROUP_BEGIN);

    // A group begin directly on top means nothing was collected inside the group
    if ($this->isTopStackToken(Tokenizer::TOKEN_GROUP_BEGIN)) {
        $leftDelimiter = $this->stack->pop();
        $this->ignoreEmptyGroup($leftDelimiter, $rightDelimiter);
        $this->reduceRemainingLogicalOr(true);

        return null;
    }

    $this->reduceRemainingLogicalOr(true);

    $group->setNodes($this->collectTopStackNodes());
    // With the nodes collected, the next stack entry is taken as the left delimiter
    $group->setTokenLeft($this->stack->pop());
    $group->setTokenRight($rightDelimiter);

    return $group;
}


/**
 * Take the next token from the input and dispatch it to its shift handler.
 *
 * The handler name is looked up in self::$shifts by token type and invoked
 * with the token as its only argument.
 *
 * @return mixed Whatever the handler returns, or null when the input is empty
 */
private function shift()
{
    $token = array_shift($this->tokens);
    if ($token === null) {
        return null;
    }

    $shift = self::$shifts[$token->getType()];

    return $this->{$shift}($token);
}


/**
 * Repeatedly apply the reductions configured for the node until none is left.
 *
 * Reductions are tried in the order returned by getReduction(). Whenever a
 * reduction yields a different node, the sequence restarts for that node.
 * The loop ends when a reduction returns something that is not an
 * AbstractNode, or when the reductions are exhausted, in which case the
 * node is pushed onto the stack.
 */
private function reduce(AbstractNode $node): void
{
    $previousNode = null;
    $reductionIndex = 0;

    while ($node instanceof AbstractNode) {
        // Reset reduction index on first iteration or on Node change
        if ($node !== $previousNode) {
            $reductionIndex = 0;
        }

        // If there are no reductions to try, put the Node on the stack
        // and continue shifting
        $reduction = $this->getReduction($node, $reductionIndex);
        if ($reduction === null) {
            $this->stack->push($node);
            break;
        }

        $previousNode = $node;
        $node = $this->{$reduction}($node);
        ++$reductionIndex;
    }
}


/**
 * Ignore a binary operator that follows another operator.
 *
 * Operators on top of the stack and binary operators following in the input
 * are removed as well; all of them are recorded in a single correction.
 */
private function ignoreBinaryOperatorFollowingOperator(Token $token): void
{
    $precedingOperators = $this->ignorePrecedingOperators(self::$tokenShortcuts['operator']);
    $followingOperators = $this->ignoreFollowingOperators();

    $this->addCorrection(
        self::CORRECTION_BINARY_OPERATOR_FOLLOWING_OPERATOR_IGNORED,
        ...array_merge($precedingOperators, [$token], $followingOperators)
    );
}


/**
 * Collect all Nodes from the top of the stack.
 *
 * Popping stops at the first entry that is not an AbstractNode; the result
 * keeps the order in which the nodes were pushed.
 *
 * @return AbstractNode[]
 */
private function collectTopStackNodes()
{
    $nodes = [];

    while (! $this->stack->isEmpty() && $this->stack->top() instanceof AbstractNode) {
        array_unshift($nodes, $this->stack->pop());
    }

    return $nodes;
}


/**
 * Ignore an empty group.
 *
 * Operators on top of the stack and binary operators following in the input
 * are removed too; they are recorded together with both group delimiters in
 * a single correction.
 */
private function ignoreEmptyGroup(Token $leftDelimiter, Token $rightDelimiter): void
{
    $precedingOperators = $this->ignorePrecedingOperators(self::$tokenShortcuts['operator']);
    $followingOperators = $this->ignoreFollowingOperators();

    $this->addCorrection(
        self::CORRECTION_EMPTY_GROUP_IGNORED,
        ...array_merge($precedingOperators, [$leftDelimiter, $rightDelimiter], $followingOperators)
    );
}


/**
 * Initialize the parser with given array of $tokens.
*
* Resets the corrections, stores the tokens, removes unmatched group
* delimiters from them and starts with an empty stack.
*
* @param Token[] $tokens
*/
private function init(array $tokens): void
{
    $this->corrections = [];
    $this->tokens = $tokens;
    $this->cleanupGroupDelimiters($this->tokens);
    $this->stack = new SplStack();
}


/**
 * Return the name of the reduction method to try for the given node.
 *
 * @param AbstractNode $node Node whose class selects the reduction group
 * @param int $reductionIndex Zero-based position within that group
 *
 * @return string|null Method name, or null when the node class has no reduction
 *                     group or the group has no entry at $reductionIndex
 */
private function getReduction(AbstractNode $node, int $reductionIndex): ?string
{
    // Unmapped node classes simply have nothing to reduce; look the class up
    // with ?? so that this case does not raise an undefined-index notice.
    $reductionGroup = self::$nodeToReductionGroup[get_class($node)] ?? null;
    if ($reductionGroup === null) {
        return null;
    }

    return self::$reductionGroups[$reductionGroup][$reductionIndex] ?? null;
}


/**
 * Wrap everything left on the stack into a single Query node.
 *
 * Dangling tokens on top of the stack are dropped first (with corrections)
 * and a possibly remaining logical OR is reduced. Afterwards the stack
 * holds exactly one entry: the Query.
 */
private function reduceQuery(): void
{
    $this->popTokens();
    $this->reduceRemainingLogicalOr();

    // Entries come off the stack top-first; collect them and reverse once
    // to restore the original order instead of prepending on every pop.
    $nodes = [];
    while (! $this->stack->isEmpty()) {
        $nodes[] = $this->stack->pop();
    }

    $this->stack->push(new Query(array_reverse($nodes)));
}


/**
 * Check if the given $token is an instance of Token.
 *
 * Optionally also checks that the Token type matches the given $typeMask.
 *
 * @param mixed $token Value to check; anything that is not a Token yields false
 * @param int|null $typeMask Bitmask of accepted token types, null accepts any Token
 *
 * @return bool
 */
private function isToken($token, ?int $typeMask = null): bool
{
    if (! $token instanceof Token) {
        return false;
    }

    return $typeMask === null || ($token->getType() & $typeMask) !== 0;
}


/**
 * Check whether the stack is non-empty and its top entry is a Token,
 * optionally of a type matching the given bitmask.
 */
private function isTopStackToken(?int $type = null): bool
{
    return ! $this->stack->isEmpty() && $this->isToken($this->stack->top(), $type);
}


/**
 * Remove whitespace Tokens from the beginning of the token array.
528 | */ 529 | private function popWhitespace(): void 530 | { 531 | while ($this->isToken(reset($this->tokens), Tokenizer::TOKEN_WHITESPACE)) { 532 | array_shift($this->tokens); 533 | } 534 | } 535 | 536 | 537 | /** 538 | * Remove all Tokens from the top of the query stack and log Corrections as necessary. 539 | * 540 | * Optionally also checks that Token matches given $typeMask. 541 | * 542 | * @param int $typeMask 543 | */ 544 | private function popTokens($typeMask = null): void 545 | { 546 | while ($this->isTopStackToken($typeMask)) { 547 | /** @var Token $token */ 548 | $token = $this->stack->pop(); 549 | if ((bool) ($token->getType() & self::$tokenShortcuts['operatorUnary'])) { 550 | $this->addCorrection(self::CORRECTION_UNARY_OPERATOR_MISSING_OPERAND_IGNORED, $token); 551 | } else { 552 | $this->addCorrection(self::CORRECTION_BINARY_OPERATOR_MISSING_RIGHT_OPERAND_IGNORED, $token); 553 | } 554 | } 555 | } 556 | 557 | 558 | private function ignorePrecedingOperators(?int $type): array 559 | { 560 | $tokens = []; 561 | while ($this->isTopStackToken($type)) { 562 | array_unshift($tokens, $this->stack->pop()); 563 | } 564 | 565 | return $tokens; 566 | } 567 | 568 | 569 | private function ignoreFollowingOperators(): array 570 | { 571 | $tokenMask = self::$tokenShortcuts['binaryOperatorAndWhitespace']; 572 | $tokens = []; 573 | while ($this->isToken(reset($this->tokens), $tokenMask)) { 574 | $token = array_shift($this->tokens); 575 | if ($token !== null && (bool) ($token->getType() & self::$tokenShortcuts['operatorBinary'])) { 576 | $tokens[] = $token; 577 | } 578 | } 579 | 580 | return $tokens; 581 | } 582 | 583 | 584 | /** 585 | * Reduce logical OR possibly remaining after reaching end of group or query. 586 | * 587 | * @param bool $inGroup Reduce inside a group 588 | */ 589 | private function reduceRemainingLogicalOr($inGroup = false): void 590 | { 591 | if (! $this->stack->isEmpty() && ! 
$this->isTopStackToken()) { 592 | $node = $this->reduceLogicalOr($this->stack->pop(), $inGroup); 593 | $this->stack->push($node); 594 | } 595 | } 596 | 597 | 598 | /** 599 | * Clean up group delimiter tokens, removing unmatched left and right delimiter. 600 | * 601 | * Closest group delimiters will be matched first, unmatched remainder is removed. 602 | * 603 | * @param Token[] $tokens 604 | */ 605 | private function cleanupGroupDelimiters(array &$tokens): void 606 | { 607 | $indexes = $this->getUnmatchedGroupDelimiterIndexes($tokens); 608 | 609 | while (count($indexes) > 0) { 610 | $lastIndex = array_pop($indexes); 611 | $token = $tokens[$lastIndex]; 612 | unset($tokens[$lastIndex]); 613 | 614 | if ($token->getType() === Tokenizer::TOKEN_GROUP_BEGIN) { 615 | $this->addCorrection(self::CORRECTION_UNMATCHED_GROUP_LEFT_DELIMITER_IGNORED, $token); 616 | } else { 617 | $this->addCorrection(self::CORRECTION_UNMATCHED_GROUP_RIGHT_DELIMITER_IGNORED, $token); 618 | } 619 | } 620 | } 621 | 622 | 623 | private function getUnmatchedGroupDelimiterIndexes(array &$tokens): array 624 | { 625 | $trackLeft = []; 626 | $trackRight = []; 627 | 628 | foreach ($tokens as $index => $token) { 629 | if (! 
$this->isToken($token, self::$tokenShortcuts['groupDelimiter'])) { 630 | continue; 631 | } 632 | 633 | if ($this->isToken($token, Tokenizer::TOKEN_GROUP_BEGIN)) { 634 | $trackLeft[] = $index; 635 | continue; 636 | } 637 | 638 | if (count($trackLeft) === 0) { 639 | $trackRight[] = $index; 640 | } else { 641 | array_pop($trackLeft); 642 | } 643 | } 644 | 645 | return array_merge($trackLeft, $trackRight); 646 | } 647 | 648 | 649 | /** 650 | * @param mixed $type 651 | */ 652 | private function addCorrection($type, Token ...$tokens): void 653 | { 654 | $this->corrections[] = new Correction($type, ...$tokens); 655 | } 656 | 657 | } 658 | -------------------------------------------------------------------------------- /src/Tokenizer/Text.php: -------------------------------------------------------------------------------- 1 | [\s]+)/Au' => Tokenizer::TOKEN_WHITESPACE, 20 | '/(? \+)/Au' => Tokenizer::TOKEN_MANDATORY, 21 | '/(? -)/Au' => Tokenizer::TOKEN_PROHIBITED, 22 | '/(? !)/Au' => Tokenizer::TOKEN_LOGICAL_NOT_2, 23 | '/(? \))/Au' => Tokenizer::TOKEN_GROUP_END, 24 | '/(? NOT)(?:[\s"()+\-!]|$)/Au' => Tokenizer::TOKEN_LOGICAL_NOT, 25 | '/(? (?:AND|&&))(?:[\s"()+\-!]|$)/Au' => Tokenizer::TOKEN_LOGICAL_AND, 26 | '/(? (?:OR|\|\|))(?:[\s"()+\-!]|$)/Au' => Tokenizer::TOKEN_LOGICAL_OR, 27 | '/(? \()/Au' => Tokenizer::TOKEN_GROUP_BEGIN, 28 | '/(? (? (?.*?)(?:(? Tokenizer::TOKEN_TERM, 29 | '/(?(? (?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))(?:(? 
Tokenizer::TOKEN_TERM, 31 | ]; 32 | 33 | 34 | protected function getExpressionTypeMap(): array 35 | { 36 | return self::$expressionTypeMap; 37 | } 38 | 39 | 40 | protected function createTermToken(int $position, array $data): Token 41 | { 42 | $lexeme = $data['lexeme']; 43 | switch (true) { 44 | case isset($data['word']): 45 | return new Word( 46 | $lexeme, 47 | $position, 48 | '', 49 | // un-backslash special chars 50 | preg_replace('/(?:\\\\(\\\\|(["+\-!() ])))/', '$1', $data['word']) 51 | ); 52 | case isset($data['phrase']): 53 | $quote = $data['quote']; 54 | return new Phrase( 55 | $lexeme, 56 | $position, 57 | '', 58 | $quote, 59 | // un-backslash quote 60 | preg_replace('/(?:\\\\([' . $quote . ']))/', '$1', $data['phrase']) 61 | ); 62 | } 63 | throw new RuntimeException('Could not extract term token from the given data'); 64 | } 65 | 66 | 67 | protected function createGroupBeginToken(int $position, array $data): GroupBegin 68 | { 69 | return new GroupBegin($data['lexeme'], $position, $data['lexeme'], ''); 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/Tokenizer/Tokenizer.php: -------------------------------------------------------------------------------- 1 | tokenExtractor = $tokenExtractor; 101 | } 102 | 103 | 104 | public function tokenize(string $string): TokenSequence 105 | { 106 | $length = mb_strlen($string); 107 | $position = 0; 108 | $tokens = []; 109 | while ($position < $length) { 110 | $token = $this->tokenExtractor->extract($string, $position); 111 | $position += mb_strlen($token->getLexeme()); 112 | $tokens[] = $token; 113 | } 114 | return new TokenSequence($tokens, $string); 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /src/Value/AbstractNode.php: -------------------------------------------------------------------------------- 1 | type = $type; 26 | $this->tokens = $tokens; 27 | } 28 | 29 | 30 | /** 31 | * @return mixed 
32 | */ 33 | public function getType() 34 | { 35 | return $this->type; 36 | } 37 | 38 | 39 | public function getTokens(): array 40 | { 41 | return $this->tokens; 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/Value/SyntaxTree.php: -------------------------------------------------------------------------------- 1 | rootNode = $rootNode; 30 | $this->tokenSequence = $tokenSequence; 31 | $this->corrections = $corrections; 32 | } 33 | 34 | 35 | public function getRootNode(): AbstractNode 36 | { 37 | return $this->rootNode; 38 | } 39 | 40 | 41 | public function getTokenSequence(): TokenSequence 42 | { 43 | return $this->tokenSequence; 44 | } 45 | 46 | 47 | /** 48 | * @return Correction[] 49 | */ 50 | public function getCorrections(): array 51 | { 52 | return $this->corrections; 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/Value/Token.php: -------------------------------------------------------------------------------- 1 | type = $type; 27 | $this->lexeme = $lexeme; 28 | $this->position = $position; 29 | } 30 | 31 | 32 | public function getType(): int 33 | { 34 | return $this->type; 35 | } 36 | 37 | 38 | public function getLexeme(): string 39 | { 40 | return $this->lexeme; 41 | } 42 | 43 | 44 | public function getPosition(): int 45 | { 46 | return $this->position; 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /src/Value/TokenSequence.php: -------------------------------------------------------------------------------- 1 | tokens = $tokens; 25 | $this->source = $source; 26 | } 27 | 28 | 29 | /** 30 | * @return Token[] 31 | */ 32 | public function getTokens(): array 33 | { 34 | return $this->tokens; 35 | } 36 | 37 | 38 | public function getSource(): string 39 | { 40 | return $this->source; 41 | } 42 | 43 | } 44 | 
-------------------------------------------------------------------------------- /tests/Generator/Common/AggregateTest.php: -------------------------------------------------------------------------------- 1 | getMockBuilder(AbstractNode::class)->getMock(); 17 | self::assertTrue((new Aggregate)->accept($nodeMock)); 18 | } 19 | 20 | 21 | public function testVisitThrowsException(): void 22 | { 23 | self::expectException(RuntimeException::class); 24 | self::expectExceptionMessage('No visitor available for Mock'); 25 | 26 | /** @var AbstractNode $nodeMock */ 27 | $nodeMock = $this->getMockBuilder(AbstractNode::class)->getMock(); 28 | (new Aggregate)->visit($nodeMock); 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /tests/Generator/Native/RangeTest.php: -------------------------------------------------------------------------------- 1 | visitor = new Range(); 28 | } 29 | 30 | 31 | public function acceptDataprovider(): array 32 | { 33 | return [ 34 | [true, new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))], 35 | [false, new Term(new Word('word', 0, '', 'a'))], 36 | ]; 37 | } 38 | 39 | 40 | /** 41 | * @dataProvider acceptDataprovider 42 | */ 43 | public function testAccepts(bool $expected, AbstractNode $node): void 44 | { 45 | self::assertSame($expected, $this->visitor->accept($node)); 46 | } 47 | 48 | 49 | public function visitDataprovider(): array 50 | { 51 | return [ 52 | ['[a TO b]', new Term(new RangeToken('[a TO b]', 0, '', 'a', 'b', 'inclusive', 'inclusive'))], 53 | ['[a TO b}', new Term(new RangeToken('[a TO b}', 0, '', 'a', 'b', 'inclusive', 'exclusive'))], 54 | ['{a TO b}', new Term(new RangeToken('{a TO b}', 0, '', 'a', 'b', 'exclusive', 'exclusive'))], 55 | ['{a TO b]', new Term(new RangeToken('{a TO b]', 0, '', 'a', 'b', 'exclusive', 'inclusive'))], 56 | ]; 57 | } 58 | 59 | 60 | /** 61 | * @dataProvider visitDataprovider 62 | */ 63 | public function testVisit(string 
$expected, AbstractNode $node): void 64 | { 65 | self::assertSame($expected, $this->visitor->visit($node)); 66 | } 67 | 68 | 69 | public function visitWrongNodeDataprovider(): array 70 | { 71 | $operand = $this->getMockForAbstractClass(AbstractNode::class); 72 | $token = new Token(Tokenizer::TOKEN_TERM, '', 0); 73 | 74 | return [[new Mandatory($operand, $token)], [new Term(new Word('word', 0, '', 'a'))]]; 75 | } 76 | 77 | 78 | /** 79 | * @dataProvider visitWrongNodeDataprovider 80 | */ 81 | public function testVisitWrongNodeFails(AbstractNode $node): void 82 | { 83 | $this->expectException(LogicException::class); 84 | $this->visitor->visit($node); 85 | } 86 | 87 | 88 | public function testVisitUnknownRangeStartTypeFails(): void 89 | { 90 | $token = new RangeToken('{a TO b}', 0, '', 'a', 'b', 'inclusive', 'inclusive'); 91 | $token->setStartType('unknown'); 92 | $node = new Term($token); 93 | $this->expectException(LogicException::class); 94 | $this->expectExceptionMessage('Range start type unknown is not supported'); 95 | $this->visitor->visit($node); 96 | } 97 | 98 | 99 | public function testVisitUnknownRangeEndTypeFails(): void 100 | { 101 | $token = new RangeToken('{a TO b}', 0, '', 'a', 'b', 'inclusive', 'inclusive'); 102 | $token->setEndType('unknown'); 103 | $node = new Term($token); 104 | $this->expectException(LogicException::class); 105 | $this->expectExceptionMessage('Range end type unknown is not supported'); 106 | $this->visitor->visit($node); 107 | } 108 | 109 | } 110 | -------------------------------------------------------------------------------- /tests/Generator/SQL/FilterParserTest.php: -------------------------------------------------------------------------------- 1 | = '2019-01-01T00:00:00+00:00' AND introduced_at <= '2019-01-31T23:59:59+00:00')" 22 | . 
" AND NOT ((type = 'tv' OR type = 'mobile phone')) OR ((price > 10) OR (price > 30))", 23 | $sql 24 | ); 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /tests/Integration/FilterParser.php: -------------------------------------------------------------------------------- 1 | tokenize($fql); 26 | 27 | $parser = new Parser; 28 | $syntaxTree = $parser->parse($tokenSequence); 29 | 30 | $visitor = new Aggregate( 31 | [ 32 | new BinaryOperator, 33 | new UnaryOperator, 34 | new Group, 35 | new Query, 36 | new Phrase($filterResolver), 37 | new Range($filterResolver), 38 | new Word($filterResolver), 39 | ] 40 | ); 41 | 42 | return $visitor->visit($syntaxTree->getRootNode()); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /tests/Integration/Generator/SQL/Resolver/ItemFilterResolver.php: -------------------------------------------------------------------------------- 1 | function (string $value): string { 16 | return $this->queryResolver($value); 17 | }, 18 | 'introducedAt' => function (Range $range): string { 19 | return $this->introducedAtResolver($range); 20 | }, 21 | 'type' => function (string $value): string { 22 | return $this->typeResolver($value); 23 | }, 24 | 'price' => function (Range $range): string { 25 | return $this->priceResolver($range); 26 | }, 27 | ]; 28 | } 29 | 30 | 31 | private function queryResolver(string $value): string 32 | { 33 | return "name ILIKE '%${value}%'"; 34 | } 35 | 36 | 37 | private function introducedAtResolver(Range $range): string 38 | { 39 | $rangeFrom = new DateTime((string) $range->getStartValue()); 40 | $rangeTo = new DateTime((string) $range->getEndValue()); 41 | 42 | return sprintf( 43 | "introduced_at %s '%s' AND introduced_at %s '%s'", 44 | $range->getStartSign(), 45 | $rangeFrom->format(DateTime::ATOM), 46 | $range->getEndSign(), 47 | $rangeTo->format(DateTime::ATOM) 48 | ); 49 | } 50 | 51 | 52 | private function 
typeResolver(string $value): string 53 | { 54 | return "type = '${value}'"; 55 | } 56 | 57 | 58 | private function priceResolver(Range $range): string 59 | { 60 | $condition = ''; 61 | if ($range->isStartDefined()) { 62 | $condition .= sprintf('price %s %s', $range->getStartSign(), $range->getStartValue()); 63 | } 64 | 65 | if ($range->isEndDefined()) { 66 | if ($condition !== '') { 67 | $condition .= ' AND '; 68 | } 69 | $condition .= sprintf('price %s %s', $range->getEndSign(), $range->getEndValue()); 70 | } 71 | 72 | return $condition; 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /tests/Token/Token/RangeTest.php: -------------------------------------------------------------------------------- 1 | expectException(InvalidArgumentException::class); 38 | new Range('[a TO b]', 0, '', 'a', 'b', $startType, $endType); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /tests/Tokenizer/FullTest.php: -------------------------------------------------------------------------------- 1 | getTokenExtractor(); 530 | $tokenizer = new Tokenizer($tokenExtractor); 531 | $tokenSequence = $tokenizer->tokenize($string); 532 | self::assertInstanceOf(TokenSequence::class, $tokenSequence); 533 | self::assertEquals($expectedTokens, $tokenSequence->getTokens()); 534 | self::assertEquals($string, $tokenSequence->getSource()); 535 | } 536 | 537 | 538 | public function providerForTestTokenizeNotRecognized(): array 539 | { 540 | return [ 541 | [ 542 | ( 543 | $blah = mb_convert_encoding( 544 | '👩👩👧👧', 545 | 'UTF-8', 546 | 'HTML-ENTITIES' 547 | ) 548 | ) . '"', 549 | [new WordToken($blah, 0, '', $blah), new Token(Tokenizer::TOKEN_BAILOUT, '"', 7)], 550 | ], 551 | ['"' . 
$blah, [new Token(Tokenizer::TOKEN_BAILOUT, '"', 0), new WordToken($blah, 1, '', $blah)]], 552 | ['word"', [new WordToken('word', 0, '', 'word'), new Token(Tokenizer::TOKEN_BAILOUT, '"', 4)]], 553 | [ 554 | 'one"two', 555 | [ 556 | new WordToken('one', 0, '', 'one'), 557 | new Token(Tokenizer::TOKEN_BAILOUT, '"', 3), 558 | new WordToken('two', 4, '', 'two'), 559 | ], 560 | ], 561 | [ 562 | 'šđ"čćž', 563 | [ 564 | new WordToken('šđ', 0, '', 'šđ'), 565 | new Token(Tokenizer::TOKEN_BAILOUT, '"', 2), 566 | new WordToken('čćž', 3, '', 'čćž'), 567 | ], 568 | ], 569 | ['AND"', [new Token(Tokenizer::TOKEN_LOGICAL_AND, 'AND', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 3)]], 570 | ['OR"', [new Token(Tokenizer::TOKEN_LOGICAL_OR, 'OR', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 2)]], 571 | ['NOT"', [new Token(Tokenizer::TOKEN_LOGICAL_NOT, 'NOT', 0), new Token(Tokenizer::TOKEN_BAILOUT, '"', 3)]], 572 | ]; 573 | } 574 | 575 | 576 | /** 577 | * @dataProvider providerForTestTokenizeNotRecognized 578 | * 579 | * @param string $string 580 | * @param Token[] $expectedTokens 581 | */ 582 | public function testTokenizeNotRecognized($string, array $expectedTokens): void 583 | { 584 | $tokenExtractor = $this->getTokenExtractor(); 585 | $tokenizer = new Tokenizer($tokenExtractor); 586 | $tokenSequence = $tokenizer->tokenize($string); 587 | self::assertInstanceOf(TokenSequence::class, $tokenSequence); 588 | self::assertEquals($expectedTokens, $tokenSequence->getTokens()); 589 | self::assertEquals($string, $tokenSequence->getSource()); 590 | } 591 | 592 | 593 | protected function getTokenExtractor(): AbstractTokenExtractor 594 | { 595 | return new Full; 596 | } 597 | 598 | } 599 | -------------------------------------------------------------------------------- /tests/Tokenizer/TextTest.php: -------------------------------------------------------------------------------- 1 | [new WordToken('#tag', 0, '', '#tag')], 25 | '\#tag' => [new WordToken('\#tag', 0, '', '\#tag')], 26 | '#_tag-tag' 
=> [new WordToken('#_tag-tag', 0, '', '#_tag-tag')], 27 | '#tag+' => [new WordToken('#tag+', 0, '', '#tag+')], 28 | '#tag-' => [new WordToken('#tag-', 0, '', '#tag-')], 29 | '#tag!' => [new WordToken('#tag!', 0, '', '#tag!')], 30 | "#tag\n" => [new WordToken('#tag', 0, '', '#tag'), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 4)], 31 | '#tag ' => [new WordToken('#tag', 0, '', '#tag'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 4)], 32 | '#tag(' => [new WordToken('#tag', 0, '', '#tag'), new GroupBeginToken('(', 4, '(', null)], 33 | '#tag)' => [new WordToken('#tag', 0, '', '#tag'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 4)], 34 | '@user' => [new WordToken('@user', 0, '', '@user')], 35 | '@user.user' => [new WordToken('@user.user', 0, '', '@user.user')], 36 | '\@user' => [new WordToken('\@user', 0, '', '\@user')], 37 | '@_user-user' => [new WordToken('@_user-user', 0, '', '@_user-user')], 38 | '@user+' => [new WordToken('@user+', 0, '', '@user+')], 39 | '@user-' => [new WordToken('@user-', 0, '', '@user-')], 40 | '@user!' 
=> [new WordToken('@user!', 0, '', '@user!')], 41 | "@user\n" => [new WordToken('@user', 0, '', '@user'), new Token(Tokenizer::TOKEN_WHITESPACE, "\n", 5)], 42 | '@user ' => [new WordToken('@user', 0, '', '@user'), new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5)], 43 | '@user(' => [new WordToken('@user', 0, '', '@user'), new GroupBeginToken('(', 5, '(', null)], 44 | '@user)' => [new WordToken('@user', 0, '', '@user'), new Token(Tokenizer::TOKEN_GROUP_END, ')', 5)], 45 | '[a TO b]' => [ 46 | new WordToken('[a', 0, '', '[a'), 47 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), 48 | new WordToken('TO', 4, '', 'TO'), 49 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 6), 50 | new WordToken('b]', 7, '', 'b]'), 51 | ], 52 | '[a TO b}' => [ 53 | new WordToken('[a', 0, '', '[a'), 54 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), 55 | new WordToken('TO', 3, '', 'TO'), 56 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), 57 | new WordToken('b}', 6, '', 'b}'), 58 | ], 59 | '{a TO b}' => [ 60 | new WordToken('{a', 0, '', '{a'), 61 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), 62 | new WordToken('TO', 3, '', 'TO'), 63 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), 64 | new WordToken('b}', 6, '', 'b}'), 65 | ], 66 | '{a TO b]' => [ 67 | new WordToken('{a', 0, '', '{a'), 68 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), 69 | new WordToken('TO', 3, '', 'TO'), 70 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), 71 | new WordToken('b]', 6, '', 'b]'), 72 | ], 73 | '[2017-01-01 TO 2017-01-05]' => [ 74 | new WordToken('[2017-01-01', 0, '', '[2017-01-01'), 75 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 11), 76 | new WordToken('TO', 12, '', 'TO'), 77 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 14), 78 | new WordToken('2017-01-05]', 15, '', '2017-01-05]'), 79 | ], 80 | '[20 TO *]' => [ 81 | new WordToken('[20', 0, '', '[20'), 82 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 3), 83 | new WordToken('TO', 7, '', 'TO'), 84 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 9), 85 | 
new WordToken('*]', 12, '', '*]'), 86 | ], 87 | '[* TO 20]' => [ 88 | new WordToken('[*', 0, '', '[*'), 89 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 2), 90 | new WordToken('TO', 3, '', 'TO'), 91 | new Token(Tokenizer::TOKEN_WHITESPACE, ' ', 5), 92 | new WordToken('20]', 6, '', '20]'), 93 | ], 94 | 'domain:domain:' => [new WordToken('domain:domain:', 0, '', 'domain:domain:')], 95 | 'some.domain:some.domain:' 96 | => [new WordToken('some.domain:some.domain:', 0, '', 'some.domain:some.domain:')], 97 | 'domain:domain:domain:domain' => [ 98 | new WordToken('domain:domain:domain:domain', 0, '', 'domain:domain:domain:domain'), 99 | ], 100 | 'domain\:' => [new WordToken('domain\:', 0, '', 'domain\:')], 101 | 'domain\::' => [new WordToken('domain\::', 0, '', 'domain\::')], 102 | 'domain:word' => [new WordToken('domain:word', 0, '', 'domain:word')], 103 | 'domain\:word' => [new WordToken('domain\:word', 0, '', 'domain\:word')], 104 | 'domain:"phrase"' => [ 105 | new WordToken('domain:', 0, '', 'domain:'), 106 | new PhraseToken('"phrase"', 7, '', '"', 'phrase'), 107 | ], 108 | 'some.domain:"phrase"' => [ 109 | new WordToken('some.domain:', 0, '', 'some.domain:'), 110 | new PhraseToken('"phrase"', 12, '', '"', 'phrase'), 111 | ], 112 | 'domain\:"phrase"' => [ 113 | new WordToken('domain\:', 0, '', 'domain\:'), 114 | new PhraseToken('"phrase"', 8, '', '"', 'phrase'), 115 | ], 116 | 'domain:(one)' => [ 117 | new WordToken('domain:', 0, '', 'domain:'), 118 | new GroupBeginToken('(', 7, '(', ''), 119 | new WordToken('one', 8, '', 'one'), 120 | new Token(Tokenizer::TOKEN_GROUP_END, ')', 11), 121 | ], 122 | 'some.domain:(one)' => [ 123 | new WordToken('some.domain:', 0, '', 'some.domain:'), 124 | new GroupBeginToken('(', 12, '(', ''), 125 | new WordToken('one', 13, '', 'one'), 126 | new Token(Tokenizer::TOKEN_GROUP_END, ')', 16), 127 | ], 128 | ]; 129 | } 130 | 131 | 132 | /** 133 | * @dataProvider providerForTestTokenize 134 | * 135 | * @param string $string 136 | */ 137 | 
public function testTokenize($string, array $expectedTokens): void 138 | { 139 | $expectedTokens = $this->getExpectedFixtureWithOverride($string, $expectedTokens); 140 | parent::testTokenize($string, $expectedTokens); 141 | } 142 | 143 | 144 | /** 145 | * @param string $string 146 | * 147 | * @return Token[] 148 | */ 149 | protected function getExpectedFixtureWithOverride($string, array $expectedTokens) 150 | { 151 | if (isset(self::$fixtureOverride[$string])) { 152 | return self::$fixtureOverride[$string]; 153 | } 154 | return $expectedTokens; 155 | } 156 | 157 | 158 | protected function getTokenExtractor(): AbstractTokenExtractor 159 | { 160 | return new Text; 161 | } 162 | 163 | } 164 | -------------------------------------------------------------------------------- /tests/Tokenizer/TokenizerTest.php: -------------------------------------------------------------------------------- 1 | getMockBuilder(AbstractTokenExtractor::class) 22 | ->setMethods(['getExpressionTypeMap']) 23 | ->getMockForAbstractClass(); 24 | 25 | $extractorMock->expects(self::once()) 26 | ->method('getExpressionTypeMap') 27 | ->willReturn([ 28 | '/(?:\D+|<\d+>)*[!?]/' => Tokenizer::TOKEN_WHITESPACE, 29 | ]); 30 | 31 | /** @var AbstractTokenExtractor $extractor */ 32 | $extractor = $extractorMock; 33 | $extractor->extract('foobar foobar foobar', 0); 34 | } 35 | 36 | 37 | public function testFullExtractTermTokenThrowsException(): void 38 | { 39 | self::expectException(RuntimeException::class); 40 | self::expectExceptionMessage('Could not extract term token from the given data'); 41 | 42 | $extractor = new Full(); 43 | $reflectedClass = new ReflectionClass($extractor); 44 | $reflectedProperty = $reflectedClass->getProperty('expressionTypeMap'); 45 | $reflectedProperty->setAccessible(true); 46 | $reflectedProperty->setValue([ 47 | '/(? 
foobar)/' => Tokenizer::TOKEN_TERM, 48 | ]); 49 | $extractor->extract('foobar', 0); 50 | } 51 | 52 | 53 | public function testTextExtractTermTokenThrowsException(): void 54 | { 55 | self::expectException(RuntimeException::class); 56 | self::expectExceptionMessage('Could not extract term token from the given data'); 57 | 58 | $extractor = new Text(); 59 | $reflectedClass = new ReflectionClass($extractor); 60 | $reflectedProperty = $reflectedClass->getProperty('expressionTypeMap'); 61 | $reflectedProperty->setAccessible(true); 62 | $reflectedProperty->setValue([ 63 | '/(? foobar)/' => Tokenizer::TOKEN_TERM, 64 | ]); 65 | $extractor->extract('foobar', 0); 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /tests/Value/NodeTraversalTest.php: -------------------------------------------------------------------------------- 1 | getMockForAbstractClass(AbstractNode::class); 25 | /** @var AbstractNode $secondMember */ 26 | $secondMember = $this->getMockForAbstractClass(AbstractNode::class); 27 | $nodes = (new Group([$firstMember, $secondMember]))->getNodes(); 28 | 29 | self::assertSame($firstMember, $nodes[0]); 30 | self::assertSame($secondMember, $nodes[1]); 31 | } 32 | 33 | 34 | public function testLogicalAndNode(): void 35 | { 36 | $leftOperand = $this->getMockForAbstractClass(AbstractNode::class); 37 | $rightOperand = $this->getMockForAbstractClass(AbstractNode::class); 38 | $token = new Token(Tokenizer::TOKEN_TERM, '', 0); 39 | $nodes = (new LogicalAnd($leftOperand, $rightOperand, $token))->getNodes(); 40 | 41 | self::assertSame($leftOperand, $nodes[0]); 42 | self::assertSame($rightOperand, $nodes[1]); 43 | } 44 | 45 | 46 | public function testLogicalNotNode(): void 47 | { 48 | $operand = $this->getMockForAbstractClass(AbstractNode::class); 49 | $token = new Token(Tokenizer::TOKEN_TERM, '', 0); 50 | $nodes = (new LogicalNot($operand, $token))->getNodes(); 51 | 52 | self::assertSame($operand, $nodes[0]); 53 | } 54 | 
55 | 56 | public function testLogicalOrNode(): void 57 | { 58 | $leftOperand = $this->getMockForAbstractClass(AbstractNode::class); 59 | $rightOperand = $this->getMockForAbstractClass(AbstractNode::class); 60 | $token = new Token(Tokenizer::TOKEN_TERM, '', 0); 61 | $nodes = (new LogicalOr($leftOperand, $rightOperand, $token))->getNodes(); 62 | 63 | self::assertSame($leftOperand, $nodes[0]); 64 | self::assertSame($rightOperand, $nodes[1]); 65 | } 66 | 67 | 68 | public function testMandatoryNode(): void 69 | { 70 | $operand = $this->getMockForAbstractClass(AbstractNode::class); 71 | $token = new Token(Tokenizer::TOKEN_TERM, '', 0); 72 | $nodes = (new Mandatory($operand, $token))->getNodes(); 73 | 74 | self::assertSame($operand, $nodes[0]); 75 | } 76 | 77 | 78 | public function testProhibitedNode(): void 79 | { 80 | $operand = $this->getMockForAbstractClass(AbstractNode::class); 81 | $token = new Token(Tokenizer::TOKEN_TERM, '', 0); 82 | $nodes = (new Prohibited($operand, $token))->getNodes(); 83 | 84 | self::assertSame($operand, $nodes[0]); 85 | } 86 | 87 | 88 | public function testQueryNode(): void 89 | { 90 | /** @var AbstractNode $firstMember */ 91 | $firstMember = $this->getMockForAbstractClass(AbstractNode::class); 92 | /** @var AbstractNode $secondMember */ 93 | $secondMember = $this->getMockForAbstractClass(AbstractNode::class); 94 | $nodes = (new Query([$firstMember, $secondMember]))->getNodes(); 95 | 96 | self::assertSame($firstMember, $nodes[0]); 97 | self::assertSame($secondMember, $nodes[1]); 98 | } 99 | 100 | 101 | public function testTermNode(): void 102 | { 103 | /** @var Token $token */ 104 | $token = $this->getMockBuilder(Token::class)->disableOriginalConstructor()->getMock(); 105 | $nodes = (new Term($token))->getNodes(); 106 | 107 | self::assertEmpty($nodes); 108 | } 109 | 110 | } 111 | -------------------------------------------------------------------------------- /tests/bootstrap.php: 
-------------------------------------------------------------------------------- 1 |