├── .editorconfig ├── .gitignore ├── CHANGELOG-0.x.md ├── CHANGELOG-1.x.md ├── CHANGELOG-2.x.md ├── CHANGELOG-3.x.md ├── LICENSE.md ├── README.md ├── composer.json ├── examples ├── elastica.php └── xml.php ├── phpunit.xml.dist ├── src ├── Builder │ ├── AbstractQueryBuilder.php │ ├── ElasticaQueryBuilder.php │ ├── QueryBuilder.php │ └── XmlQueryBuilder.php ├── Enum │ ├── BoolOperator.php │ └── ComparisonOperator.php ├── Node │ ├── Date.php │ ├── DateRange.php │ ├── Emoji.php │ ├── Emoticon.php │ ├── Field.php │ ├── Hashtag.php │ ├── Mention.php │ ├── Node.php │ ├── NumberRange.php │ ├── Numbr.php │ ├── Phrase.php │ ├── Range.php │ ├── Subquery.php │ ├── Url.php │ ├── Word.php │ └── WordRange.php ├── ParsedQuery.php ├── QueryParser.php ├── Token.php ├── TokenStream.php └── Tokenizer.php └── tests ├── Builder └── XmlQueryBuilderTest.php ├── Fixtures └── test-queries.php ├── QueryParserTest.php ├── TokenizerTest.php └── bootstrap.php /.editorconfig: -------------------------------------------------------------------------------- 1 | # editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | charset = utf-8 7 | end_of_line = lf 8 | indent_size = 2 9 | indent_style = space 10 | insert_final_newline = true 11 | trim_trailing_whitespace = true 12 | 13 | [*.{php,py}] 14 | indent_size = 4 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Symfony App Files # 2 | ############ 3 | *.cache 4 | bin/ 5 | build/ 6 | cache/ 7 | logs/ 8 | tmp/ 9 | vendor/ 10 | 11 | # Deployment/IDE Tools # 12 | ############ 13 | autoload.php 14 | composer.lock 15 | composer.phar 16 | phpunit.xml 17 | .buildpath 18 | *.iml 19 | .idea/ 20 | .phpunit.result.cache 21 | .project 22 | .settings 23 | 24 | # Compiled source # 25 | ################### 26 | *.com 27 | *.class 28 | *.dll 29 | *.exe 30 | *.o 31 | *.so 32 | 33 | # Packages # 34 | ############ 35 | # it's better to 
unpack these files and commit the raw source 36 | # git has its own built in compression methods 37 | *.7z 38 | *.dmg 39 | *.gz 40 | *.iso 41 | *.jar 42 | *.rar 43 | *.tar 44 | *.zip 45 | 46 | # Logs and databases # 47 | ###################### 48 | *.log 49 | *.sql 50 | *.sqlite 51 | 52 | # OS generated files # 53 | ###################### 54 | .DS_Store 55 | .DS_Store? 56 | ._* 57 | .Spotlight-V100 58 | .Trashes 59 | Icon? 60 | ehthumbs.db 61 | Thumbs.db 62 | 63 | # Vagrant provisioning # 64 | ######################## 65 | .vagrant/ 66 | -------------------------------------------------------------------------------- /CHANGELOG-0.x.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG for 0.x 2 | This changelog references the relevant changes done in 0.x versions. 3 | 4 | 5 | ## v0.3.2 6 | * BUG :: Add minimum requirement check for word matching `[a-zA-Z0-9]+` and ignore empty phrases. 7 | 8 | 9 | ## v0.3.1 10 | * BUG :: Fix invalid string casting on Numbr in ElasticaQueryBuilder. 11 | 12 | 13 | ## v0.3.0 14 | __BREAKING CHANGES__ 15 | 16 | * Update `ElasticaQueryBuilder` to use `"ruflin/elastica": "~5.3"`. 17 | * Require php `>=7.1` in `composer.json`. 18 | * Add php7 type hinting and use `declare(strict_types=1);`. 19 | 20 | 21 | ## v0.2.1 22 | * pull #9: Respect boolean operator preceding subquery. 23 | 24 | 25 | ## v0.2.0 26 | __BREAKING CHANGES__ 27 | 28 | * issue #7: Update `ElasticaQueryBuilder` to use 2.x queries/filters. Requires `"ruflin/elastica": "~3.2"`. 29 | * issue #6: Make TimeZone configurable on any builders that use date nodes. 30 | * The `Number` class was renamed to `Numbr` to prevent issue with scalar type hints in php7. 31 | 32 | 33 | ## v0.1.2 34 | * Allow for `gdbots/common` ~0.1 or ~1.0. 35 | 36 | 37 | ## v0.1.1 38 | * issue #4: Adjust ElasticaQueryBuilder to be more "AND" like by default. 39 | 40 | 41 | ## v0.1.0 42 | * Initial version. 
43 | -------------------------------------------------------------------------------- /CHANGELOG-1.x.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG for 1.x 2 | This changelog references the relevant changes done in 1.x versions. 3 | 4 | 5 | ## v1.0.0 6 | * Initial stable version. 7 | -------------------------------------------------------------------------------- /CHANGELOG-2.x.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG for 2.x 2 | This changelog references the relevant changes done in 2.x versions. 3 | 4 | 5 | ## v2.0.2 6 | * Add support for utf-8 characters when parsing words. 7 | 8 | 9 | ## v2.0.1 10 | * Do not truncate input in `Tokenizer::scan`. Removed `substr($input, 0, 256)` rule as we're unsure where/why it's there and seems safe to remove. 11 | 12 | 13 | ## v2.0.0 14 | __BREAKING CHANGES__ 15 | 16 | * Require php `>=7.4` 17 | * Uses php7 type hinting throughout with `declare(strict_types=1);` 18 | * Uses `"ruflin/elastica": "^7.0"` 19 | -------------------------------------------------------------------------------- /CHANGELOG-3.x.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG for 3.x 2 | This changelog references the relevant changes done in 3.x versions. 3 | 4 | 5 | ## v3.0.0 6 | __BREAKING CHANGES__ 7 | 8 | * Require php 8.1. 9 | * Use new php enum instead of the home grown versions. 10 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # Apache License 2 | Version 2.0, January 2004 3 | 4 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 5 | 6 | ## 1. Definitions. 7 | 8 | "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 9 | through 9 of this document. 
10 | 11 | "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the 12 | License. 13 | 14 | "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled 15 | by, or are under common control with that entity. For the purposes of this definition, "control" means 16 | (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract 17 | or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial 18 | ownership of such entity. 19 | 20 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 21 | 22 | "Source" form shall mean the preferred form for making modifications, including but not limited to software 23 | source code, documentation source, and configuration files. 24 | 25 | "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, 26 | including but not limited to compiled object code, generated documentation, and conversions to other media 27 | types. 28 | 29 | "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, 30 | as indicated by a copyright notice that is included in or attached to the work (an example is provided in the 31 | Appendix below). 32 | 33 | "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) 34 | the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, 35 | as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not 36 | include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work 37 | and Derivative Works thereof. 
38 | 39 | "Contribution" shall mean any work of authorship, including the original version of the Work and any 40 | modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to 41 | Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to 42 | submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of 43 | electronic, verbal, or written communication sent to the Licensor or its representatives, including but not 44 | limited to communication on electronic mailing lists, source code control systems, and issue tracking systems 45 | that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but 46 | excluding communication that is conspicuously marked or otherwise designated in writing by the copyright 47 | owner as "Not a Contribution." 48 | 49 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been 50 | received by Licensor and subsequently incorporated within the Work. 51 | 52 | ## 2. Grant of Copyright License. 53 | 54 | Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, 55 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare 56 | Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such 57 | Derivative Works in Source or Object form. 58 | 59 | ## 3. Grant of Patent License. 
60 | 61 | Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, 62 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent 63 | license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such 64 | license applies only to those patent claims licensable by such Contributor that are necessarily infringed by 65 | their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such 66 | Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim 67 | or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work 68 | constitutes direct or contributory patent infringement, then any patent licenses granted to You under this 69 | License for that Work shall terminate as of the date such litigation is filed. 70 | 71 | ## 4. Redistribution. 72 | 73 | You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without 74 | modifications, and in Source or Object form, provided that You meet the following conditions: 75 | 76 | 1. You must give any other recipients of the Work or Derivative Works a copy of this License; and 77 | 78 | 2. You must cause any modified files to carry prominent notices stating that You changed the files; and 79 | 80 | 3. You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, 81 | trademark, and attribution notices from the Source form of the Work, excluding those notices that do 82 | not pertain to any part of the Derivative Works; and 83 | 84 | 4. 
If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that 85 | You distribute must include a readable copy of the attribution notices contained within such NOTICE 86 | file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one 87 | of the following places: within a NOTICE text file distributed as part of the Derivative Works; within 88 | the Source form or documentation, if provided along with the Derivative Works; or, within a display 89 | generated by the Derivative Works, if and wherever such third-party notices normally appear. The 90 | contents of the NOTICE file are for informational purposes only and do not modify the License. You may 91 | add Your own attribution notices within Derivative Works that You distribute, alongside or as an 92 | addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be 93 | construed as modifying the License. 94 | 95 | You may add Your own copyright statement to Your modifications and may provide additional or different license 96 | terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative 97 | Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the 98 | conditions stated in this License. 99 | 100 | ## 5. Submission of Contributions. 101 | 102 | Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by 103 | You to the Licensor shall be under the terms and conditions of this License, without any additional terms or 104 | conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate 105 | license agreement you may have executed with Licensor regarding such Contributions. 106 | 107 | ## 6. Trademarks. 
108 | 109 | This License does not grant permission to use the trade names, trademarks, service marks, or product names of 110 | the Licensor, except as required for reasonable and customary use in describing the origin of the Work and 111 | reproducing the content of the NOTICE file. 112 | 113 | ## 7. Disclaimer of Warranty. 114 | 115 | Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor 116 | provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 117 | or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, 118 | MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the 119 | appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of 120 | permissions under this License. 121 | 122 | ## 8. Limitation of Liability. 123 | 124 | In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless 125 | required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any 126 | Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential 127 | damages of any character arising as a result of this License or out of the use or inability to use the Work 128 | (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or 129 | any and all other commercial damages or losses), even if such Contributor has been advised of the possibility 130 | of such damages. 131 | 132 | ## 9. Accepting Warranty or Additional Liability. 
133 | 134 | While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, 135 | acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this 136 | License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole 137 | responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold 138 | each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason 139 | of your accepting any such warranty or additional liability. 140 | 141 | END OF TERMS AND CONDITIONS 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | query-parser-php 2 | ============= 3 | 4 | [![Build Status](https://api.travis-ci.org/gdbots/query-parser-php.svg)](https://travis-ci.org/gdbots/query-parser-php) 5 | 6 | Php library that converts search queries into words, phrases, hashtags, mentions, etc. 7 | 8 | This library supports a simple search query standard. It is meant to support the most common search combinations that a 9 | user would likely enter into your website search box or dashboard application. It intentionally limits the more complex nested capabilities 10 | that you might expect from SQL builders, Lucene, etc. 11 | 12 | 13 | ## Tokenizer 14 | Tokens are split on whitespace unless enclosed in double quotes. The following tokens are extracted by the `Tokenizer`: 15 | 16 | ``` php 17 | class Token implements \JsonSerializable 18 | { 19 | const T_EOI = 0; // end of input 20 | const T_WHITE_SPACE = 1; 21 | const T_IGNORED = 2; // an ignored token, e.g. #, !, etc. when found by themselves, don't do anything with them. 
22 | const T_NUMBER = 3; // 10, 0.8, .64, 6.022e23 23 | const T_REQUIRED = 4; // '+' 24 | const T_PROHIBITED = 5; // '-' 25 | const T_GREATER_THAN = 6; // '>' 26 | const T_LESS_THAN = 7; // '<' 27 | const T_EQUALS = 8; // '=' 28 | const T_FUZZY = 9; // '~' 29 | const T_BOOST = 10; // '^' 30 | const T_RANGE_INCL_START = 11; // '[' 31 | const T_RANGE_INCL_END = 12; // ']' 32 | const T_RANGE_EXCL_START = 13; // '{' 33 | const T_RANGE_EXCL_END = 14; // '}' 34 | const T_SUBQUERY_START = 15; // '(' 35 | const T_SUBQUERY_END = 16; // ')' 36 | const T_WILDCARD = 17; // '*' 37 | const T_AND = 18; // 'AND' or '&&' 38 | const T_OR = 19; // 'OR' or '||' 39 | const T_TO = 20; // 'TO' or '..' 40 | const T_WORD = 21; 41 | const T_FIELD_START = 22; // The "field:" portion of "field:value". 42 | const T_FIELD_END = 23; // when a field lexeme ends, i.e. "field:value". This token has no value. 43 | const T_PHRASE = 24; // Phrase (one or more quoted words) 44 | const T_URL = 25; // a valid url 45 | const T_DATE = 26; // date in the format YYYY-MM-DD 46 | const T_HASHTAG = 27; // #hashtag 47 | const T_MENTION = 28; // @mention 48 | const T_EMOTICON = 29; // see https://en.wikipedia.org/wiki/Emoticon 49 | const T_EMOJI = 30; // see https://en.wikipedia.org/wiki/Emoji 50 | ``` 51 | The `T_WHITE_SPACE` and `T_IGNORED` tokens are removed before the output is returned by the scan process. 52 | 53 | 54 | ## QueryParser 55 | 56 | The default query parser produces a `ParsedQuery` object which can be used with a builder to produce a query 57 | for a given search service. 
58 | 59 | 60 | #### Basic Usage 61 | 62 | ``` php 63 | setHashtagFieldName('tags'); 70 | 71 | $result = $parser->parse('hello^5 planet:earth +date:2015-12-25 #omg'); 72 | echo $builder->addParsedQuery($result)->toXmlString(); 73 | ``` 74 | Produces the following xml: 75 | ``` xml 76 | 77 | 78 | hello 79 | 80 | earth 81 | 82 | 83 | 2015-12-25 84 | 85 | 86 | omg 87 | 88 | 89 | ``` 90 | 91 | 92 | To get a list of `Node` objects by type, use: 93 | 94 | ``` php 95 | parse('#hashtag1 AND #hashtag2'); 100 | $hashtags = $result->getNodesOfType(Hashtag::NODE_TYPE); 101 | ``` 102 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gdbots/query-parser", 3 | "homepage": "https://github.com/gdbots/query-parser-php", 4 | "description": "Php library that converts search queries into terms, phrases, hashtags, mentions, etc.", 5 | "type": "library", 6 | "license": "Apache-2.0", 7 | "require": { 8 | "php": ">=8.1" 9 | }, 10 | "require-dev": { 11 | "phpunit/phpunit": "^9.5", 12 | "ruflin/elastica": "^7.1" 13 | }, 14 | "autoload": { 15 | "psr-4": { 16 | "Gdbots\\QueryParser\\": "src" 17 | } 18 | }, 19 | "autoload-dev": { 20 | "psr-4": { 21 | "Gdbots\\Tests\\QueryParser\\": "tests" 22 | } 23 | }, 24 | "scripts": { 25 | "test": "vendor/bin/phpunit" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /examples/elastica.php: -------------------------------------------------------------------------------- 1 | [['host' => $host, 'port' => $port]]]); 39 | $client->setLogger(new EchoLogger()); 40 | 41 | $parser = new QueryParser(); 42 | /** @var ElasticaQueryBuilder $builder */ 43 | $builder = (new ElasticaQueryBuilder()) 44 | ->addNestedField('dynamic_fields') 45 | ->setDefaultFieldName('_all') 46 | ->setEmoticonFieldName('emoticons') 47 | ->setHashtagFieldName('hashtags') 48 | 
->setMentionFieldName('mentions') 49 | ->addFullTextSearchField('subject') 50 | ->addFullTextSearchField('dynamic_fields.string_val') 51 | ->addFullTextSearchField('dynamic_fields.text_val') 52 | ->setLocalTimeZone(new DateTimeZone('America/Los_Angeles')) 53 | ; 54 | 55 | $qs = isset($argv[1]) ? $argv[1] : 'test'; 56 | $parsedQuery = $parser->parse($qs); 57 | if (!$parsedQuery->hasAMatchableNode()) { 58 | echo 'query: '.$qs.PHP_EOL; 59 | echo 'has no matchable nodes.'.str_repeat(PHP_EOL, 3); 60 | exit; 61 | } 62 | $builder->addParsedQuery($parsedQuery); 63 | 64 | $options = [Search::OPTION_FROM => 0, Search::OPTION_SIZE => 5]; 65 | $query = $builder->getBoolQuery(); 66 | /* 67 | $query = (new FunctionScore()) 68 | ->setQuery($query) 69 | ->setBoostMode(FunctionScore::BOOST_MODE_SUM) 70 | ->addFunction('field_value_factor', [ 71 | 'field' => 'priority', 72 | 'modifier' => 'none', 73 | ], null, 0.4); 74 | */ 75 | $query = \Elastica\Query::create($query); 76 | //$query->setExplain(true); 77 | $query->setSort(['date_sent' => 'desc']); 78 | $results = $client->getIndex($index)->search($query, $options); 79 | 80 | echo 'Total Time (ms) / Records Found:' . PHP_EOL; 81 | echo $results->getTotalTime() . 'ms / ' . $results->getTotalHits() . ' records' . str_repeat(PHP_EOL, 3); 82 | //echo json_encode($results->getResponse()->getData(), JSON_PRETTY_PRINT); 83 | 84 | foreach ($results as $result) { 85 | fgets(STDIN); 86 | echo json_encode($result->getSource(), JSON_PRETTY_PRINT) . 
PHP_EOL; 87 | echo str_repeat(PHP_EOL, 3).str_repeat('*', 70).str_repeat(PHP_EOL, 3); 88 | } 89 | -------------------------------------------------------------------------------- /examples/xml.php: -------------------------------------------------------------------------------- 1 | setEmoticonFieldName('emoticons') 14 | ->setHashtagFieldName('tags') 15 | ->setMentionFieldName('mentions') 16 | ; 17 | 18 | $header = str_repeat(PHP_EOL, 4).'#### %s'.PHP_EOL; 19 | 20 | foreach ($tests as $test) { 21 | $result = $parser->parse($test['input']); 22 | 23 | echo sprintf($header, 'START TEST: '.$test['name']); 24 | echo $test['input']; 25 | 26 | 27 | echo sprintf($header, 'RAW NODES AS JSON'); 28 | echo json_encode($result, JSON_PRETTY_PRINT); 29 | 30 | 31 | echo sprintf($header, 'NODES AS XML'); 32 | $xml = $builder->clear()->addParsedQuery($result)->toXmlString(); 33 | echo $xml; 34 | 35 | 36 | echo str_repeat(PHP_EOL, 10).str_repeat('*', 70).str_repeat(PHP_EOL, 5); 37 | fgets(STDIN); 38 | } 39 | -------------------------------------------------------------------------------- /phpunit.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | tests/ 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /src/Builder/AbstractQueryBuilder.php: -------------------------------------------------------------------------------- 1 | true, 38 | '_all' => true, 39 | 'title' => true, 40 | 'tiny_title' => true, 41 | 'short_title' => true, 42 | 'excerpt' => true, 43 | 'description' => true, 44 | 'overview' => true, 45 | 'summary' => true, 46 | 'story' => true, 47 | 'html' => true, 48 | 'text' => true, 49 | 'markdown' => true, 50 | 'content' => true, 51 | 'contents' => true, 52 | 'contents-continued' => true, 53 | 'contents-md' => true, 54 | 'contents-mobile' => true, 55 | 'mobile-contents' => true, 56 | 'txt-contents' => true, 57 | 'text-contents' => true, 58 | 'abstract' => true, 59 
| 'search_text' => true, 60 | 'cover' => true, 61 | 'bio' => true, 62 | 'mini_bio' => true, 63 | 'meta_title' => true, 64 | 'meta_description' => true, 65 | 'meta_keywords' => true, 66 | 'og_title' => true, 67 | 'og_description' => true, 68 | 'og_keywords' => true, 69 | 'seo_title' => true, 70 | 'seo_description' => true, 71 | 'seo_keywords' => true, 72 | 'img_credit' => true, 73 | 'img_caption' => true, 74 | 'credit' => true, 75 | 'caption' => true, 76 | 'img_credits' => true, 77 | 'img_captions' => true, 78 | 'image_credits' => true, 79 | 'image_captions' => true, 80 | 'credits' => true, 81 | 'captions' => true, 82 | 'full_name' => true, 83 | 'first_name' => true, 84 | 'last_name' => true, 85 | 'street1' => true, 86 | 'street2' => true, 87 | 'city' => true, 88 | 'address.street1' => true, 89 | 'address.street2' => true, 90 | 'address.city' => true, 91 | 'ctx_ip_geo.street1' => true, 92 | 'ctx_ip_geo.street2' => true, 93 | 'ctx_ip_geo.city' => true, 94 | ]; 95 | 96 | protected string $defaultFieldName = '_all'; 97 | protected ?string $emojiFieldName = null; 98 | protected ?string $emoticonFieldName = null; 99 | protected ?string $hashtagFieldName = null; 100 | protected ?string $mentionFieldName = null; 101 | protected ?\DateTimeZone $localTimeZone = null; 102 |

    /**
     * Resets the builder so it can be reused for another query.
     *
     * The base implementation keeps no query output of its own and only
     * returns the builder; concrete builders override it to drop whatever
     * they have accumulated.
     *
     * @return self
     */
    public function clear(): self
    {
        return $this;
    }

    /**
     * Replaces the whole set of field names that are treated as full text.
     *
     * @param string[] $fields Field names to register.
     *
     * @return self
     */
    final public function setFullTextSearchFields(array $fields): self
    {
        // Use the same `name => true` shape as the default map and
        // addFullTextSearchField(). array_flip() stored list positions
        // instead, which left the first field mapped to a falsy 0.
        $this->fullTextSearchFields = array_fill_keys($fields, true);
        return $this;
    }

    /**
     * Registers one additional field name as full text searchable.
     *
     * @param string $fieldName
     *
     * @return self
     */
    final public function addFullTextSearchField(string $fieldName): self
    {
        $this->fullTextSearchFields[$fieldName] = true;
        return $this;
    }

    /**
     * Removes a field name from the full text set; unknown names are ignored.
     *
     * @param string $fieldName
     *
     * @return self
     */
    final public function removeFullTextSearchField(string $fieldName): self
    {
        unset($this->fullTextSearchFields[$fieldName]);
        return $this;
    }

126 | final public function getFullTextSearchFields(): array 127 | { 128 | return
array_keys($this->fullTextSearchFields); 129 | }

    /**
     * Tells whether the given field is registered as a full text field.
     *
     * The name is lower-cased and trimmed before it is looked up.
     *
     * @param string $fieldName
     *
     * @return bool
     */
    final public function supportsFullTextSearch(string $fieldName): bool
    {
        $lookupKey = trim(strtolower($fieldName));

        return isset($this->fullTextSearchFields[$lookupKey]);
    }

    /**
     * Sets the default field name stored on the builder.
     *
     * @param string $fieldName
     *
     * @return self
     */
    final public function setDefaultFieldName(string $fieldName): self
    {
        $this->defaultFieldName = $fieldName;

        return $this;
    }

    /**
     * Sets the field that bare emoji nodes are wrapped in (see addEmoji).
     *
     * @param string $fieldName
     *
     * @return self
     */
    final public function setEmojiFieldName(string $fieldName): self
    {
        $this->emojiFieldName = $fieldName;

        return $this;
    }

    /**
     * Sets the field that bare emoticon nodes are wrapped in.
     *
     * @param string $fieldName
     *
     * @return self
     */
    final public function setEmoticonFieldName(string $fieldName): self
    {
        $this->emoticonFieldName = $fieldName;

        return $this;
    }

    /**
     * Sets the field that bare hashtag nodes are wrapped in.
     *
     * @param string $fieldName
     *
     * @return self
     */
    final public function setHashtagFieldName(string $fieldName): self
    {
        $this->hashtagFieldName = $fieldName;

        return $this;
    }

    /**
     * Sets the field that bare mention nodes are wrapped in.
     *
     * @param string $fieldName
     *
     * @return self
     */
    final public function setMentionFieldName(string $fieldName): self
    {
        $this->mentionFieldName = $fieldName;

        return $this;
    }

    /**
     * Stores the local time zone for use by concrete builders.
     *
     * @param \DateTimeZone $timeZone
     *
     * @return self
     */
    final public function setLocalTimeZone(\DateTimeZone $timeZone): self
    {
        $this->localTimeZone = $timeZone;

        return $this;
    }

    /**
     * Feeds every top level node of a parsed query into this builder.
     *
     * Each node calls back into the matching add* method through
     * acceptBuilder().
     *
     * @param ParsedQuery $parsedQuery
     *
     * @return self
     */
    final public function addParsedQuery(ParsedQuery $parsedQuery): self
    {
        foreach ($parsedQuery->getNodes() as $queryNode) {
            $queryNode->acceptBuilder($this);
        }

        return $this;
    }

    /**
     * Adds a date node; dates are always handled as exact terms.
     *
     * @param Date $date
     *
     * @return self
     */
    final public function addDate(Date $date): self
    {
        $this->handleTerm($date);

        return $this;
    }

    /**
     * Adds an emoji node.
     *
     * Outside of a field, and when an emoji field name is configured, the
     * emoji is wrapped in a Field of that name carrying the emoji's own bool
     * operator and boost. Otherwise it is handled as a plain term.
     *
     * @param Emoji $emoji
     *
     * @return self
     */
    final public function addEmoji(Emoji $emoji): self
    {
        $wrapInField = !$this->inField && null !== $this->emojiFieldName;

        if ($wrapInField) {
            return $this->addField(new Field(
                $this->emojiFieldName,
                $emoji,
                $emoji->getBoolOperator(),
                $emoji->useBoost(),
                $emoji->getBoost()
            ));
        }

        $this->handleTerm($emoji);

        return $this;
    }

205 | final public function addEmoticon(Emoticon
$emoticon): self 206 | { 207 | if ($this->inField || null === $this->emoticonFieldName) { 208 | $this->handleTerm($emoticon); 209 | return $this; 210 | } 211 | 212 | $field = new Field( 213 | $this->emoticonFieldName, 214 | $emoticon, 215 | $emoticon->getBoolOperator(), 216 | $emoticon->useBoost(), 217 | $emoticon->getBoost() 218 | ); 219 | 220 | return $this->addField($field);  221 | }

    /**
     * Adds a field node and everything nested inside it.
     *
     * Marks the builder as being "in a field", calls startField(), lets the
     * field's inner node call back into the builder, then calls endField().
     *
     * @param Field $field
     *
     * @return self
     *
     * @throws \LogicException When called while already inside a field or range.
     */
    final public function addField(Field $field): self
    {
        if ($this->inField || $this->inRange) {
            throw new \LogicException('A Field cannot be nested in another Field or Range.');
        }

        $this->inField = true;
        $this->currentField = $field;
        $this->queryOnFieldIsCacheable = $this->queryOnFieldIsCacheable($field);

        try {
            $this->startField($field, $this->queryOnFieldIsCacheable);
            $field->getNode()->acceptBuilder($this);
            $this->endField($field, $this->queryOnFieldIsCacheable);
        } finally {
            // Always leave the "in field" state, even when a hook or a nested
            // node throws. Otherwise the flag stays set and every later
            // addField() call on this builder fails with the nesting error.
            $this->inField = false;
            $this->currentField = null;
            $this->queryOnFieldIsCacheable = false;
        }

        return $this;
    }

    /**
     * Adds a hashtag node.
     *
     * Inside a field, or when no hashtag field name is configured, the
     * hashtag is handled as a plain term. Otherwise it is wrapped in a Field
     * of the configured name carrying the hashtag's bool operator and boost.
     *
     * @param Hashtag $hashtag
     *
     * @return self
     */
    final public function addHashtag(Hashtag $hashtag): self
    {
        if ($this->inField || null === $this->hashtagFieldName) {
            $this->handleTerm($hashtag);
            return $this;
        }

        $field = new Field(
            $this->hashtagFieldName,
            $hashtag,
            $hashtag->getBoolOperator(),
            $hashtag->useBoost(),
            $hashtag->getBoost()
        );

        return $this->addField($field);
    }

    /**
     * Adds a mention node.
     *
     * Inside a field, or when no mention field name is configured, the
     * mention is handled as a plain term. Otherwise it is wrapped in a Field
     * of the configured name carrying the mention's bool operator and boost.
     *
     * @param Mention $mention
     *
     * @return self
     */
    final public function addMention(Mention $mention): self
    {
        if ($this->inField || null === $this->mentionFieldName) {
            $this->handleTerm($mention);
            return $this;
        }

        $field = new Field(
            $this->mentionFieldName,
            $mention,
            $mention->getBoolOperator(),
            $mention->useBoost(),
            $mention->getBoost()
        );

        return $this->addField($field);
    }

277 | final public function addNumber(Numbr $number): self
278 | { 279 | $this->handleTerm($number); 280 | return $this; 281 | }

    /**
     * Adds a phrase node; phrases go through the text handling path.
     *
     * @param Phrase $phrase
     *
     * @return self
     */
    final public function addPhrase(Phrase $phrase): self
    {
        $this->handleText($phrase);
        return $this;
    }

    /**
     * Adds a range node, which is only legal directly inside a field.
     *
     * @param Range $range
     *
     * @return self
     *
     * @throws \LogicException When not in a field, or when already inside a
     *                         range or subquery.
     */
    final public function addRange(Range $range): self
    {
        if (!$this->inField || $this->inRange || $this->inSubquery) {
            throw new \LogicException('A Range can only be used within a field. e.g. rating:[1..5]');
        }

        $this->inRange = true;

        try {
            $this->handleRange($range, $this->currentField, $this->queryOnFieldIsCacheable);
        } finally {
            // Reset even when handleRange() throws so the builder is not left
            // permanently "in a range".
            $this->inRange = false;
        }

        return $this;
    }

    /**
     * Adds a subquery and all of the nodes it contains.
     *
     * Calls startSubquery(), lets each child node call back into the builder,
     * then calls endSubquery(). The current field (or null) is passed to both
     * hooks.
     *
     * @param Subquery $subquery
     *
     * @return self
     *
     * @throws \LogicException When called inside a range or another subquery.
     */
    final public function addSubquery(Subquery $subquery): self
    {
        if ($this->inRange || $this->inSubquery) {
            throw new \LogicException('A Subquery cannot be nested or within a Range.');
        }

        $this->inSubquery = true;

        try {
            $this->startSubquery($subquery, $this->currentField);

            foreach ($subquery->getNodes() as $node) {
                $node->acceptBuilder($this);
            }

            $this->endSubquery($subquery, $this->currentField);
        } finally {
            // Reset even when a hook or child node throws. Otherwise every
            // later subquery would be rejected as "nested".
            $this->inSubquery = false;
        }

        return $this;
    }

    /**
     * Adds a url node; urls are always handled as exact terms.
     *
     * @param Url $url
     *
     * @return self
     */
    final public function addUrl(Url $url): self
    {
        $this->handleTerm($url);
        return $this;
    }

    /**
     * Adds a word node; words go through the text handling path.
     *
     * @param Word $word
     *
     * @return self
     */
    final public function addWord(Word $word): self
    {
        $this->handleText($word);
        return $this;
    }

    /**
     * @return bool True while a field is being processed by addField().
     */
    final protected function inField(): bool
    {
        return $this->inField;
    }

    /**
     * @return bool True while a range is being processed by addRange().
     */
    final protected function inRange(): bool
    {
        return $this->inRange;
    }

    /**
     * @return bool True while a subquery is being processed by addSubquery().
     */
    final protected function inSubquery(): bool
    {
        return $this->inSubquery;
    }

347 | private function handleText(Node $node): void 348 | { 349 | if ($this->inField && !$this->supportsFullTextSearch($this->currentField->getName())) { 350 | $this->handleTerm($node); 351 | return; 352 | } 353 | 354 | /* 355 | * When in a simple field, the bool
operator is based on 356 | * the field, not the node in the field. 357 | * +field:value vs. field:+value 358 | */ 359 | if ($this->inField && !$this->currentField->hasCompoundNode()) { 360 | $isOptional = $this->currentField->isOptional(); 361 | $isRequired = $this->currentField->isRequired(); 362 | } else { 363 | $isOptional = $node->isOptional(); 364 | $isRequired = $node->isRequired(); 365 | } 366 | 367 | if ($node instanceof Word && $node->isStopWord()) { 368 | $this->shouldMatch($node, $this->currentField); 369 | return; 370 | } elseif ($isOptional) { 371 | $this->shouldMatch($node, $this->currentField); 372 | return; 373 | } elseif ($isRequired) { 374 | $this->mustMatch($node, $this->currentField); 375 | return; 376 | } 377 | 378 | $this->mustNotMatch($node, $this->currentField); 379 | }

    /**
     * Routes an exact-value node to the should / must / must-not term hook.
     *
     * @param Node $node
     */
    private function handleTerm(Node $node): void
    {
        /*
         * Inside a simple (non compound) field the bool operator comes from
         * the field rather than from the node it wraps:
         * +field:value vs. field:+value
         */
        $useFieldOperator = $this->inField && !$this->currentField->hasCompoundNode();
        $operatorSource = $useFieldOperator ? $this->currentField : $node;

        $isOptional = $operatorSource->isOptional();
        $isRequired = $operatorSource->isRequired();

        if ($isOptional) {
            $this->shouldMatchTerm($node, $this->currentField);
            return;
        }

        if ($isRequired) {
            $this->mustMatchTerm($node, $this->currentField, $this->queryOnFieldIsCacheable);
            return;
        }

        // Neither optional nor required, so the node is treated as prohibited.
        $this->mustNotMatchTerm($node, $this->currentField, $this->queryOnFieldIsCacheable);
    }

407 | /** 408 | * If the query on this particular field could be cached because it contains 409 | * only exact values, is not optional or boosted then the storage/search 410 | * provider might be able to cache the resultset or optimize the query 411 | * against this field.
/**
 * Decides whether the query on the given field may be handed to the
 * storage/search provider as a cacheable (exact value) query.
 *
 * A field is not cacheable when it is optional or boosted, when its node is
 * fuzzy, when the field supports full text search, or when the node is a
 * Subquery, WordRange, Phrase or a Word with a trailing wildcard.
 *
 * @param Field $field
 *
 * @return bool
 */
protected function queryOnFieldIsCacheable(Field $field): bool
{
    if ($field->isOptional()) {
        return false;
    }

    if ($field->useBoost()) {
        return false;
    }

    $inner = $field->getNode();

    if ($inner->useFuzzy()) {
        return false;
    }

    if ($this->supportsFullTextSearch($field->getName())) {
        return false;
    }

    if ($inner instanceof Subquery || $inner instanceof WordRange || $inner instanceof Phrase) {
        return false;
    }

    return !($inner instanceof Word && $inner->hasTrailingWildcard());
}
/**
 * Replaces the whole set of nested paths. Queries on a field whose parent
 * path is in this set are wrapped in a Nested query by addToBoolQuery().
 *
 * Paths are stored as keys mapped to true, the same shape addNestedField()
 * writes. (array_flip() stored the list index instead, which made the first
 * entry map to the falsy value 0.)
 *
 * @param string[] $fields
 *
 * @return self
 */
public function setNestedFields(array $fields): self
{
    $this->nestedFields = array_fill_keys($fields, true);
    return $this;
}
/**
 * Converts a Range node into an Elastica range query on the field and adds it
 * to the active bool query.
 *
 * @param Range $range
 * @param Field $field
 * @param bool  $cacheable
 */
protected function handleRange(Range $range, Field $field, bool $cacheable = false): void
{
    $method = match ($field->getBoolOperator()) {
        BoolOperator::REQUIRED => 'addMust',
        BoolOperator::PROHIBITED => 'addMustNot',
        default => 'addShould',
    };

    [$lowerOperator, $upperOperator] = $range->isExclusive() ? ['gt', 'lt'] : ['gte', 'lte'];

    $isDateRange = $range instanceof DateRange;
    $data = [];

    if ($range->hasLowerNode()) {
        $lower = $range->getLowerNode();
        $data[$lowerOperator] = $isDateRange
            ? $lower->toDateTime($this->localTimeZone)->format('Y-m-d')
            : $lower->getValue();
    }

    if ($range->hasUpperNode()) {
        $upper = $range->getUpperNode();
        // for dates the upper bound is moved forward by one day before formatting.
        $data[$upperOperator] = $isDateRange
            ? $upper->toDateTime($this->localTimeZone)->modify('+1 day')->format('Y-m-d')
            : $upper->getValue();
    }

    $fieldName = $field->getName();

    if ($cacheable) {
        // cacheable ranges go through addFilter (no boost is applied);
        // prohibited ones still have to use addMustNot.
        $target = 'addMustNot' === $method ? $method : 'addFilter';
        $this->addToBoolQuery($target, $fieldName, $this->qb->query()->range($fieldName, $data));
        return;
    }

    if ($field->useBoost()) {
        $data['boost'] = $field->getBoost();
    }

    $this->addToBoolQuery($method, $fieldName, $this->qb->query()->range($fieldName, $data));
}
/**
 * Closes the subquery: when the inner bool query collected anything it is
 * attached to the outer bool query, then the outer query becomes active again.
 *
 * @param Subquery $subquery
 * @param Field    $field
 */
protected function endSubquery(Subquery $subquery, ?Field $field = null): void
{
    $inner = $this->boolQuery;
    $outer = $this->outerBoolQuery;

    if (!empty($inner->getParams())) {
        $inner->setMinimumShouldMatch(1);

        // within a field, the field decides boost and bool operator;
        // otherwise the subquery itself does.
        $source = $this->inField() ? $field : $subquery;

        if ($source->useBoost()) {
            $inner->setBoost($source->getBoost());
        }

        $adder = match ($source->getBoolOperator()) {
            BoolOperator::REQUIRED => 'addMust',
            BoolOperator::PROHIBITED => 'addMustNot',
            default => 'addShould',
        };

        $outer->$adder($inner);
    }

    $this->boolQuery = $outer;
}
/**
 * Adds a text node to the active query using the given bool method
 * ("addMust", "addShould" or "addMustNot").
 *
 * Query type by node:
 *  - Phrase                      => match_phrase (fuzzy value is used as slop)
 *  - fuzzy node                  => fuzzy
 *  - Word with trailing wildcard => wildcard on the lower-cased value + "*"
 *  - anything else               => match with operator "and"
 *
 * Stop words are skipped entirely when ignoreStopWords is enabled.
 *
 * @param string $method
 * @param Node   $node
 * @param Field  $field
 */
protected function addTextToQuery(string $method, Node $node, ?Field $field = null): void
{
    if ($node instanceof Word && $node->isStopWord() && $this->ignoreStopWords) {
        return;
    }

    $fieldName = $this->inField() ? $field->getName() : $this->defaultFieldName;

    // Directly inside a field (not within a subquery) the field's modifiers
    // win over the node's own modifiers.
    if ($this->inField() && !$this->inSubquery()) {
        $useBoost = $field->useBoost();
        $boost = $field->getBoost();
        $useFuzzy = $field->useFuzzy();
        $fuzzy = $field->getFuzzy();
    } else {
        $useBoost = $node->useBoost();
        $boost = $node->getBoost();
        $useFuzzy = $node->useFuzzy();
        $fuzzy = $node->getFuzzy();
    }

    /*
     * Look for special chars and if found, enforce fuzzy.
     * todo: review this with more test cases
     */
    if (!$useFuzzy
        && $node instanceof Phrase
        && 'addShould' === $method
        && preg_match('/[^a-zA-Z0-9\s\._-]+/', $node->getValue())
    ) {
        $useFuzzy = true;
        $fuzzy = 1;
    }

    if ($node instanceof Phrase) {
        $data = ['query' => $node->getValue()];

        if ($useBoost) {
            $data['boost'] = $boost;
        }

        if ($useFuzzy) {
            $data['slop'] = $fuzzy;
        }

        $query = $this->qb->query()->match_phrase($fieldName, $data);
    } elseif ($useFuzzy) {
        $query = $this->qb->query()->fuzzy($fieldName, $node->getValue());
        $query->setFieldOption('fuzziness', $fuzzy);

        if ($useBoost) {
            $query->setFieldOption('boost', $boost);
        }
    } elseif ($node instanceof Word && $node->hasTrailingWildcard()) {
        // strtolower() only folds ASCII bytes, so a prefix such as "Édith"
        // kept its upper-case letter. Use the multibyte variant when the
        // mbstring extension is loaded and fall back to strtolower() if not.
        // NOTE(review): assumes the query text is UTF-8 - confirm.
        $prefix = $node->getValue();
        $prefix = function_exists('mb_strtolower')
            ? mb_strtolower($prefix, 'UTF-8')
            : strtolower($prefix);

        $query = $this->qb->query()->wildcard(
            $fieldName,
            $prefix . '*',
            $useBoost ? $boost : Word::DEFAULT_BOOST
        );
    } else {
        $data = ['query' => $node->getValue(), 'operator' => 'and', 'lenient' => true];

        if ($useBoost) {
            $data['boost'] = $boost;
        }

        $query = $this->qb->query()->match($fieldName, $data);
    }

    $this->addToBoolQuery($method, $fieldName, $query);
}
$field->getName() : $this->defaultFieldName; 367 | 368 | if ($this->inField() && !$this->inSubquery()) { 369 | $useBoost = $field->useBoost(); 370 | $boost = $field->getBoost(); 371 | } else { 372 | $useBoost = $node->useBoost(); 373 | $boost = $node->getBoost(); 374 | } 375 | 376 | if ('_exists_' === $fieldName) { 377 | $term = new Exists($value); 378 | $method = 'addMust'; 379 | $cacheable = true; 380 | } elseif ('_missing_' === $fieldName) { 381 | $term = new Exists($value); 382 | $method = 'addMustNot'; 383 | $cacheable = true; 384 | } elseif ($node instanceof Date) { 385 | $term = $this->createDateRangeForSingleNode( 386 | $fieldName, 387 | $node, 388 | $cacheable, 389 | $useBoost ? $boost : Date::DEFAULT_BOOST 390 | ); 391 | } elseif ($node instanceof Numbr && $node->useComparisonOperator()) { 392 | $data = [$node->getComparisonOperator()->value => $value]; 393 | if ($useBoost) { 394 | $data['boost'] = $boost; 395 | } 396 | $term = $this->qb->query()->range($fieldName, $data); 397 | } else { 398 | $term = $this->qb->query()->term(); 399 | $term->setTerm($fieldName, $value, $boost); 400 | } 401 | 402 | if ($cacheable) { 403 | if ('addMustNot' === $method) { 404 | $this->addToBoolQuery($method, $fieldName, $term); 405 | } else { 406 | $this->addToBoolQuery('addFilter', $fieldName, $term); 407 | } 408 | } else { 409 | $this->addToBoolQuery($method, $fieldName, $term); 410 | } 411 | } 412 | 413 | /** 414 | * When dealing with dates we have to create a range, even when the user provides 415 | * an exact date. This is because a user asking for documents on date 2015-12-01 416 | * but the value is stored as a timestamp (for example). 417 | * So we ask for documents >=2015-12-01 and <=2015-12-02 418 | * 419 | * The Date node is a date with no time component. 
/**
 * Adds the query to the active bool query. When the field lives under a path
 * registered as nested (see setNestedFields/addNestedField) the query is
 * placed inside a Nested query for that path instead. One Nested query is
 * created per "path-method" pair and reused for later additions.
 *
 * @param string        $method    bool query method name, e.g. "addMust", "addShould", "addFilter"
 * @param string        $fieldName
 * @param AbstractQuery $query
 */
protected function addToBoolQuery(string $method, string $fieldName, AbstractQuery $query): void
{
    if (!str_contains($fieldName, '.')) {
        $this->boolQuery->$method($query);
        return;
    }

    // NOTE(review): this removes ".raw" anywhere in the name, not only as a
    // trailing sub-field - kept as is to preserve existing behavior.
    $fieldName = str_replace('.raw', '', $fieldName);

    // Once ".raw" is stripped the name may have no dot left ("title.raw").
    // strrpos() then returns false, which must not be passed to substr() as
    // a length; such a field has no parent path and cannot be nested.
    $lastDot = strrpos($fieldName, '.');
    if (false === $lastDot) {
        $this->boolQuery->$method($query);
        return;
    }

    $nestedPath = substr($fieldName, 0, $lastDot);
    if (!isset($this->nestedFields[$nestedPath])) {
        $this->boolQuery->$method($query);
        return;
    }

    $nestedQuery = $nestedPath . '-' . $method;
    if (!isset($this->nestedQueries[$nestedQuery])) {
        $this->nestedQueries[$nestedQuery] = (new Nested())
            ->setQuery($this->qb->query()->bool()->setMinimumShouldMatch('2<80%'))
            ->setPath($nestedPath)
            ->setParam('ignore_unmapped', true);
        $this->boolQuery->$method($this->nestedQueries[$nestedQuery]);
    }

    $this->nestedQueries[$nestedQuery]->getParam('query')->$method($query);
}
/**
 * Prints the opening tag for a subquery and increases the indent.
 * The boost attribute is only printed when the subquery is not in a field.
 *
 * @param Subquery $subquery
 * @param Field    $field
 */
protected function startSubquery(Subquery $subquery, ?Field $field = null): void
{
    $boostAttribute = '';
    if (!($field instanceof Field) && $subquery->useBoost()) {
        $boostAttribute = sprintf(' boost="%s"', $subquery->getBoost());
    }

    $this->printLine(sprintf('<%s>', $subquery::NODE_TYPE . $boostAttribute));
    $this->indent();
}
/**
 * Prints a prohibited exact-term node.
 *
 * @param Node  $node
 * @param Field $field
 * @param bool  $cacheable not used by this builder
 */
protected function mustNotMatchTerm(Node $node, ?Field $field = null, bool $cacheable = false): void
{
    // __FUNCTION__ is handed over as the rule name; printSimpleNode() turns
    // it into snake case and prints it as the "rule" attribute.
    $this->printSimpleNode(__FUNCTION__, $node, $field);
}
/**
 * Decreases the current indent by the given number of spaces.
 *
 * The indent never drops below zero: printLine() passes it to str_repeat(),
 * which rejects a negative repeat count.
 *
 * @param int $step
 */
protected function outdent(int $step = 2): void
{
    $this->indent = max(0, $this->indent - $step);
}
/**
 * Converts the held "Y-m-d" value into a DateTime at midnight of the given
 * time zone (UTC when none is given) and moves it to UTC when its offset is
 * not zero. If the value cannot be parsed, today's date is used instead.
 *
 * @param \DateTimeZone $timeZone
 *
 * @return \DateTimeInterface
 */
public function toDateTime(?\DateTimeZone $timeZone = null): \DateTimeInterface
{
    self::$utc ??= new \DateTimeZone('UTC');
    $zone = $timeZone ?: self::$utc;

    $parsed = \DateTime::createFromFormat('!Y-m-d', $this->getValue(), $zone);
    if (false === $parsed) {
        // unparsable value, fall back to the current date.
        $today = (new \DateTime())->format('Y-m-d');
        $parsed = \DateTime::createFromFormat('!Y-m-d', $today, $zone);
    }

    if (0 !== $parsed->getOffset()) {
        $parsed->setTimezone(self::$utc);
    }

    return $parsed;
}
/**
 * Creates a Field from its array form (see toArray).
 *
 * @param array $data expects "value" (the field name), a "node" array and
 *                    optionally "bool_operator", "use_boost" and "boost"
 *
 * @return self
 *
 * @throws \InvalidArgumentException when "node" is missing or not an array
 */
public static function fromArray(array $data = []): self
{
    // A Field cannot exist without its node. Fail with a clear message here
    // rather than passing null to the constructor, which only accepts a Node.
    if (!isset($data['node']) || !is_array($data['node'])) {
        throw new \InvalidArgumentException('Field requires a [node] array.');
    }

    $value = $data['value'] ?? '';
    $useBoost = (bool)($data['use_boost'] ?? false);
    $boost = (float)($data['boost'] ?? self::DEFAULT_BOOST);

    try {
        $boolOperator = isset($data['bool_operator']) ? BoolOperator::from($data['bool_operator']) : null;
    } catch (\Throwable $e) {
        // unknown operator, let the constructor apply its default.
        $boolOperator = null;
    }

    $node = self::factory($data['node']);

    return new self($value, $node, $boolOperator, $useBoost, $boost);
}
/**
 * Creates the concrete node described by the array. The "type" entry is
 * converted from snake_case to StudlyCase and resolved to a class in the
 * Gdbots\QueryParser\Node namespace, whose fromArray() builds the node.
 *
 * @param array $data
 *
 * @return self
 *
 * @throws \InvalidArgumentException when "type" is missing, not a string, or names no existing class
 */
public static function factory(array $data = []): self
{
    $type = $data['type'] ?? null;
    if (!is_string($type) || '' === $type) {
        // without this check a missing type ends up as a null argument
        // to str_replace() below.
        throw new \InvalidArgumentException('Node data requires a non-empty string [type].');
    }

    // fix for php7 reserved name (scalar type hint)
    if ('number' === $type) {
        $type = 'numbr';
    }

    /** @var Node $class */
    $camel = str_replace(' ', '', ucwords(str_replace('_', ' ', $type)));
    $class = 'Gdbots\QueryParser\Node\\' . $camel;
    if (!class_exists($class)) {
        throw new \InvalidArgumentException(sprintf('Node type [%s] does not exist.', $type));
    }

    return $class::fromArray($data);
}
/**
 * Rebuilds the number node from its array form.
 *
 * "value" is cast to float (0.0 when absent). "comparison_operator" is
 * resolved through ComparisonOperator::from(); anything that cannot be
 * resolved is passed on as null.
 *
 * @param array $data
 *
 * @return self
 */
public static function fromArray(array $data = []): self
{
    $value = (float)($data['value'] ?? 0.0);

    $operator = null;
    if (isset($data['comparison_operator'])) {
        try {
            $operator = ComparisonOperator::from($data['comparison_operator']);
        } catch (\Throwable) {
            // unknown or malformed operator: hand null to the constructor
            $operator = null;
        }
    }

    return new self($value, $operator);
}
/**
 * Serializes the range.
 *
 * The lower and upper nodes are written in their own array form so the
 * result can be passed back to fromArray(), which feeds "lower_node" and
 * "upper_node" to factory() and therefore requires arrays. "exclusive" is
 * only written when it is true.
 *
 * @return array
 */
final public function toArray(): array
{
    $array = parent::toArray();

    if (null !== $this->lowerNode) {
        $array['lower_node'] = $this->lowerNode->toArray();
    }

    if (null !== $this->upperNode) {
        $array['upper_node'] = $this->upperNode->toArray();
    }

    if ($this->exclusive) {
        $array['exclusive'] = $this->exclusive;
    }

    return $array;
}
/**
 * Serializes the subquery.
 *
 * The result is the parent's array form plus a "nodes" list holding the
 * array form of every child node, in order.
 *
 * @return array
 */
public function toArray(): array
{
    $serialized = parent::toArray();
    $serialized['nodes'] = array_values(
        array_map(
            static fn ($child) => $child->toArray(),
            $this->nodes
        )
    );

    return $serialized;
}
/**
 * Returns the parsed query as a list of serialized nodes.
 *
 * Each node is converted with its own toArray() so the result is plain
 * array data that fromArray() can consume, since fromArray() passes every
 * entry to Node::factory(), which requires an array.
 *
 * @return array[]
 */
public function toArray(): array
{
    return array_map(
        static fn (Node $node): array => $node->toArray(),
        $this->nodes
    );
}
/**
 * Tokenizes the input and turns the token stream into a ParsedQuery.
 *
 * For every step of the stream the leading bool operator is read first,
 * then the nodes created for the current token are added to the query.
 * Parsing stops when the stream is exhausted or the current token is the
 * end-of-input token.
 *
 * @param string $input
 *
 * @return ParsedQuery
 */
public function parse(string $input): ParsedQuery
{
    $this->stream = $this->tokenizer->scan($input);
    $parsed = new ParsedQuery();

    while ($this->stream->next()) {
        $operator = $this->getBoolOperator();
        $current = $this->stream->getCurrent();

        if ($current->typeEquals(Token::T_EOI)) {
            return $parsed;
        }

        $parsed->addNodes($this->createNodes($current, $operator));
    }

    return $parsed;
}
$comparisonOperator = null 70 | ): array { 71 | switch ($token->getType()) { 72 | case Token::T_WORD: 73 | $nodes = $this->createWord($token->getValue(), $boolOperator); 74 | break; 75 | 76 | case Token::T_DATE: 77 | $nodes = $this->createDate($token->getValue(), $boolOperator, $comparisonOperator); 78 | break; 79 | 80 | case Token::T_EMOJI: 81 | $nodes = $this->createEmoji($token->getValue(), $boolOperator); 82 | break; 83 | 84 | case Token::T_EMOTICON: 85 | $nodes = $this->createEmoticon($token->getValue(), $boolOperator); 86 | break; 87 | 88 | case Token::T_FIELD_START: 89 | $nodes = $this->handleField($token->getValue(), $boolOperator); 90 | break; 91 | 92 | case Token::T_HASHTAG: 93 | $nodes = $this->createHashtag($token->getValue(), $boolOperator); 94 | break; 95 | 96 | case Token::T_MENTION: 97 | $nodes = $this->createMention($token->getValue(), $boolOperator); 98 | break; 99 | 100 | case Token::T_NUMBER: 101 | $nodes = $this->createNumber($token->getValue(), $comparisonOperator); 102 | break; 103 | 104 | case Token::T_PHRASE: 105 | $nodes = $this->createPhrase($token->getValue(), $boolOperator); 106 | break; 107 | 108 | case Token::T_SUBQUERY_START: 109 | $nodes = $this->handleSubquery($boolOperator); 110 | break; 111 | 112 | case Token::T_URL: 113 | $nodes = $this->createUrl($token->getValue(), $boolOperator); 114 | break; 115 | 116 | default: 117 | $nodes = []; 118 | break; 119 | } 120 | 121 | return $nodes instanceof Node ? 
/**
 * Parses a field whose value is a range and returns the resulting node.
 *
 * The stream is expected to be positioned on the range start token. The
 * lower bound is read from the next token and the upper bound from the
 * token following T_TO; the stream is then advanced to T_FIELD_END.
 *
 * - Both bounds present but of different node classes: the two nodes are
 *   wrapped in a Subquery inside the Field instead of a Range.
 * - Otherwise a NumberRange, DateRange or WordRange is chosen from
 *   whichever bound is present.
 * - No usable bound at all: the field name is returned as a plain Word.
 *
 * @param string       $fieldName
 * @param BoolOperator $boolOperator
 *
 * @return Field|Node
 */
private function handleFieldWithRange(string $fieldName, BoolOperator $boolOperator)
{
    $exclusive = $this->stream->typeIs(Token::T_RANGE_EXCL_START);
    $this->stream->next();
    $lowerNode = $this->createRangeBoundaryNode($this->stream->getCurrent());

    $this->stream->skipUntil(Token::T_TO);
    $this->stream->nextIf(Token::T_TO);
    $upperNode = $this->createRangeBoundaryNode($this->stream->getCurrent());

    $this->stream->skipUntil(Token::T_FIELD_END);

    // todo: add field name and/or nodes that aren't null as words?
    // todo: handle mismatched node
    $hasBothBounds = null !== $lowerNode && null !== $upperNode;
    if ($hasBothBounds && !$lowerNode instanceof $upperNode) {
        // Both bounds exist here, so the node list always has exactly two
        // entries; no empty or single-node handling is needed.
        $m = $this->getModifiers();
        $subquery = new Subquery([$lowerNode, $upperNode], null, $m['use_boost'], $m['boost']);
        return new Field($fieldName, $subquery, $boolOperator, $m['use_boost'], $m['boost']);
    }

    $m = $this->getModifiers();

    $range = match (true) {
        $lowerNode instanceof Numbr || $upperNode instanceof Numbr
            => new NumberRange($lowerNode, $upperNode, $exclusive),
        $lowerNode instanceof Date || $upperNode instanceof Date
            => new DateRange($lowerNode, $upperNode, $exclusive),
        $lowerNode instanceof Word || $upperNode instanceof Word
            => new WordRange($lowerNode, $upperNode, $exclusive),
        default => null,
    };

    if (null === $range) {
        return $this->createWord($fieldName, $boolOperator);
    }

    return new Field($fieldName, $range, $boolOperator, $m['use_boost'], $m['boost']);
}

/**
 * Creates the node for one bound of a range from the given token.
 *
 * Number, date and word tokens produce the matching node (dates and
 * words with an optional bool operator); any other token yields null.
 *
 * @param Token $token
 *
 * @return Node|null
 */
private function createRangeBoundaryNode(Token $token): ?Node
{
    return match ($token->getType()) {
        Token::T_NUMBER => $this->createNumber($token->getValue()),
        Token::T_DATE => $this->createDate($token->getValue(), BoolOperator::OPTIONAL),
        Token::T_WORD => $this->createWord($token->getValue(), BoolOperator::OPTIONAL),
        default => null,
    };
}
/**
 * Parses the tokens of a subquery up to its closing token.
 *
 * Returns an empty array when nothing inside produced a node. When exactly
 * one node was produced, that node is rebuilt from its array form with the
 * outer bool operator and the trailing modifiers filled in for every key
 * the node did not set itself, e.g. "+(cats)^5" becomes "+cats^5".
 * Otherwise the nodes are wrapped in a Subquery.
 *
 * @param BoolOperator $queryBoolOperator
 *
 * @return Subquery|Node[]|Node
 */
private function handleSubquery(BoolOperator $queryBoolOperator)
{
    $this->stream->nextIf(Token::T_SUBQUERY_START);

    /** @var Node[] $collected */
    $collected = [];

    while (true) {
        $operator = $this->getBoolOperator();
        $comparison = $this->getComparisonOperator();
        $created = $this->createNodes($this->stream->getCurrent(), $operator, $comparison);
        $collected = array_merge($collected, $created);

        if (!$this->stream->next()) {
            break;
        }

        if ($this->stream->typeIs(Token::T_SUBQUERY_END)) {
            break;
        }
    }

    if (empty($collected)) {
        return [];
    }

    $modifiers = $this->getModifiers();

    if (1 !== count($collected)) {
        return new Subquery($collected, $queryBoolOperator, $modifiers['use_boost'], $modifiers['boost']);
    }

    // single node: keep whatever it declared itself, fill in the rest
    $only = $collected[0];
    $data = $only->toArray();
    $data['bool_operator'] ??= $queryBoolOperator->value;

    foreach (['use_boost', 'boost', 'use_fuzzy', 'fuzzy', 'trailing_wildcard'] as $key) {
        $data[$key] ??= $modifiers[$key];
    }

    return $only::fromArray($data);
}
/**
 * Creates a Hashtag node.
 *
 * An optional bool operator is promoted to required; any other operator
 * is kept. Boost modifiers following the token are applied.
 *
 * @param string       $value
 * @param BoolOperator $boolOperator
 *
 * @return Hashtag
 */
private function createHashtag(string $value, BoolOperator $boolOperator): Hashtag
{
    if (BoolOperator::OPTIONAL === $boolOperator) {
        $boolOperator = BoolOperator::REQUIRED;
    }

    $modifiers = $this->getModifiers();

    return new Hashtag($value, $boolOperator, $modifiers['use_boost'], $modifiers['boost']);
}
/**
 * Reads an optional comparison operator from the current stream position.
 *
 * Consumes a greater-than or less-than token and, if one directly
 * follows, an equals token. The operator is resolved by appending "e" to
 * the GT/LT enum value when the equals token was present. Returns null
 * when the current token is neither greater-than nor less-than.
 *
 * @return ComparisonOperator|null
 */
private function getComparisonOperator(): ?ComparisonOperator
{
    $base = null;

    if ($this->stream->nextIf(Token::T_GREATER_THAN)) {
        $base = ComparisonOperator::GT;
    } elseif ($this->stream->nextIf(Token::T_LESS_THAN)) {
        $base = ComparisonOperator::LT;
    }

    if (null === $base) {
        return null;
    }

    $suffix = $this->stream->nextIf(Token::T_EQUALS) ? 'e' : '';

    return ComparisonOperator::from($base->value . $suffix);
}
/**
 * Gets the name of the type (a T_FOO constant) by its integer value.
 *
 * The id => name map is built once from this class's constants and cached
 * in the static $typeNames property. Unknown ids are returned as their
 * string form.
 *
 * @param int $type
 *
 * @return string
 */
public static function name(int $type): string
{
    // $typeNames is a typed static property without a default value, so it
    // must be probed with isset(); reading it directly (e.g. comparing it
    // to null) throws before it has been initialized.
    if (!isset(self::$typeNames)) {
        self::$typeNames = array_flip((new \ReflectionClass(__CLASS__))->getConstants());
    }

    return self::$typeNames[$type] ?? (string)$type;
}
/**
 * Advances the stream until the current token has the given type or the
 * end of input is reached. Does nothing if the current token already
 * matches.
 *
 * @param int $type
 */
public function skipUntil(int $type): void
{
    for (;;) {
        if ($this->current->typeEquals($type) || $this->current->typeEquals(Token::T_EOI)) {
            return;
        }

        $this->next();
    }
}
/**
 * Moves to the next token when the lookahead token exists and its type
 * is one of the given types.
 *
 * @param int[] $types
 *
 * @return bool True if the stream was advanced.
 */
public function nextIfLookaheadAnyOf(array $types): bool
{
    $lookahead = $this->tokens[$this->position] ?? null;

    if (null !== $lookahead && $lookahead->typeEqualsAnyOf($types)) {
        $this->next();
        return true;
    }

    return false;
}
/**
 * Returns the token after the current one, or null when the stream has
 * no more tokens.
 *
 * @return Token|null
 */
public function getLookahead(): ?Token
{
    // "??" instead of "?:" so a position past the last token yields null
    // without raising an "Undefined array key" warning.
    return $this->tokens[$this->position] ?? null;
}
'[\x{2712}\x{2714}\x{2716}\x{271d}\x{2721}\x{2728}\x{2733}\x{2734}\x{2744}\x{2747}\x{274c}\x{274e}\x{2753}-\x{2755}\x{2757}\x{2763}\x{2764}\x{2795}-\x{2797}\x{27a1}\x{27b0}\x{27bf}\x{2934}\x{2935}\x{2b05}-\x{2b07}\x{2b1b}\x{2b1c}\x{2b50}\x{2b55}\x{3030}\x{303d}\x{1f004}\x{1f0cf}\x{1f170}\x{1f171}\x{1f17e}\x{1f17f}\x{1f18e}\x{1f191}-\x{1f19a}\x{1f201}\x{1f202}\x{1f21a}\x{1f22f}\x{1f232}-\x{1f23a}\x{1f250}\x{1f251}\x{1f300}-\x{1f321}\x{1f324}-\x{1f393}\x{1f396}\x{1f397}\x{1f399}-\x{1f39b}\x{1f39e}-\x{1f3f0}\x{1f3f3}-\x{1f3f5}\x{1f3f7}-\x{1f4fd}\x{1f4ff}-\x{1f53d}\x{1f549}-\x{1f54e}\x{1f550}-\x{1f567}\x{1f56f}\x{1f570}\x{1f573}-\x{1f579}\x{1f587}\x{1f58a}-\x{1f58d}\x{1f590}\x{1f595}\x{1f596}\x{1f5a5}\x{1f5a8}\x{1f5b1}\x{1f5b2}\x{1f5bc}\x{1f5c2}-\x{1f5c4}\x{1f5d1}-\x{1f5d3}\x{1f5dc}-\x{1f5de}\x{1f5e1}\x{1f5e3}\x{1f5ef}\x{1f5f3}\x{1f5fa}-\x{1f64f}\x{1f680}-\x{1f6c5}\x{1f6cb}-\x{1f6d0}\x{1f6e0}-\x{1f6e5}\x{1f6e9}\x{1f6eb}\x{1f6ec}\x{1f6f0}\x{1f6f3}\x{1f910}-\x{1f918}\x{1f980}-\x{1f984}\x{1f9c0}\x{3297}\x{3299}\x{a9}\x{ae}\x{203c}\x{2049}\x{2122}\x{2139}\x{2194}-\x{2199}\x{21a9}\x{21aa}\x{231a}\x{231b}\x{2328}\x{2388}\x{23cf}\x{23e9}-\x{23f3}\x{23f8}-\x{23fa}\x{24c2}\x{25aa}\x{25ab}\x{25b6}\x{25c0}\x{25fb}-\x{25fe}\x{2600}-\x{2604}\x{260e}\x{2611}\x{2614}\x{2615}\x{2618}\x{261d}\x{2620}\x{2622}\x{2623}\x{2626}\x{262a}\x{262e}\x{262f}\x{2638}-\x{263a}\x{2648}-\x{2653}\x{2660}\x{2663}\x{2665}\x{2666}\x{2668}\x{267b}\x{267f}\x{2692}-\x{2694}\x{2696}\x{2697}\x{2699}\x{269b}\x{269c}\x{26a0}\x{26a1}\x{26aa}\x{26ab}\x{26b0}\x{26b1}\x{26bd}\x{26be}\x{26c4}\x{26c5}\x{26c8}\x{26ce}\x{26cf}\x{26d1}\x{26d3}\x{26d4}\x{26e9}\x{26ea}\x{26f0}-\x{26f5}\x{26f7}-\x{26fa}\x{26fd}\x{2702}\x{2705}\x{2708}-\x{270d}\x{270f}]|\x{23}\x{20e3}|\x{2a}\x{20e3}|\x{30}\x{20e3}|\x{31}\x{20e3}|\x{32}\x{20e3}|\x{33}\x{20e3}|\x{34}\x{20e3}|\x{35}\x{20e3}|\x{36}\x{20e3}|\x{37}\x{20e3}|\x{38}\x{20e3}|\x{39}\x{20e3}|\x{1f1e6}[\x{1f1e8}-\x{1f1ec}\x{1f1ee}\x{1f1f1}\x{1f1f2}\x{1f1f4}\x{1f1f6}-\x{1f1fa}\x{1f1fc}\x{
1f1fd}\x{1f1ff}]|\x{1f1e7}[\x{1f1e6}\x{1f1e7}\x{1f1e9}-\x{1f1ef}\x{1f1f1}-\x{1f1f4}\x{1f1f6}-\x{1f1f9}\x{1f1fb}\x{1f1fc}\x{1f1fe}\x{1f1ff}]|\x{1f1e8}[\x{1f1e6}\x{1f1e8}\x{1f1e9}\x{1f1eb}-\x{1f1ee}\x{1f1f0}-\x{1f1f5}\x{1f1f7}\x{1f1fa}-\x{1f1ff}]|\x{1f1e9}[\x{1f1ea}\x{1f1ec}\x{1f1ef}\x{1f1f0}\x{1f1f2}\x{1f1f4}\x{1f1ff}]|\x{1f1ea}[\x{1f1e6}\x{1f1e8}\x{1f1ea}\x{1f1ec}\x{1f1ed}\x{1f1f7}-\x{1f1fa}]|\x{1f1eb}[\x{1f1ee}-\x{1f1f0}\x{1f1f2}\x{1f1f4}\x{1f1f7}]|\x{1f1ec}[\x{1f1e6}\x{1f1e7}\x{1f1e9}-\x{1f1ee}\x{1f1f1}-\x{1f1f3}\x{1f1f5}-\x{1f1fa}\x{1f1fc}\x{1f1fe}]|\x{1f1ed}[\x{1f1f0}\x{1f1f2}\x{1f1f3}\x{1f1f7}\x{1f1f9}\x{1f1fa}]|\x{1f1ee}[\x{1f1e8}-\x{1f1ea}\x{1f1f1}-\x{1f1f4}\x{1f1f6}-\x{1f1f9}]|\x{1f1ef}[\x{1f1ea}\x{1f1f2}\x{1f1f4}\x{1f1f5}]|\x{1f1f0}[\x{1f1ea}\x{1f1ec}-\x{1f1ee}\x{1f1f2}\x{1f1f3}\x{1f1f5}\x{1f1f7}\x{1f1fc}\x{1f1fe}\x{1f1ff}]|\x{1f1f1}[\x{1f1e6}-\x{1f1e8}\x{1f1ee}\x{1f1f0}\x{1f1f7}-\x{1f1fb}\x{1f1fe}]|\x{1f1f2}[\x{1f1e6}\x{1f1e8}-\x{1f1ed}\x{1f1f0}-\x{1f1ff}]|\x{1f1f3}[\x{1f1e6}\x{1f1e8}\x{1f1ea}-\x{1f1ec}\x{1f1ee}\x{1f1f1}\x{1f1f4}\x{1f1f5}\x{1f1f7}\x{1f1fa}\x{1f1ff}]|\x{1f1f4}\x{1f1f2}|\x{1f1f5}[\x{1f1e6}\x{1f1ea}-\x{1f1ed}\x{1f1f0}-\x{1f1f3}\x{1f1f7}-\x{1f1f9}\x{1f1fc}\x{1f1fe}]|\x{1f1f6}\x{1f1e6}|\x{1f1f7}[\x{1f1ea}\x{1f1f4}\x{1f1f8}\x{1f1fa}\x{1f1fc}]|\x{1f1f8}[\x{1f1e6}-\x{1f1ea}\x{1f1ec}-\x{1f1f4}\x{1f1f7}-\x{1f1f9}\x{1f1fb}\x{1f1fd}-\x{1f1ff}]|\x{1f1f9}[\x{1f1e6}\x{1f1e8}\x{1f1e9}\x{1f1eb}-\x{1f1ed}\x{1f1ef}-\x{1f1f4}\x{1f1f7}\x{1f1f9}\x{1f1fb}\x{1f1fc}\x{1f1ff}]|\x{1f1fa}[\x{1f1e6}\x{1f1ec}\x{1f1f2}\x{1f1f8}\x{1f1fe}\x{1f1ff}]|\x{1f1fb}[\x{1f1e6}\x{1f1e8}\x{1f1ea}\x{1f1ec}\x{1f1ee}\x{1f1f3}\x{1f1fa}]|\x{1f1fc}[\x{1f1eb}\x{1f1f8}]|\x{1f1fd}\x{1f1f0}|\x{1f1fe}[\x{1f1ea}\x{1f1f9}]|\x{1f1ff}[\x{1f1e6}\x{1f1f2}\x{1f1fc}]'; 10 | const REGEX_URL = '[+-]?[\w-]+:\/\/[^\s\/$.?#].[^\s\^~]*'; 11 | const REGEX_PHRASE = '[+-]?"(?:""|[^"])*"'; 12 | const REGEX_HASHTAG = '[+-]?#+[a-zA-Z0-9_]+'; 13 | const REGEX_MENTION = 
'[+-]?@+[a-zA-Z0-9_]+(?:[a-zA-Z0-9_\.\-]+)?'; 14 | const REGEX_NUMBER = '(?:[+-]?[0-9]+(?:[\.][0-9]+)*)(?:[eE][+-]?[0-9]+)?'; 15 | const REGEX_DATE = '[+-]?\d{4}-\d{2}-\d{2}'; 16 | const REGEX_FIELD = '[+-]?[a-zA-Z\_]+(?:[a-zA-Z0-9_\.\-]+)?:'; 17 | const REGEX_WORD = '[+-]?[^\s\(\)\\\\^\<\>\[\]\{\}~=]*'; 18 | const REGEX_WORD_MINIMUM = '[a-zA-Z0-9\pL]+'; 19 | const IGNORED_LEAD_TRAIL_CHARS = "#@,.!?;|&+-^~*\\\"' \t\n\r "; 20 | 21 | /** 22 | * When building a field lexeme we switch this on/off to establish proper T_FIELD_END. 23 | * It also helps us enforce range and subquery rules. 24 | * 25 | * @var bool 26 | */ 27 | private bool $inField = false; 28 | 29 | /** 30 | * This tokenizer only supports one level of sub query (for now). We only want to take 31 | * a query from a user like "funny #cats plays:>500" and parse that to a simple 32 | * object which can be translated to a sql, elasticsearch, riak, etc. query. 33 | * 34 | * @var bool 35 | */ 36 | private bool $inSubquery = false; 37 | 38 | /** 39 | * This tokenizer only supports one range to be open at a time (excl or incl). 40 | * Starting a new range of any type is ignored if it's already open and 41 | * closing a range that never started is also ignored. 42 | * 43 | * The value will be the type of range that is open or 0. 44 | * 45 | * @var int 46 | */ 47 | private int $inRange = 0; 48 | 49 | /** 50 | * The regex used to split the initial input into chunks that will be 51 | * checked for tokens during scan/tokenization. 52 | * 53 | * @var string 54 | */ 55 | private string $splitRegex; 56 | 57 | /** @var Token[] */ 58 | private array $tokens = []; 59 | 60 | /** 61 | * The last token that was scanned. 
62 | * 63 | * @var Token 64 | */ 65 | private Token $lastToken; 66 | 67 | public function __construct() 68 | { 69 | $this->splitRegex = sprintf( 70 | '/(%s)/iu', 71 | implode(')|(', [ 72 | self::REGEX_EMOTICON, 73 | self::REGEX_URL, 74 | self::REGEX_PHRASE, 75 | self::REGEX_FIELD, 76 | self::REGEX_WORD, 77 | ]) 78 | ); 79 | $this->lastToken = new Token(Token::T_WHITE_SPACE); 80 | } 81 | 82 | /** 83 | * The Tokenizer is immediately reset and the new input tokenized. 84 | * Any unprocessed tokens from any previous input are lost. 85 | * 86 | * @param string $input 87 | * 88 | * @return TokenStream 89 | */ 90 | public function scan(string $input): TokenStream 91 | { 92 | $input = str_replace('""', '" "', preg_replace('/\s+/', ' ', ' ' . $input)); 93 | // $input = substr($input, 0, 256); // lef 94 | $this->inField = false; 95 | $this->inSubquery = false; 96 | $this->inRange = 0; 97 | $this->tokens = []; 98 | $this->lastToken = new Token(Token::T_WHITE_SPACE); 99 | 100 | foreach ($this->splitInput($input) as $match) { 101 | $this->extractTokens(trim($match[0])); 102 | 103 | if ($this->lastToken->isWhiteSpace() && $this->inField && !$this->inRange && !$this->inSubquery) { 104 | $this->inField = false; 105 | $this->addOperatorToken(Token::T_FIELD_END); 106 | } 107 | } 108 | 109 | if ($this->inField) { 110 | $this->inField = false; 111 | $this->addOperatorToken(Token::T_FIELD_END); 112 | } 113 | 114 | if ($this->inSubquery) { 115 | $this->inSubquery = false; 116 | $this->addOperatorToken(Token::T_SUBQUERY_END); 117 | } 118 | 119 | $this->tokens = array_values(array_filter($this->tokens, function (Token $token) { 120 | return !$token->isWhiteSpace() && !$token->isIgnored(); 121 | })); 122 | 123 | return new TokenStream($this->tokens); 124 | } 125 | 126 | /** 127 | * Splits the input into chunks that will be scanned for tokens. 
128 | * 129 | * @param string $input 130 | * 131 | * @return array 132 | */ 133 | private function splitInput(string $input): array 134 | { 135 | $flags = PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE; 136 | return preg_split($this->splitRegex, $input, -1, $flags); 137 | } 138 | 139 | /** 140 | * Adds an operator token (tokens with no value). This method 141 | * also ensures the same token is not repeated. 142 | * 143 | * @param int $type 144 | */ 145 | private function addOperatorToken(int $type): void 146 | { 147 | if ($this->lastToken->typeEquals($type)) { 148 | return; 149 | } 150 | 151 | $token = new Token($type); 152 | $this->tokens[] = $token; 153 | $this->lastToken = $token; 154 | } 155 | 156 | private function addToken(int $type, float|string|null $value): void 157 | { 158 | $token = new Token($type, $value); 159 | $this->tokens[] = $token; 160 | $this->lastToken = $token; 161 | } 162 | 163 | private function extractTokens(string $value): void 164 | { 165 | if ('' === $value) { 166 | if ($this->lastToken->typeEqualsAnyOf([Token::T_REQUIRED, Token::T_PROHIBITED, Token::T_IGNORED])) { 167 | // todo: review the process of bool operators following ignored values. 168 | array_pop($this->tokens); 169 | } 170 | $this->addOperatorToken(Token::T_WHITE_SPACE); 171 | return; 172 | } 173 | 174 | if (is_numeric($value)) { 175 | $this->addToken(Token::T_NUMBER, (float)$value); 176 | return; 177 | } 178 | 179 | if ($this->extractSymbolOrKeyword($value)) { 180 | return; 181 | } 182 | 183 | switch ($value[0]) { 184 | case '+': 185 | $this->addOperatorToken(Token::T_REQUIRED); 186 | $value = substr($value, 1); 187 | break; 188 | 189 | case '-': 190 | $this->addOperatorToken(Token::T_PROHIBITED); 191 | $value = substr($value, 1); 192 | break; 193 | 194 | default: 195 | break; 196 | } 197 | 198 | if (preg_match('/^' . self::REGEX_EMOTICON . 
'$/', $value)) { 199 | $this->addToken(Token::T_EMOTICON, trim($value, self::IGNORED_LEAD_TRAIL_CHARS)); 200 | return; 201 | } 202 | 203 | if (preg_match('/^' . self::REGEX_EMOJI . '$/u', $value)) { 204 | $this->addToken(Token::T_EMOJI, trim($value, self::IGNORED_LEAD_TRAIL_CHARS)); 205 | return; 206 | } 207 | 208 | if (preg_match('/^' . self::REGEX_URL . '$/', $value)) { 209 | $this->addToken(Token::T_URL, trim($value, self::IGNORED_LEAD_TRAIL_CHARS)); 210 | return; 211 | } 212 | 213 | if (!$this->inField && !$this->inSubquery 214 | && preg_match('/^' . self::REGEX_FIELD . '$/', $value) 215 | && $this->lastToken->typeEqualsAnyOf([ 216 | Token::T_WHITE_SPACE, 217 | Token::T_REQUIRED, 218 | Token::T_PROHIBITED, 219 | Token::T_FIELD_END, 220 | Token::T_SUBQUERY_START, 221 | ]) 222 | ) { 223 | $this->inField = true; 224 | $this->addToken(Token::T_FIELD_START, rtrim($value, ':')); 225 | return; 226 | } 227 | 228 | if (preg_match('/^' . self::REGEX_PHRASE . '$/', $value)) { 229 | $value = trim(trim($value, '"')); 230 | if (!empty($value)) { 231 | $this->addToken(Token::T_PHRASE, $value); 232 | } else { 233 | $this->addToken(Token::T_IGNORED, $value); 234 | } 235 | return; 236 | } 237 | 238 | if (str_contains($value, '..')) { 239 | $parts = explode('..', $value, 2); 240 | $this->extractTokens($parts[0]); 241 | $this->extractSymbolOrKeyword('..'); 242 | $this->extractTokens($parts[1] ?? ''); 243 | return; 244 | } 245 | 246 | if (preg_match('/^' . self::REGEX_HASHTAG . '$/', rtrim($value, self::IGNORED_LEAD_TRAIL_CHARS))) { 247 | $this->addToken(Token::T_HASHTAG, trim($value, self::IGNORED_LEAD_TRAIL_CHARS)); 248 | return; 249 | } 250 | 251 | if (preg_match('/^' . self::REGEX_MENTION . '$/', rtrim($value, self::IGNORED_LEAD_TRAIL_CHARS))) { 252 | $this->addToken(Token::T_MENTION, trim($value, self::IGNORED_LEAD_TRAIL_CHARS)); 253 | return; 254 | } 255 | 256 | if (preg_match('/^' . self::REGEX_DATE . 
'$/', rtrim($value, self::IGNORED_LEAD_TRAIL_CHARS))) { 257 | $this->addToken(Token::T_DATE, trim($value, self::IGNORED_LEAD_TRAIL_CHARS)); 258 | return; 259 | } 260 | 261 | if (preg_match('/' . self::REGEX_WORD . '/', $value)) { 262 | $hasTrailingWildcard = str_ends_with($value, '*'); 263 | $value2 = trim($value, self::IGNORED_LEAD_TRAIL_CHARS . '/'); 264 | if (!empty($value2)) { 265 | /* 266 | * When in a field or subquery you can get a value which itself looks like the start 267 | * of a field, e.g. "field:vevo:video". We don't want two words here so 268 | * merge the last "word" token value with this one. 269 | */ 270 | if ($this->lastToken->typeEquals(Token::T_WORD) 271 | && ':' === strrev($this->lastToken->getValue())[0] 272 | ) { 273 | $value2 = array_pop($this->tokens)->getValue() . $value2; 274 | } 275 | 276 | if (!preg_match('/' . self::REGEX_WORD_MINIMUM . '/u', $value2)) { 277 | $this->addToken(Token::T_IGNORED, $value2); 278 | return; 279 | } 280 | 281 | $this->addToken(Token::T_WORD, $value2); 282 | 283 | if ($hasTrailingWildcard) { 284 | $this->addOperatorToken(Token::T_WILDCARD); 285 | } 286 | 287 | return; 288 | } 289 | } 290 | 291 | $this->addToken(Token::T_IGNORED, $value); 292 | } 293 | 294 | /** 295 | * Extracts a symbol or keyword from the string and may ignore a token 296 | * if it doesn't follow some basic rules for this lib. E.g. you can't 297 | * boost whitespace " ^5". In that case, boost is ignored. 298 | * 299 | * @param string $value 300 | * 301 | * @return bool True if a symbol or keyword was extracted/processed. 
302 | */ 303 | private function extractSymbolOrKeyword(string $value): bool 304 | { 305 | $len = strlen($value); 306 | if ($len > 3) { 307 | return false; 308 | } 309 | 310 | switch ($value) { 311 | case '+': 312 | $this->addOperatorToken(Token::T_REQUIRED); 313 | return true; 314 | 315 | case '-': 316 | $this->addOperatorToken(Token::T_PROHIBITED); 317 | return true; 318 | 319 | case '>': 320 | if ($this->inField && 0 === $this->inRange) { 321 | $this->addOperatorToken(Token::T_GREATER_THAN); 322 | } 323 | return true; 324 | 325 | case '<': 326 | if ($this->inField && 0 === $this->inRange) { 327 | $this->addOperatorToken(Token::T_LESS_THAN); 328 | } 329 | return true; 330 | 331 | case '=': 332 | if ($this->lastToken->typeEquals(Token::T_GREATER_THAN) 333 | || $this->lastToken->typeEquals(Token::T_LESS_THAN) 334 | ) { 335 | $this->addOperatorToken(Token::T_EQUALS); 336 | } 337 | return true; 338 | 339 | case '~': 340 | // can't fuzzy parts of a field, range or sub query 341 | if ($this->inSubquery || 0 !== $this->inRange) { 342 | // fuzzy is ignored 343 | return true; 344 | } 345 | 346 | if (!$this->lastToken->isWhiteSpace()) { 347 | if ($this->inField) { 348 | $this->inField = false; 349 | $this->addOperatorToken(Token::T_FIELD_END); 350 | } 351 | $this->addOperatorToken(Token::T_FUZZY); 352 | } 353 | return true; 354 | 355 | case '^': 356 | // can't boost parts of a field, range or sub query 357 | if ($this->inSubquery || 0 !== $this->inRange) { 358 | // boost is ignored 359 | return true; 360 | } 361 | 362 | if (!$this->lastToken->isWhiteSpace()) { 363 | if ($this->inField) { 364 | $this->inField = false; 365 | $this->addOperatorToken(Token::T_FIELD_END); 366 | } 367 | $this->addOperatorToken(Token::T_BOOST); 368 | } 369 | return true; 370 | 371 | case '[': 372 | if ($this->inField && 0 === $this->inRange) { 373 | $this->inRange = Token::T_RANGE_INCL_START; 374 | $this->addOperatorToken(Token::T_RANGE_INCL_START); 375 | } 376 | return true; 377 | 378 | case '{': 
379 | if ($this->inField && 0 === $this->inRange) { 380 | $this->inRange = Token::T_RANGE_EXCL_START; 381 | $this->addOperatorToken(Token::T_RANGE_EXCL_START); 382 | } 383 | return true; 384 | 385 | case ']': 386 | case '}': 387 | if (0 !== $this->inRange) { 388 | if (Token::T_RANGE_INCL_START === $this->inRange) { 389 | $this->addOperatorToken(Token::T_RANGE_INCL_END); 390 | } else { 391 | $this->addOperatorToken(Token::T_RANGE_EXCL_END); 392 | } 393 | 394 | $this->inRange = 0; 395 | $this->inField = false; 396 | $this->addOperatorToken(Token::T_FIELD_END); 397 | } 398 | return true; 399 | 400 | case '(': 401 | // sub queries can't be nested or exist in a range. 402 | if (!$this->inSubquery && 0 === $this->inRange) { 403 | $this->addOperatorToken(Token::T_SUBQUERY_START); 404 | $this->inSubquery = true; 405 | } 406 | return true; 407 | 408 | case ')': 409 | if ($this->inSubquery && 0 === $this->inRange) { 410 | $this->inSubquery = false; 411 | $this->addOperatorToken(Token::T_SUBQUERY_END); 412 | 413 | if ($this->inField) { 414 | $this->addOperatorToken(Token::T_FIELD_END); 415 | $this->inField = false; 416 | } 417 | } 418 | return true; 419 | 420 | case '*': 421 | $this->addOperatorToken(Token::T_WILDCARD); 422 | return true; 423 | 424 | case '||': 425 | case 'OR': 426 | $this->addOperatorToken(Token::T_OR); 427 | return true; 428 | 429 | case '&&': 430 | case 'AND': 431 | $this->addOperatorToken(Token::T_AND); 432 | return true; 433 | 434 | case '..': 435 | if (0 !== $this->inRange) { 436 | $this->addOperatorToken(Token::T_TO); 437 | } 438 | return true; 439 | 440 | case 'TO': 441 | if (0 !== $this->inRange) { 442 | $this->addOperatorToken(Token::T_TO); 443 | return true; 444 | } 445 | 446 | $this->addToken(Token::T_WORD, $value); 447 | return true; 448 | 449 | default: 450 | if (1 === $len) { 451 | if (ctype_alpha($value)) { 452 | /* 453 | * A word, followed ":", followed by a single char "thing:a". 454 | * can be made into one token. 
455 | * todo: review words that look like fields. seems wonky. 456 | */ 457 | if ($this->lastToken->typeEquals(Token::T_WORD) 458 | && ':' === strrev($this->lastToken->getValue())[0] 459 | ) { 460 | $value = array_pop($this->tokens)->getValue() . $value; 461 | } 462 | 463 | $this->addToken(Token::T_WORD, $value); 464 | return true; 465 | } 466 | 467 | $this->addToken(Token::T_IGNORED, $value); 468 | return true; 469 | } 470 | break; 471 | } 472 | 473 | return false; 474 | } 475 | } 476 | -------------------------------------------------------------------------------- /tests/Builder/XmlQueryBuilderTest.php: -------------------------------------------------------------------------------- 1 | parser = new QueryParser(); 19 | $this->builder = new XmlQueryBuilder(); 20 | } 21 | 22 | /** 23 | * @dataProvider getTestQueries 24 | * 25 | * @param string $name 26 | * @param string $input 27 | * @param null $ignored 28 | * @param Node[] $expectedNodes 29 | */ 30 | public function testToSimpleXmlElement(string $name, string $input, $ignored, array $expectedNodes = []): void 31 | { 32 | $this->builder->addParsedQuery($this->parser->parse($input)); 33 | $xml = $this->builder->toSimpleXmlElement(); 34 | $expectedNodeCount = count($expectedNodes); 35 | 36 | if ($expectedNodeCount && $xml->count() < $expectedNodeCount) { 37 | $this->fail('Failed to generate SimpleXmlElement from: ' . $input); 38 | } 39 | 40 | $this->assertSame($expectedNodeCount, $xml->count()); 41 | 42 | /** @var \SimpleXmlElement $child */ 43 | $i = 0; 44 | foreach ($xml->children() as $child) { 45 | if (!isset($expectedNodes[$i])) { 46 | $this->fail('Xml contains unexpected nodes'); 47 | } 48 | 49 | $node = $expectedNodes[$i]; 50 | $this->assertEquals( 51 | $node::NODE_TYPE, 52 | $child->getName(), 53 | "Test query [{$name}] with input [{$input}] failed." 54 | ); 55 | 56 | $i++; 57 | } 58 | } 59 | 60 | public function getTestQueries(): array 61 | { 62 | return require __DIR__ . 
'/../Fixtures/test-queries.php'; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /tests/Fixtures/test-queries.php: -------------------------------------------------------------------------------- 1 | 'url', 26 | 'input' => 'http://test.com/1_2.html?a=b%20&c=1+2#test', 27 | 'expected_tokens' => [ 28 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'], 29 | ], 30 | 'expected_nodes' => [ 31 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test'), 32 | ], 33 | ], 34 | 35 | [ 36 | 'name' => 'required url', 37 | 'input' => '+http://test.com/1_2.html?a=b%20&c=1+2#test', 38 | 'expected_tokens' => [ 39 | T::T_REQUIRED, 40 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'], 41 | ], 42 | 'expected_nodes' => [ 43 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test', BoolOperator::REQUIRED), 44 | ], 45 | ], 46 | 47 | [ 48 | 'name' => 'prohibited url', 49 | 'input' => '-http://test.com/1_2.html?a=b%20&c=1+2#test', 50 | 'expected_tokens' => [ 51 | T::T_PROHIBITED, 52 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'], 53 | ], 54 | 'expected_nodes' => [ 55 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test', BoolOperator::PROHIBITED), 56 | ], 57 | ], 58 | 59 | [ 60 | 'name' => 'url with boost int', 61 | 'input' => 'http://test.com/1_2.html?a=b%20&c=1+2#test^5', 62 | 'expected_tokens' => [ 63 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'], 64 | T::T_BOOST, 65 | [T::T_NUMBER, 5.0], 66 | ], 67 | 'expected_nodes' => [ 68 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test', null, true, 5.0), 69 | ], 70 | ], 71 | 72 | [ 73 | 'name' => 'url with boost float', 74 | 'input' => 'http://test.com/1_2.html?a=b%20&c=1+2#test^15.5', 75 | 'expected_tokens' => [ 76 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'], 77 | T::T_BOOST, 78 | [T::T_NUMBER, 15.5], 79 | ], 80 | 'expected_nodes' => [ 81 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test', null, true, Url::MAX_BOOST), 82 | ], 83 | ], 
84 | 85 | [ 86 | 'name' => 'url with fuzzy int', 87 | 'input' => 'http://test.com/1_2.html?a=b%20&c=1+2#test~5', 88 | 'expected_tokens' => [ 89 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'], 90 | T::T_FUZZY, 91 | [T::T_NUMBER, 5.0], 92 | ], 93 | 'expected_nodes' => [ 94 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test'), 95 | ], 96 | ], 97 | 98 | [ 99 | 'name' => 'url with fuzzy float', 100 | 'input' => 'http://test.com/1_2.html?a=b%20&c=1+2#test~5.5', 101 | 'expected_tokens' => [ 102 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'], 103 | T::T_FUZZY, 104 | [T::T_NUMBER, 5.5], 105 | ], 106 | 'expected_nodes' => [ 107 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test'), 108 | ], 109 | ], 110 | /* 111 | * END: URLS 112 | */ 113 | 114 | 115 | /* 116 | * START: EMOTICONS 117 | * todo: need more emoticon tests 118 | */ 119 | [ 120 | 'name' => 'simple emoticons', 121 | 'input' => ':) :(', 122 | 'expected_tokens' => [ 123 | [T::T_EMOTICON, ':)'], 124 | [T::T_EMOTICON, ':('], 125 | ], 126 | 'expected_nodes' => [ 127 | new Emoticon(':)', BoolOperator::REQUIRED), 128 | new Emoticon(':(', BoolOperator::REQUIRED), 129 | ], 130 | ], 131 | /* 132 | * END: EMOTICONS 133 | */ 134 | 135 | 136 | /* 137 | * START: EMOJIS 138 | */ 139 | [ 140 | 'name' => 'simple emoji', 141 | 'input' => 'ice 🍦 poop 💩 doh 😳', 142 | 'expected_tokens' => [ 143 | [T::T_WORD, 'ice'], 144 | [T::T_EMOJI, '🍦'], 145 | [T::T_WORD, 'poop'], 146 | [T::T_EMOJI, '💩'], 147 | [T::T_WORD, 'doh'], 148 | [T::T_EMOJI, '😳'], 149 | ], 150 | 'expected_nodes' => [ 151 | new Word('ice'), 152 | new Emoji('🍦', BoolOperator::REQUIRED), 153 | new Word('poop'), 154 | new Emoji('💩', BoolOperator::REQUIRED), 155 | new Word('doh'), 156 | new Emoji('😳', BoolOperator::REQUIRED), 157 | ], 158 | ], 159 | /* 160 | * END: EMOJIS 161 | */ 162 | 163 | 164 | /* 165 | * START: BOOST AND FUZZY 166 | */ 167 | [ 168 | 'name' => 'boost and fuzzy in filter', 169 | 'input' => 'f:b^5 f:f~5', 170 | 'expected_tokens' => 
[ 171 | [T::T_FIELD_START, 'f'], 172 | [T::T_WORD, 'b'], 173 | T::T_FIELD_END, 174 | T::T_BOOST, 175 | [T::T_NUMBER, 5.0], 176 | [T::T_FIELD_START, 'f'], 177 | [T::T_WORD, 'f'], 178 | T::T_FIELD_END, 179 | T::T_FUZZY, 180 | [T::T_NUMBER, 5.0], 181 | ], 182 | 'expected_nodes' => [ 183 | new Field('f', new Word('b'), null, true, 5.0), 184 | new Field('f', new Word('f'), null, false, Field::DEFAULT_BOOST), 185 | ], 186 | ], 187 | 188 | [ 189 | 'name' => 'boost and fuzzy in range', 190 | 'input' => 'f:[1^5..5]^5 f:[1~5..5]~5', 191 | 'expected_tokens' => [ 192 | [T::T_FIELD_START, 'f'], 193 | T::T_RANGE_INCL_START, 194 | [T::T_NUMBER, 1.0], 195 | [T::T_NUMBER, 5.0], 196 | T::T_TO, 197 | [T::T_NUMBER, 5.0], 198 | T::T_RANGE_INCL_END, 199 | T::T_FIELD_END, 200 | T::T_BOOST, 201 | [T::T_NUMBER, 5.0], 202 | [T::T_FIELD_START, 'f'], 203 | T::T_RANGE_INCL_START, 204 | [T::T_NUMBER, 1.0], 205 | [T::T_NUMBER, 5.0], 206 | T::T_TO, 207 | [T::T_NUMBER, 5.0], 208 | T::T_RANGE_INCL_END, 209 | T::T_FIELD_END, 210 | T::T_FUZZY, 211 | [T::T_NUMBER, 5.0], 212 | ], 213 | 'expected_nodes' => [ 214 | new Field( 215 | 'f', 216 | new NumberRange( 217 | new Numbr(1.0), 218 | new Numbr(5.0) 219 | ), 220 | null, 221 | true, 222 | 5.0 223 | ), 224 | new Field( 225 | 'f', 226 | new NumberRange( 227 | new Numbr(1.0), 228 | new Numbr(5.0) 229 | ), 230 | null, 231 | false, 232 | Field::DEFAULT_BOOST 233 | ), 234 | ], 235 | ], 236 | /* 237 | * END: BOOST AND FUZZY 238 | */ 239 | 240 | 241 | /* 242 | * START: PHRASES 243 | */ 244 | [ 245 | 'name' => 'simple phrase', 246 | 'input' => 'a "simple phrase"', 247 | 'expected_tokens' => [ 248 | [T::T_WORD, 'a'], 249 | [T::T_PHRASE, 'simple phrase'], 250 | ], 251 | 'expected_nodes' => [ 252 | new Word('a'), 253 | new Phrase('simple phrase'), 254 | ], 255 | ], 256 | 257 | [ 258 | 'name' => 'required phrase', 259 | 'input' => 'a +"simple phrase"', 260 | 'expected_tokens' => [ 261 | [T::T_WORD, 'a'], 262 | T::T_REQUIRED, 263 | [T::T_PHRASE, 'simple phrase'], 264 
| ], 265 | 'expected_nodes' => [ 266 | new Word('a'), 267 | new Phrase('simple phrase', BoolOperator::REQUIRED), 268 | ], 269 | ], 270 | 271 | [ 272 | 'name' => 'prohibited phrase', 273 | 'input' => 'a -"simple phrase"', 274 | 'expected_tokens' => [ 275 | [T::T_WORD, 'a'], 276 | T::T_PROHIBITED, 277 | [T::T_PHRASE, 'simple phrase'], 278 | ], 279 | 'expected_nodes' => [ 280 | new Word('a'), 281 | new Phrase('simple phrase', BoolOperator::PROHIBITED), 282 | ], 283 | ], 284 | 285 | [ 286 | 'name' => 'boosted phrase int', 287 | 'input' => 'a "simple phrase"^1', 288 | 'expected_tokens' => [ 289 | [T::T_WORD, 'a'], 290 | [T::T_PHRASE, 'simple phrase'], 291 | T::T_BOOST, 292 | [T::T_NUMBER, 1.0], 293 | ], 294 | 'expected_nodes' => [ 295 | new Word('a'), 296 | new Phrase('simple phrase', null, true, 1.0), 297 | ], 298 | ], 299 | 300 | [ 301 | 'name' => 'boosted phrase float', 302 | 'input' => 'a "simple phrase"^0.1', 303 | 'expected_tokens' => [ 304 | [T::T_WORD, 'a'], 305 | [T::T_PHRASE, 'simple phrase'], 306 | T::T_BOOST, 307 | [T::T_NUMBER, 0.1], 308 | ], 309 | 'expected_nodes' => [ 310 | new Word('a'), 311 | new Phrase('simple phrase', null, true, 0.1), 312 | ], 313 | ], 314 | 315 | [ 316 | 'name' => 'fuzzy phrase int', 317 | 'input' => 'a "simple phrase"~1', 318 | 'expected_tokens' => [ 319 | [T::T_WORD, 'a'], 320 | [T::T_PHRASE, 'simple phrase'], 321 | T::T_FUZZY, 322 | [T::T_NUMBER, 1.0], 323 | ], 324 | 'expected_nodes' => [ 325 | new Word('a'), 326 | new Phrase('simple phrase', null, false, Phrase::DEFAULT_BOOST, true, Phrase::MIN_FUZZY), 327 | ], 328 | ], 329 | 330 | [ 331 | 'name' => 'fuzzy phrase float', 332 | 'input' => 'a "simple phrase"~0.1', 333 | 'expected_tokens' => [ 334 | [T::T_WORD, 'a'], 335 | [T::T_PHRASE, 'simple phrase'], 336 | T::T_FUZZY, 337 | [T::T_NUMBER, 0.1], 338 | ], 339 | 'expected_nodes' => [ 340 | new Word('a'), 341 | new Phrase('simple phrase', null, false, Phrase::DEFAULT_BOOST, true, Phrase::MIN_FUZZY), 342 | ], 343 | ], 344 | 345 | [ 
346 | 'name' => 'phrase with embedded emoticons', 347 | 'input' => '"a smiley :)"', 348 | 'expected_tokens' => [ 349 | [T::T_PHRASE, 'a smiley :)'], 350 | ], 351 | 'expected_nodes' => [ 352 | new Phrase('a smiley :)'), 353 | ], 354 | ], 355 | 356 | [ 357 | 'name' => 'phrase with embedded emojis', 358 | 'input' => '"ice cream 🍦"', 359 | 'expected_tokens' => [ 360 | [T::T_PHRASE, 'ice cream 🍦'], 361 | ], 362 | 'expected_nodes' => [ 363 | new Phrase('ice cream 🍦'), 364 | ], 365 | ], 366 | 367 | [ 368 | 'name' => 'phrase with embedded punctation, boosting, etc.', 369 | 'input' => '"boosted^51.50 .. field:test~5"', 370 | 'expected_tokens' => [ 371 | [T::T_PHRASE, 'boosted^51.50 .. field:test~5'], 372 | ], 373 | 'expected_nodes' => [ 374 | new Phrase('boosted^51.50 .. field:test~5'), 375 | ], 376 | ], 377 | 378 | [ 379 | 'name' => 'phrase with dates', 380 | 'input' => '"in the year >=2000-01-01"', 381 | 'expected_tokens' => [ 382 | [T::T_PHRASE, 'in the year >=2000-01-01'], 383 | ], 384 | 'expected_nodes' => [ 385 | new Phrase('in the year >=2000-01-01'), 386 | ], 387 | ], 388 | 389 | [ 390 | 'name' => 'phrase on phrase', 391 | 'input' => '"p1""p2""p3', 392 | 'expected_tokens' => [ 393 | [T::T_PHRASE, 'p1'], 394 | [T::T_PHRASE, 'p2'], 395 | [T::T_WORD, 'p3'], 396 | ], 397 | 'expected_nodes' => [ 398 | new Phrase('p1'), 399 | new Phrase('p2'), 400 | new Word('p3'), 401 | ], 402 | ], 403 | /* 404 | * END: PHRASES 405 | */ 406 | 407 | 408 | /* 409 | * START: HASHTAGS 410 | */ 411 | [ 412 | 'name' => 'simple hashtags', 413 | 'input' => 'a #Cat in a #hat', 414 | 'expected_tokens' => [ 415 | [T::T_WORD, 'a'], 416 | [T::T_HASHTAG, 'Cat'], 417 | [T::T_WORD, 'in'], 418 | [T::T_WORD, 'a'], 419 | [T::T_HASHTAG, 'hat'], 420 | ], 421 | 'expected_nodes' => [ 422 | new Word('a'), 423 | new Hashtag('Cat', BoolOperator::REQUIRED), 424 | new Word('in'), 425 | new Word('a'), 426 | new Hashtag('hat', BoolOperator::REQUIRED), 427 | ], 428 | ], 429 | 430 | [ 431 | 'name' => 
'required/prohibited hashtags with boost', 432 | 'input' => '+#Cat -#hat^100', 433 | 'expected_tokens' => [ 434 | T::T_REQUIRED, 435 | [T::T_HASHTAG, 'Cat'], 436 | T::T_PROHIBITED, 437 | [T::T_HASHTAG, 'hat'], 438 | T::T_BOOST, 439 | [T::T_NUMBER, 100.0], 440 | ], 441 | 'expected_nodes' => [ 442 | new Hashtag('Cat', BoolOperator::REQUIRED), 443 | new Hashtag('hat', BoolOperator::PROHIBITED, true, Hashtag::MAX_BOOST), 444 | ], 445 | ], 446 | 447 | [ 448 | 'name' => 'required/prohibited hashtags with fuzzy', 449 | 'input' => '#hat~100 #hat~100.1', 450 | 'expected_tokens' => [ 451 | [T::T_HASHTAG, 'hat'], 452 | T::T_FUZZY, 453 | [T::T_NUMBER, 100.0], 454 | [T::T_HASHTAG, 'hat'], 455 | T::T_FUZZY, 456 | [T::T_NUMBER, 100.1], 457 | ], 458 | 'expected_nodes' => [ 459 | new Hashtag('hat', BoolOperator::REQUIRED), 460 | new Hashtag('hat', BoolOperator::REQUIRED), 461 | ], 462 | ], 463 | 464 | [ 465 | 'name' => 'required/prohibited hashtags with boost', 466 | 'input' => '+#Cat -#hat^100 #_cat #2015cat__', 467 | 'expected_tokens' => [ 468 | T::T_REQUIRED, 469 | [T::T_HASHTAG, 'Cat'], 470 | T::T_PROHIBITED, 471 | [T::T_HASHTAG, 'hat'], 472 | T::T_BOOST, 473 | [T::T_NUMBER, 100.0], 474 | [T::T_HASHTAG, '_cat'], 475 | [T::T_HASHTAG, '2015cat__'], 476 | ], 477 | 'expected_nodes' => [ 478 | new Hashtag('Cat', BoolOperator::REQUIRED), 479 | new Hashtag('hat', BoolOperator::PROHIBITED, true, Hashtag::MAX_BOOST), 480 | new Hashtag('_cat', BoolOperator::REQUIRED), 481 | new Hashtag('2015cat__', BoolOperator::REQUIRED), 482 | ], 483 | ], 484 | 485 | // todo: should we refactor to catch #hashtag#hashtag or @mention#tag or #tag@mention? 
486 | [ 487 | 'name' => 'hashtag on hashtag and double hashtag', 488 | 'input' => '#cat#cat ##cat #####cat', 489 | 'expected_tokens' => [ 490 | [T::T_WORD, 'cat#cat'], 491 | [T::T_HASHTAG, 'cat'], 492 | [T::T_HASHTAG, 'cat'], 493 | ], 494 | 'expected_nodes' => [ 495 | new Word('cat#cat'), 496 | new Hashtag('cat', BoolOperator::REQUIRED), 497 | new Hashtag('cat', BoolOperator::REQUIRED), 498 | ], 499 | ], 500 | /* 501 | * END: HASHTAGS 502 | */ 503 | 504 | 505 | /* 506 | * START: MENTIONS 507 | */ 508 | [ 509 | 'name' => 'simple mentions', 510 | 'input' => '@user @user_name @user.name @user-name', 511 | 'expected_tokens' => [ 512 | [T::T_MENTION, 'user'], 513 | [T::T_MENTION, 'user_name'], 514 | [T::T_MENTION, 'user.name'], 515 | [T::T_MENTION, 'user-name'], 516 | ], 517 | 'expected_nodes' => [ 518 | new Mention('user', BoolOperator::REQUIRED), 519 | new Mention('user_name', BoolOperator::REQUIRED), 520 | new Mention('user.name', BoolOperator::REQUIRED), 521 | new Mention('user-name', BoolOperator::REQUIRED), 522 | 523 | ], 524 | ], 525 | 526 | [ 527 | 'name' => 'required mentions', 528 | 'input' => '+@user +@user_name +@user.name +@user-name', 529 | 'expected_tokens' => [ 530 | T::T_REQUIRED, 531 | [T::T_MENTION, 'user'], 532 | T::T_REQUIRED, 533 | [T::T_MENTION, 'user_name'], 534 | T::T_REQUIRED, 535 | [T::T_MENTION, 'user.name'], 536 | T::T_REQUIRED, 537 | [T::T_MENTION, 'user-name'], 538 | ], 539 | 'expected_nodes' => [ 540 | new Mention('user', BoolOperator::REQUIRED), 541 | new Mention('user_name', BoolOperator::REQUIRED), 542 | new Mention('user.name', BoolOperator::REQUIRED), 543 | new Mention('user-name', BoolOperator::REQUIRED), 544 | ], 545 | ], 546 | 547 | [ 548 | 'name' => 'prohibited mentions', 549 | 'input' => '-@user -@user_name -@user.name -@user-name', 550 | 'expected_tokens' => [ 551 | T::T_PROHIBITED, 552 | [T::T_MENTION, 'user'], 553 | T::T_PROHIBITED, 554 | [T::T_MENTION, 'user_name'], 555 | T::T_PROHIBITED, 556 | [T::T_MENTION, 'user.name'], 
557 | T::T_PROHIBITED, 558 | [T::T_MENTION, 'user-name'], 559 | ], 560 | 'expected_nodes' => [ 561 | new Mention('user', BoolOperator::PROHIBITED), 562 | new Mention('user_name', BoolOperator::PROHIBITED), 563 | new Mention('user.name', BoolOperator::PROHIBITED), 564 | new Mention('user-name', BoolOperator::PROHIBITED), 565 | ], 566 | ], 567 | 568 | [ 569 | 'name' => 'mentions with emails and hashtags', 570 | 'input' => '@john@doe.com @john#doe', 571 | 'expected_tokens' => [ 572 | [T::T_WORD, 'john@doe.com'], 573 | [T::T_WORD, 'john#doe'], 574 | ], 575 | 'expected_nodes' => [ 576 | new Word('john@doe.com'), 577 | new Word('john#doe'), 578 | ], 579 | ], 580 | 581 | [ 582 | 'name' => 'mentions with punctuation', 583 | 'input' => '@john. @wtf! @who?', 584 | 'expected_tokens' => [ 585 | [T::T_MENTION, 'john'], 586 | [T::T_MENTION, 'wtf'], 587 | [T::T_MENTION, 'who'], 588 | ], 589 | 'expected_nodes' => [ 590 | new Mention('john', BoolOperator::REQUIRED), 591 | new Mention('wtf', BoolOperator::REQUIRED), 592 | new Mention('who', BoolOperator::REQUIRED), 593 | ], 594 | ], 595 | 596 | [ 597 | 'name' => 'mentions with special chars', 598 | 'input' => '@john^doe @john!doe', 599 | 'expected_tokens' => [ 600 | [T::T_MENTION, 'john'], 601 | T::T_BOOST, 602 | [T::T_WORD, 'doe'], 603 | [T::T_WORD, 'john!doe'], 604 | ], 605 | 'expected_nodes' => [ 606 | new Mention('john', BoolOperator::REQUIRED), 607 | new Word('doe'), 608 | new Word('john!doe'), 609 | ], 610 | ], 611 | /* 612 | * END: MENTIONS 613 | */ 614 | 615 | 616 | /* 617 | * START: NUMBERS 618 | */ 619 | [ 620 | 'name' => 'integers, decimals and exponential form', 621 | 'input' => '100 3.1415926535898 2.2E-5', 622 | 'expected_tokens' => [ 623 | [T::T_NUMBER, 100.0], 624 | [T::T_NUMBER, 3.1415926535898], 625 | [T::T_NUMBER, 2.2E-5], 626 | ], 627 | 'expected_nodes' => [ 628 | new Numbr(100.0), 629 | new Numbr(3.1415926535898), 630 | new Numbr(2.2E-5), 631 | ], 632 | ], 633 | 634 | [ 635 | 'name' => 'negative integers, 
decimals and exponential form', 636 | 'input' => '-100 -3.1415926535898 -2.2E-5', 637 | 'expected_tokens' => [ 638 | [T::T_NUMBER, -100.0], 639 | [T::T_NUMBER, -3.1415926535898], 640 | [T::T_NUMBER, -2.2E-5], 641 | ], 642 | 'expected_nodes' => [ 643 | new Numbr(-100.0), 644 | new Numbr(-3.1415926535898), 645 | new Numbr(-2.2E-5), 646 | ], 647 | ], 648 | 649 | [ 650 | 'name' => 'words with boosted numbers', 651 | 'input' => 'word^100 word^3.1415926535898 word^2.2E-5', 652 | 'expected_tokens' => [ 653 | [T::T_WORD, 'word'], 654 | T::T_BOOST, 655 | [T::T_NUMBER, 100.0], 656 | [T::T_WORD, 'word'], 657 | T::T_BOOST, 658 | [T::T_NUMBER, 3.1415926535898], 659 | [T::T_WORD, 'word'], 660 | T::T_BOOST, 661 | [T::T_NUMBER, 2.2E-5], 662 | ], 663 | 'expected_nodes' => [ 664 | new Word('word', null, true, 10.0), 665 | new Word('word', null, true, 3.1415926535898), 666 | new Word('word', null, true, 2.2E-5), 667 | ], 668 | ], 669 | 670 | [ 671 | 'name' => 'words with boosted negative numbers', 672 | 'input' => 'word^-100 word^-3.1415926535898 word^-2.2E-5', 673 | 'expected_tokens' => [ 674 | [T::T_WORD, 'word'], 675 | T::T_BOOST, 676 | [T::T_NUMBER, -100.0], 677 | [T::T_WORD, 'word'], 678 | T::T_BOOST, 679 | [T::T_NUMBER, -3.1415926535898], 680 | [T::T_WORD, 'word'], 681 | T::T_BOOST, 682 | [T::T_NUMBER, -2.2E-5], 683 | ], 684 | 'expected_nodes' => [ 685 | new Word('word', null, true, 0.0), 686 | new Word('word', null, true, 0.0), 687 | new Word('word', null, true, 0.0), 688 | ], 689 | ], 690 | 691 | [ 692 | 'name' => 'words with fuzzy numbers', 693 | 'input' => 'word~100 word~3.1415926535898 word~2.2E-5', 694 | 'expected_tokens' => [ 695 | [T::T_WORD, 'word'], 696 | T::T_FUZZY, 697 | [T::T_NUMBER, 100.0], 698 | [T::T_WORD, 'word'], 699 | T::T_FUZZY, 700 | [T::T_NUMBER, 3.1415926535898], 701 | [T::T_WORD, 'word'], 702 | T::T_FUZZY, 703 | [T::T_NUMBER, 2.2E-5], 704 | ], 705 | 'expected_nodes' => [ 706 | new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MAX_FUZZY), 707 
| new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MAX_FUZZY), 708 | new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MIN_FUZZY), 709 | ], 710 | ], 711 | 712 | [ 713 | 'name' => 'words with fuzzy negative numbers', 714 | 'input' => 'word~-100 word~-3.1415926535898 word~-2.2E-5', 715 | 'expected_tokens' => [ 716 | [T::T_WORD, 'word'], 717 | T::T_FUZZY, 718 | [T::T_NUMBER, -100.0], 719 | [T::T_WORD, 'word'], 720 | T::T_FUZZY, 721 | [T::T_NUMBER, -3.1415926535898], 722 | [T::T_WORD, 'word'], 723 | T::T_FUZZY, 724 | [T::T_NUMBER, -2.2E-5], 725 | ], 726 | 'expected_nodes' => [ 727 | new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MIN_FUZZY), 728 | new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MIN_FUZZY), 729 | new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MIN_FUZZY), 730 | ], 731 | ], 732 | /* 733 | * END: NUMBERS 734 | */ 735 | 736 | 737 | /* 738 | * START: FIELDS 739 | */ 740 | [ 741 | 'name' => 'fields with hypen, underscore and dot', 742 | 'input' => '+first-name:homer -last_name:simpson job.performance:poor^5', 743 | 'expected_tokens' => [ 744 | T::T_REQUIRED, 745 | [T::T_FIELD_START, 'first-name'], 746 | [T::T_WORD, 'homer'], 747 | T::T_FIELD_END, 748 | T::T_PROHIBITED, 749 | [T::T_FIELD_START, 'last_name'], 750 | [T::T_WORD, 'simpson'], 751 | T::T_FIELD_END, 752 | [T::T_FIELD_START, 'job.performance'], 753 | [T::T_WORD, 'poor'], 754 | T::T_FIELD_END, 755 | T::T_BOOST, 756 | [T::T_NUMBER, 5.0], 757 | ], 758 | 'expected_nodes' => [ 759 | new Field('first-name', new Word('homer'), BoolOperator::REQUIRED, false, Field::DEFAULT_BOOST), 760 | new Field('last_name', new Word('simpson'), BoolOperator::PROHIBITED, false, Field::DEFAULT_BOOST), 761 | new Field('job.performance', new Word('poor'), null, true, 5.0), 762 | ], 763 | ], 764 | 765 | [ 766 | 'name' => 'field with field in it', 767 | 'input' => 'field:subfield:what', 768 | 'expected_tokens' => [ 769 | [T::T_FIELD_START, 'field'], 770 | 
[T::T_WORD, 'subfield:what'], 771 | T::T_FIELD_END, 772 | ], 773 | 'expected_nodes' => [ 774 | new Field('field', new Word('subfield:what'), null, false, Field::DEFAULT_BOOST), 775 | ], 776 | ], 777 | 778 | [ 779 | 'name' => 'field with no value', 780 | 'input' => 'field:', 781 | 'expected_tokens' => [ 782 | [T::T_FIELD_START, 'field'], 783 | T::T_FIELD_END, 784 | ], 785 | 'expected_nodes' => [ 786 | new Word('field'), 787 | ], 788 | ], 789 | 790 | [ 791 | 'name' => 'field with phrases', 792 | 'input' => 'field:"boosted^5 +required"^1 -field:"[1..5]"~4', 793 | 'expected_tokens' => [ 794 | [T::T_FIELD_START, 'field'], 795 | [T::T_PHRASE, 'boosted^5 +required'], 796 | T::T_FIELD_END, 797 | T::T_BOOST, 798 | [T::T_NUMBER, 1.0], 799 | T::T_PROHIBITED, 800 | [T::T_FIELD_START, 'field'], 801 | [T::T_PHRASE, '[1..5]'], 802 | T::T_FIELD_END, 803 | T::T_FUZZY, 804 | [T::T_NUMBER, 4.0], 805 | ], 806 | 'expected_nodes' => [ 807 | new Field('field', new Phrase('boosted^5 +required'), null, true, 1.0), 808 | new Field('field', new Phrase('[1..5]'), BoolOperator::PROHIBITED, false, Field::DEFAULT_BOOST), 809 | ], 810 | ], 811 | 812 | [ 813 | 'name' => 'field with greater/less than', 814 | 'input' => 'field:>100 field:>=100.1 field:<100 field:<=100.1', 815 | 'expected_tokens' => [ 816 | [T::T_FIELD_START, 'field'], 817 | T::T_GREATER_THAN, 818 | [T::T_NUMBER, 100.0], 819 | T::T_FIELD_END, 820 | [T::T_FIELD_START, 'field'], 821 | T::T_GREATER_THAN, 822 | T::T_EQUALS, 823 | [T::T_NUMBER, 100.1], 824 | T::T_FIELD_END, 825 | [T::T_FIELD_START, 'field'], 826 | T::T_LESS_THAN, 827 | [T::T_NUMBER, 100.0], 828 | T::T_FIELD_END, 829 | [T::T_FIELD_START, 'field'], 830 | T::T_LESS_THAN, 831 | T::T_EQUALS, 832 | [T::T_NUMBER, 100.1], 833 | T::T_FIELD_END, 834 | ], 835 | 'expected_nodes' => [ 836 | new Field('field', new Numbr(100, ComparisonOperator::GT), null, false, Field::DEFAULT_BOOST), 837 | new Field('field', new Numbr(100.1, ComparisonOperator::GTE), null, false, 
Field::DEFAULT_BOOST), 838 | new Field('field', new Numbr(100, ComparisonOperator::LT), null, false, Field::DEFAULT_BOOST), 839 | new Field('field', new Numbr(100.1, ComparisonOperator::LTE), null, false, Field::DEFAULT_BOOST), 840 | ], 841 | ], 842 | 843 | [ 844 | 'name' => 'field with a hashtag or mention', 845 | 'input' => 'field:#cats field:@user.name', 846 | 'expected_tokens' => [ 847 | [T::T_FIELD_START, 'field'], 848 | [T::T_HASHTAG, 'cats'], 849 | T::T_FIELD_END, 850 | [T::T_FIELD_START, 'field'], 851 | [T::T_MENTION, 'user.name'], 852 | T::T_FIELD_END, 853 | ], 854 | 'expected_nodes' => [ 855 | new Field('field', new Hashtag('cats', BoolOperator::REQUIRED), null, false, Field::DEFAULT_BOOST), 856 | new Field('field', new Mention('user.name', BoolOperator::REQUIRED), null, false, Field::DEFAULT_BOOST), 857 | ], 858 | ], 859 | 860 | [ 861 | 'name' => 'field with inclusive range', 862 | 'input' => 'field:[1..5] +field:[1 TO 5]', 863 | 'expected_tokens' => [ 864 | [T::T_FIELD_START, 'field'], 865 | T::T_RANGE_INCL_START, 866 | [T::T_NUMBER, 1.0], 867 | T::T_TO, 868 | [T::T_NUMBER, 5.0], 869 | T::T_RANGE_INCL_END, 870 | T::T_FIELD_END, 871 | T::T_REQUIRED, 872 | [T::T_FIELD_START, 'field'], 873 | T::T_RANGE_INCL_START, 874 | [T::T_NUMBER, 1.0], 875 | T::T_TO, 876 | [T::T_NUMBER, 5.0], 877 | T::T_RANGE_INCL_END, 878 | T::T_FIELD_END, 879 | ], 880 | 'expected_nodes' => [ 881 | new Field( 882 | 'field', 883 | new NumberRange( 884 | new Numbr(1), 885 | new Numbr(5) 886 | ), 887 | null, 888 | false, 889 | Field::DEFAULT_BOOST 890 | ), 891 | new Field( 892 | 'field', 893 | new NumberRange( 894 | new Numbr(1), 895 | new Numbr(5) 896 | ), 897 | BoolOperator::REQUIRED, 898 | false, 899 | Field::DEFAULT_BOOST 900 | ), 901 | ], 902 | ], 903 | 904 | [ 905 | 'name' => 'field with exclusive range', 906 | 'input' => 'field:{1.1..5.5} +field:{1.1 TO 5.5}', 907 | 'expected_tokens' => [ 908 | [T::T_FIELD_START, 'field'], 909 | T::T_RANGE_EXCL_START, 910 | [T::T_NUMBER, 1.1], 911 
| T::T_TO, 912 | [T::T_NUMBER, 5.5], 913 | T::T_RANGE_EXCL_END, 914 | T::T_FIELD_END, 915 | T::T_REQUIRED, 916 | [T::T_FIELD_START, 'field'], 917 | T::T_RANGE_EXCL_START, 918 | [T::T_NUMBER, 1.1], 919 | T::T_TO, 920 | [T::T_NUMBER, 5.5], 921 | T::T_RANGE_EXCL_END, 922 | T::T_FIELD_END, 923 | ], 924 | 'expected_nodes' => [ 925 | new Field( 926 | 'field', 927 | new NumberRange( 928 | new Numbr(1.1), 929 | new Numbr(5.5), 930 | true 931 | ), 932 | null, 933 | false, 934 | Field::DEFAULT_BOOST 935 | ), 936 | new Field( 937 | 'field', 938 | new NumberRange( 939 | new Numbr(1.1), 940 | new Numbr(5.5), 941 | true 942 | ), 943 | BoolOperator::REQUIRED, 944 | false, 945 | Field::DEFAULT_BOOST 946 | ), 947 | ], 948 | ], 949 | 950 | [ 951 | 'name' => 'field with subquery', 952 | 'input' => 'field:(cat OR dog) test', 953 | 'expected_tokens' => [ 954 | [T::T_FIELD_START, 'field'], 955 | T::T_SUBQUERY_START, 956 | [T::T_WORD, 'cat'], 957 | T::T_OR, 958 | [T::T_WORD, 'dog'], 959 | T::T_SUBQUERY_END, 960 | T::T_FIELD_END, 961 | [T::T_WORD, 'test'], 962 | ], 963 | 'expected_nodes' => [ 964 | new Field( 965 | 'field', 966 | new Subquery([ 967 | new Word('cat'), 968 | new Word('dog'), 969 | ]), 970 | null, 971 | false, 972 | Field::DEFAULT_BOOST 973 | ), 974 | new Word('test'), 975 | ], 976 | ], 977 | 978 | [ 979 | 'name' => 'field with range in subquery', 980 | 'input' => 'field:(cat OR 1..5)', 981 | 'expected_tokens' => [ 982 | [T::T_FIELD_START, 'field'], 983 | T::T_SUBQUERY_START, 984 | [T::T_WORD, 'cat'], 985 | T::T_OR, 986 | [T::T_NUMBER, 1.0], 987 | [T::T_NUMBER, 5.0], 988 | T::T_SUBQUERY_END, 989 | T::T_FIELD_END, 990 | ], 991 | 'expected_nodes' => [ 992 | new Field( 993 | 'field', 994 | new Subquery([ 995 | new Word('cat'), 996 | new Numbr(1.0), 997 | new Numbr(5.0), 998 | ]), 999 | null, 1000 | false, 1001 | Field::DEFAULT_BOOST 1002 | ), 1003 | ], 1004 | ], 1005 | 1006 | [ 1007 | 'name' => 'field with dates', 1008 | 'input' => 'field:2015-12-18 field:>2015-12-18 
field:<2015-12-18 field:>=2015-12-18 field:<=2015-12-18', 1009 | 'expected_tokens' => [ 1010 | [T::T_FIELD_START, 'field'], 1011 | [T::T_DATE, '2015-12-18'], 1012 | T::T_FIELD_END, 1013 | [T::T_FIELD_START, 'field'], 1014 | T::T_GREATER_THAN, 1015 | [T::T_DATE, '2015-12-18'], 1016 | T::T_FIELD_END, 1017 | [T::T_FIELD_START, 'field'], 1018 | T::T_LESS_THAN, 1019 | [T::T_DATE, '2015-12-18'], 1020 | T::T_FIELD_END, 1021 | [T::T_FIELD_START, 'field'], 1022 | T::T_GREATER_THAN, 1023 | T::T_EQUALS, 1024 | [T::T_DATE, '2015-12-18'], 1025 | T::T_FIELD_END, 1026 | [T::T_FIELD_START, 'field'], 1027 | T::T_LESS_THAN, 1028 | T::T_EQUALS, 1029 | [T::T_DATE, '2015-12-18'], 1030 | T::T_FIELD_END, 1031 | ], 1032 | 'expected_nodes' => [ 1033 | new Field( 1034 | 'field', 1035 | new Date('2015-12-18'), 1036 | null, 1037 | false, 1038 | Field::DEFAULT_BOOST 1039 | ), 1040 | new Field( 1041 | 'field', 1042 | new Date( 1043 | '2015-12-18', 1044 | null, 1045 | false, 1046 | Date::DEFAULT_BOOST, 1047 | false, 1048 | Date::DEFAULT_FUZZY, 1049 | ComparisonOperator::GT 1050 | ), 1051 | null, 1052 | false, 1053 | Field::DEFAULT_BOOST 1054 | ), 1055 | new Field( 1056 | 'field', 1057 | new Date( 1058 | '2015-12-18', 1059 | null, 1060 | false, 1061 | Date::DEFAULT_BOOST, 1062 | false, 1063 | Date::DEFAULT_FUZZY, 1064 | ComparisonOperator::LT 1065 | ), 1066 | null, 1067 | false, 1068 | Field::DEFAULT_BOOST 1069 | ), 1070 | new Field( 1071 | 'field', 1072 | new Date( 1073 | '2015-12-18', 1074 | null, 1075 | false, 1076 | Date::DEFAULT_BOOST, 1077 | false, 1078 | Date::DEFAULT_FUZZY, 1079 | ComparisonOperator::GTE 1080 | ), 1081 | null, 1082 | false, 1083 | Field::DEFAULT_BOOST 1084 | ), 1085 | new Field( 1086 | 'field', 1087 | new Date( 1088 | '2015-12-18', 1089 | null, 1090 | false, 1091 | Date::DEFAULT_BOOST, 1092 | false, 1093 | Date::DEFAULT_FUZZY, 1094 | ComparisonOperator::LTE 1095 | ), 1096 | null, 1097 | false, 1098 | Field::DEFAULT_BOOST 1099 | ), 1100 | ], 1101 | ], 1102 | 1103 | [ 1104 
| 'name' => 'field leading _ and uuid', 1105 | 'input' => '_id:a9fc3e46-150a-45cd-ad39-c80f93119900^5', 1106 | 'expected_tokens' => [ 1107 | [T::T_FIELD_START, '_id'], 1108 | [T::T_WORD, 'a9fc3e46-150a-45cd-ad39-c80f93119900'], 1109 | T::T_FIELD_END, 1110 | T::T_BOOST, 1111 | [T::T_NUMBER, 5.0], 1112 | ], 1113 | 'expected_nodes' => [ 1114 | new Field('_id', new Word('a9fc3e46-150a-45cd-ad39-c80f93119900'), null, true, 5.0), 1115 | ], 1116 | ], 1117 | 1118 | [ 1119 | 'name' => 'field with mentions and emails', 1120 | 'input' => 'email:john@doe.com -user:@twitterz', 1121 | 'expected_tokens' => [ 1122 | [T::T_FIELD_START, 'email'], 1123 | [T::T_WORD, 'john@doe.com'], 1124 | T::T_FIELD_END, 1125 | T::T_PROHIBITED, 1126 | [T::T_FIELD_START, 'user'], 1127 | [T::T_MENTION, 'twitterz'], 1128 | T::T_FIELD_END, 1129 | ], 1130 | 'expected_nodes' => [ 1131 | new Field('email', new Word('john@doe.com'), null, false, Field::DEFAULT_BOOST), 1132 | new Field( 1133 | 'user', 1134 | new Mention('twitterz', BoolOperator::REQUIRED), 1135 | BoolOperator::PROHIBITED, 1136 | false, 1137 | Field::DEFAULT_BOOST 1138 | ), 1139 | ], 1140 | ], 1141 | 1142 | [ 1143 | 'name' => 'field with hashtags', 1144 | 'input' => 'tags:#cats tags:(#cats || #dogs)', 1145 | 'expected_tokens' => [ 1146 | [T::T_FIELD_START, 'tags'], 1147 | [T::T_HASHTAG, 'cats'], 1148 | T::T_FIELD_END, 1149 | [T::T_FIELD_START, 'tags'], 1150 | T::T_SUBQUERY_START, 1151 | [T::T_HASHTAG, 'cats'], 1152 | T::T_OR, 1153 | [T::T_HASHTAG, 'dogs'], 1154 | T::T_SUBQUERY_END, 1155 | T::T_FIELD_END, 1156 | ], 1157 | 'expected_nodes' => [ 1158 | new Field( 1159 | 'tags', 1160 | new Hashtag('cats', BoolOperator::REQUIRED), 1161 | null, 1162 | false, 1163 | Field::DEFAULT_BOOST 1164 | ), 1165 | new Field( 1166 | 'tags', 1167 | new Subquery([ 1168 | new Hashtag('cats', BoolOperator::REQUIRED), 1169 | new Hashtag('dogs', BoolOperator::REQUIRED), 1170 | ]), 1171 | null, 1172 | false, 1173 | Field::DEFAULT_BOOST 1174 | ), 1175 | ], 1176 | ], 
1177 | /* 1178 | * END: FIELDS 1179 | */ 1180 | 1181 | 1182 | /* 1183 | * START: WORDS 1184 | */ 1185 | [ 1186 | 'name' => 'word with hashtag or mention in it', 1187 | 'input' => 'omg#lol omg@user @mention#tag #tag@mention', 1188 | 'expected_tokens' => [ 1189 | [T::T_WORD, 'omg#lol'], 1190 | [T::T_WORD, 'omg@user'], 1191 | [T::T_WORD, 'mention#tag'], 1192 | [T::T_WORD, 'tag@mention'], 1193 | ], 1194 | 'expected_nodes' => [ 1195 | new Word('omg#lol'), 1196 | new Word('omg@user'), 1197 | new Word('mention#tag'), 1198 | new Word('tag@mention'), 1199 | ], 1200 | ], 1201 | 1202 | [ 1203 | 'name' => 'required/prohibited words', 1204 | 'input' => '+c.h.u.d. -zombieland +ac/dc^5', 1205 | 'expected_tokens' => [ 1206 | T::T_REQUIRED, 1207 | [T::T_WORD, 'c.h.u.d'], 1208 | T::T_PROHIBITED, 1209 | [T::T_WORD, 'zombieland'], 1210 | T::T_REQUIRED, 1211 | [T::T_WORD, 'ac/dc'], 1212 | T::T_BOOST, 1213 | [T::T_NUMBER, 5.0], 1214 | ], 1215 | 'expected_nodes' => [ 1216 | new Word('c.h.u.d', BoolOperator::REQUIRED), 1217 | new Word('zombieland', BoolOperator::PROHIBITED), 1218 | new Word('ac/dc', BoolOperator::REQUIRED, true, 5.0), 1219 | ], 1220 | ], 1221 | 1222 | [ 1223 | 'name' => 'words that have embedded operators', 1224 | 'input' => 'cANDy AND OReos || dANDy && chORes^5', 1225 | 'expected_tokens' => [ 1226 | [T::T_WORD, 'cANDy'], 1227 | T::T_AND, 1228 | [T::T_WORD, 'OReos'], 1229 | T::T_OR, 1230 | [T::T_WORD, 'dANDy'], 1231 | T::T_AND, 1232 | [T::T_WORD, 'chORes'], 1233 | T::T_BOOST, 1234 | [T::T_NUMBER, 5.0], 1235 | ], 1236 | 'expected_nodes' => [ 1237 | new Word('cANDy', BoolOperator::REQUIRED), 1238 | new Word('OReos', BoolOperator::REQUIRED), 1239 | new Word('dANDy', BoolOperator::REQUIRED), 1240 | new Word('chORes', BoolOperator::REQUIRED, true, 5.0), 1241 | ], 1242 | ], 1243 | /* 1244 | * END: WORDS 1245 | */ 1246 | 1247 | 1248 | /* 1249 | * START: DATES 1250 | */ 1251 | [ 1252 | 'name' => 'dates in string', 1253 | 'input' => '2000-01-01 >=2000-01-01 (+2015-12-18) 
-2015-12-18', 1254 | 'expected_tokens' => [ 1255 | [T::T_DATE, '2000-01-01'], 1256 | [T::T_DATE, '2000-01-01'], 1257 | T::T_SUBQUERY_START, 1258 | T::T_REQUIRED, 1259 | [T::T_DATE, '2015-12-18'], 1260 | T::T_SUBQUERY_END, 1261 | T::T_PROHIBITED, 1262 | [T::T_DATE, '2015-12-18'], 1263 | ], 1264 | 'expected_nodes' => [ 1265 | new Date('2000-01-01'), 1266 | new Date('2000-01-01'), 1267 | new Date('2015-12-18', BoolOperator::REQUIRED), 1268 | new Date('2015-12-18', BoolOperator::PROHIBITED), 1269 | ], 1270 | ], 1271 | 1272 | [ 1273 | 'name' => 'dates on dates', 1274 | 'input' => '2000-01-012000-01-01 2000-01-01^2000-01-01', 1275 | 'expected_tokens' => [ 1276 | [T::T_WORD, '2000-01-012000-01-01'], 1277 | [T::T_DATE, '2000-01-01'], 1278 | T::T_BOOST, 1279 | [T::T_DATE, '2000-01-01'], 1280 | ], 1281 | 'expected_nodes' => [ 1282 | new Word('2000-01-012000-01-01'), 1283 | new Date('2000-01-01'), 1284 | new Date('2000-01-01'), 1285 | ], 1286 | ], 1287 | /* 1288 | * END: DATES 1289 | */ 1290 | 1291 | 1292 | /* 1293 | * START: ACCENTED CHARS 1294 | */ 1295 | [ 1296 | 'name' => 'accents and hyphens', 1297 | 'input' => '+Beyoncé Giselle Knowles-Carter', 1298 | 'expected_tokens' => [ 1299 | T::T_REQUIRED, 1300 | [T::T_WORD, 'Beyoncé'], 1301 | [T::T_WORD, 'Giselle'], 1302 | [T::T_WORD, 'Knowles-Carter'], 1303 | ], 1304 | 'expected_nodes' => [ 1305 | new Word('Beyoncé', BoolOperator::REQUIRED), 1306 | new Word('Giselle'), 1307 | new Word('Knowles-Carter'), 1308 | ], 1309 | ], 1310 | 1311 | [ 1312 | 'name' => 'accents and hyphen spice', 1313 | 'input' => 'J. 
Lo => Emme Maribel Muñiz $p0rty-spicé', 1314 | 'expected_tokens' => [ 1315 | [T::T_WORD, 'J'], 1316 | [T::T_WORD, 'Lo'], 1317 | [T::T_WORD, 'Emme'], 1318 | [T::T_WORD, 'Maribel'], 1319 | [T::T_WORD, 'Muñiz'], 1320 | [T::T_WORD, '$p0rty-spicé'], 1321 | ], 1322 | 'expected_nodes' => [ 1323 | new Word('J'), 1324 | new Word('Lo'), 1325 | new Word('Emme'), 1326 | new Word('Maribel'), 1327 | new Word('Muñiz'), 1328 | new Word('$p0rty-spicé'), 1329 | ], 1330 | ], 1331 | 1332 | [ 1333 | 'name' => 'utf chars', 1334 | 'input' => '测试 測試', 1335 | 'expected_tokens' => [ 1336 | [T::T_WORD, '测试'], 1337 | [T::T_WORD, '測試'], 1338 | ], 1339 | 'expected_nodes' => [ 1340 | new Word('测试'), 1341 | new Word('測試'), 1342 | ], 1343 | ], 1344 | /* 1345 | * END: ACCENTED CHARS 1346 | */ 1347 | 1348 | 1349 | /* 1350 | * START: RAPPERS and POP STARS 1351 | */ 1352 | [ 1353 | 'name' => 'crazy a$$ names', 1354 | 'input' => 'p!nk AND K$sha in a tr33 with 50¢', 1355 | 'expected_tokens' => [ 1356 | [T::T_WORD, 'p!nk'], 1357 | T::T_AND, 1358 | [T::T_WORD, 'K$sha'], 1359 | [T::T_WORD, 'in'], 1360 | [T::T_WORD, 'a'], 1361 | [T::T_WORD, 'tr33'], 1362 | [T::T_WORD, 'with'], 1363 | [T::T_WORD, '50¢'], 1364 | ], 1365 | 'expected_nodes' => [ 1366 | new Word('p!nk', BoolOperator::REQUIRED), 1367 | new Word('K$sha', BoolOperator::REQUIRED), 1368 | new Word('in'), 1369 | new Word('a'), 1370 | new Word('tr33'), 1371 | new Word('with'), 1372 | new Word('50¢'), 1373 | ], 1374 | ], 1375 | 1376 | [ 1377 | 'name' => 'my name is math(ish)', 1378 | 'input' => '+florence+machine ac/dc^11 Stellastarr* T\'Pau ​¡Forward, Russia! 
"¡Forward, Russia!"~', 1379 | 'expected_tokens' => [ 1380 | T::T_REQUIRED, 1381 | [T::T_WORD, 'florence+machine'], 1382 | [T::T_WORD, 'ac/dc'], 1383 | T::T_BOOST, 1384 | [T::T_NUMBER, 11.0], 1385 | [T::T_WORD, 'Stellastarr'], 1386 | T::T_WILDCARD, 1387 | [T::T_WORD, 'T\'Pau'], 1388 | [T::T_WORD, '​¡Forward'], 1389 | [T::T_WORD, 'Russia'], 1390 | [T::T_PHRASE, '¡Forward, Russia!'], 1391 | T::T_FUZZY, 1392 | ], 1393 | 'expected_nodes' => [ 1394 | new Word('florence+machine', BoolOperator::REQUIRED), 1395 | new Word('ac/dc', null, true, Word::MAX_BOOST), 1396 | new Word('Stellastarr', null, false, Word::DEFAULT_BOOST, false, Word::DEFAULT_FUZZY, true), 1397 | new Word('T\'Pau'), 1398 | new Word('​¡Forward'), 1399 | new Word('Russia'), 1400 | new Phrase('¡Forward, Russia!', null, false, Phrase::DEFAULT_BOOST, true, Phrase::DEFAULT_FUZZY), 1401 | ], 1402 | ], 1403 | /* 1404 | * END: RAPPERS and POP STARS 1405 | */ 1406 | 1407 | 1408 | /* 1409 | * START: SUBQUERIES 1410 | */ 1411 | [ 1412 | 'name' => 'mismatched subqueries', 1413 | 'input' => ') test (123 (abc f:a)', 1414 | 'expected_tokens' => [ 1415 | [T::T_WORD, 'test'], 1416 | T::T_SUBQUERY_START, 1417 | [T::T_NUMBER, 123.0], 1418 | [T::T_WORD, 'abc'], 1419 | [T::T_WORD, 'f:a'], 1420 | T::T_SUBQUERY_END, 1421 | ], 1422 | 'expected_nodes' => [ 1423 | new Word('test'), 1424 | new Subquery([new Numbr(123.0), new Word('abc'), new Word('f:a')]), 1425 | ], 1426 | ], 1427 | 1428 | [ 1429 | 'name' => 'filter inside of subquery', 1430 | 'input' => 'word(word:a>(#hashtag:b)', 1431 | 'expected_tokens' => [ 1432 | [T::T_WORD, 'word'], 1433 | T::T_SUBQUERY_START, 1434 | [T::T_WORD, 'word:a'], 1435 | [T::T_WORD, 'hashtag:b'], 1436 | T::T_SUBQUERY_END, 1437 | ], 1438 | 'expected_nodes' => [ 1439 | new Word('word'), 1440 | new Subquery([new Word('word:a'), new Word('hashtag:b')]), 1441 | ], 1442 | ], 1443 | 1444 | [ 1445 | 'name' => 'booleans before and in subqueries', 1446 | 'input' => '"ipad pro" AND (gold OR silver)', 1447 | 
'expected_tokens' => [ 1448 | [T::T_PHRASE, 'ipad pro'], 1449 | T::T_AND, 1450 | T::T_SUBQUERY_START, 1451 | [T::T_WORD, 'gold'], 1452 | T::T_OR, 1453 | [T::T_WORD, 'silver'], 1454 | T::T_SUBQUERY_END, 1455 | ], 1456 | 'expected_nodes' => [ 1457 | new Phrase('ipad pro', BoolOperator::REQUIRED), 1458 | new Subquery([new Word('gold'), new Word('silver')], BoolOperator::REQUIRED), 1459 | ], 1460 | ], 1461 | 1462 | [ 1463 | 'name' => 'booleans before and in subqueries 2', 1464 | 'input' => '"iphone 7" -(16gb OR 32gb)', 1465 | 'expected_tokens' => [ 1466 | [T::T_PHRASE, 'iphone 7'], 1467 | T::T_PROHIBITED, 1468 | T::T_SUBQUERY_START, 1469 | [T::T_WORD, '16gb'], 1470 | T::T_OR, 1471 | [T::T_WORD, '32gb'], 1472 | T::T_SUBQUERY_END, 1473 | ], 1474 | 'expected_nodes' => [ 1475 | new Phrase('iphone 7'), 1476 | new Subquery([new Word('16gb'), new Word('32gb')], BoolOperator::PROHIBITED), 1477 | ], 1478 | ], 1479 | /* 1480 | * END: SUBQUERIES 1481 | */ 1482 | 1483 | 1484 | /* 1485 | * START: WEIRD QUERIES 1486 | */ 1487 | [ 1488 | 'name' => 'whip nae nae', 1489 | 'input' => 'Watch Me (Whip/Nae Nae)', 1490 | 'expected_tokens' => [ 1491 | [T::T_WORD, 'Watch'], 1492 | [T::T_WORD, 'Me'], 1493 | T::T_SUBQUERY_START, 1494 | [T::T_WORD, 'Whip/Nae'], 1495 | [T::T_WORD, 'Nae'], 1496 | T::T_SUBQUERY_END, 1497 | ], 1498 | 'expected_nodes' => [ 1499 | new Word('Watch'), 1500 | new Word('Me'), 1501 | new Subquery([new Word('Whip/Nae'), new Word('Nae')]), 1502 | ], 1503 | ], 1504 | 1505 | [ 1506 | 'name' => 'epic or fail', 1507 | 'input' => 'epic or fail', 1508 | 'expected_tokens' => [ 1509 | [T::T_WORD, 'epic'], 1510 | [T::T_WORD, 'or'], 1511 | [T::T_WORD, 'fail'], 1512 | ], 1513 | 'expected_nodes' => [ 1514 | new Word('epic'), 1515 | new Word('or'), 1516 | new Word('fail'), 1517 | ], 1518 | ], 1519 | 1520 | [ 1521 | 'name' => 'use of || then and required subquery', 1522 | 'input' => 'test || AND what (+test)', 1523 | 'expected_tokens' => [ 1524 | [T::T_WORD, 'test'], 1525 | T::T_OR, 1526 
| T::T_AND, 1527 | [T::T_WORD, 'what'], 1528 | T::T_SUBQUERY_START, 1529 | T::T_REQUIRED, 1530 | [T::T_WORD, 'test'], 1531 | T::T_SUBQUERY_END, 1532 | ], 1533 | 'expected_nodes' => [ 1534 | new Word('test'), 1535 | new Word('what', BoolOperator::REQUIRED), 1536 | new Word('test', BoolOperator::REQUIRED), 1537 | ], 1538 | ], 1539 | 1540 | [ 1541 | 'name' => 'mega subqueries, all non-sensical', 1542 | 'input' => 'test OR ( ( 1 ) OR ( ( 2 ) ) OR ( ( ( 3.14 ) ) ) OR a OR +b ) OR +field:>1', 1543 | 'expected_tokens' => [ 1544 | [T::T_WORD, 'test'], 1545 | T::T_OR, 1546 | T::T_SUBQUERY_START, 1547 | [T::T_NUMBER, 1.0], 1548 | T::T_SUBQUERY_END, 1549 | T::T_OR, 1550 | T::T_SUBQUERY_START, 1551 | [T::T_NUMBER, 2.0], 1552 | T::T_SUBQUERY_END, 1553 | T::T_OR, 1554 | T::T_SUBQUERY_START, 1555 | [T::T_NUMBER, 3.14], 1556 | T::T_SUBQUERY_END, 1557 | T::T_OR, 1558 | [T::T_WORD, 'a'], 1559 | T::T_OR, 1560 | T::T_REQUIRED, 1561 | [T::T_WORD, 'b'], 1562 | T::T_OR, 1563 | T::T_REQUIRED, 1564 | [T::T_FIELD_START, 'field'], 1565 | T::T_GREATER_THAN, 1566 | [T::T_NUMBER, 1.0], 1567 | T::T_FIELD_END, 1568 | ], 1569 | 'expected_nodes' => [ 1570 | new Word('test'), 1571 | new Numbr(1), 1572 | new Numbr(2), 1573 | new Numbr(3.14), 1574 | new Word('a'), 1575 | new Word('b', BoolOperator::REQUIRED), 1576 | new Field( 1577 | 'field', 1578 | new Numbr(1.0, ComparisonOperator::GT), 1579 | BoolOperator::REQUIRED, 1580 | false, 1581 | Field::DEFAULT_BOOST 1582 | ), 1583 | ], 1584 | ], 1585 | 1586 | [ 1587 | 'name' => 'common dotted things', 1588 | 'input' => 'R.I.P. Motörhead', 1589 | 'expected_tokens' => [ 1590 | [T::T_WORD, 'R.I.P'], 1591 | [T::T_WORD, 'Motörhead'], 1592 | ], 1593 | 'expected_nodes' => [ 1594 | new Word('R.I.P'), 1595 | new Word('Motörhead'), 1596 | ], 1597 | ], 1598 | 1599 | [ 1600 | 'name' => 'ignored chars', 1601 | 'input' => '!!! ! $ _ . 
; %', 1602 | 'expected_tokens' => [], 1603 | 'expected_nodes' => [], 1604 | ], 1605 | 1606 | [ 1607 | 'name' => 'elastic search example 1', 1608 | 'input' => '"john smith"^2 (foo bar)^4', 1609 | 'expected_tokens' => [ 1610 | [T::T_PHRASE, 'john smith'], 1611 | T::T_BOOST, 1612 | [T::T_NUMBER, 2.0], 1613 | T::T_SUBQUERY_START, 1614 | [T::T_WORD, 'foo'], 1615 | [T::T_WORD, 'bar'], 1616 | T::T_SUBQUERY_END, 1617 | T::T_BOOST, 1618 | [T::T_NUMBER, 4.0], 1619 | ], 1620 | 'expected_nodes' => [ 1621 | new Phrase('john smith', null, true, 2.0), 1622 | new Subquery([new Word('foo'), new Word('bar')], null, true, 4.0), 1623 | ], 1624 | ], 1625 | 1626 | [ 1627 | 'name' => 'intentionally mutant', 1628 | 'input' => '[blah "[[shortcode]]" akd_ -gj% ! @* (+=} --> ;\' [ 1704 | [T::T_WORD, 'a"b"#c"#d'], 1705 | [T::T_WORD, 'e'], 1706 | ], 1707 | 'expected_nodes' => [ 1708 | new Word('a"b"#c"#d'), 1709 | new Word('e'), 1710 | ], 1711 | ], 1712 | 1713 | [ 1714 | 'name' => 'xss1', 1715 | 'input' => '', 1716 | 'expected_tokens' => [ 1717 | [T::T_WORD, 'IMG'], 1718 | [T::T_WORD, 'SRC'], 1719 | [T::T_WORD, 'jAvascript:alert'], 1720 | T::T_SUBQUERY_START, 1721 | [T::T_WORD, 'test2'], 1722 | T::T_SUBQUERY_END, 1723 | ], 1724 | 'expected_nodes' => [ 1725 | new Word('IMG'), 1726 | new Word('SRC'), 1727 | new Word('jAvascript:alert'), 1728 | new Word('test2'), 1729 | ], 1730 | ], 1731 | 1732 | [ 1733 | 'name' => 'should not be required', 1734 | 'input' => 'token + token', 1735 | 'expected_tokens' => [ 1736 | [T::T_WORD, 'token'], 1737 | [T::T_WORD, 'token'], 1738 | ], 1739 | 'expected_nodes' => [ 1740 | new Word('token'), 1741 | new Word('token'), 1742 | ], 1743 | ], 1744 | 1745 | [ 1746 | 'name' => 'should not be prohibited', 1747 | 'input' => 'token - token', 1748 | 'expected_tokens' => [ 1749 | [T::T_WORD, 'token'], 1750 | [T::T_WORD, 'token'], 1751 | ], 1752 | 'expected_nodes' => [ 1753 | new Word('token'), 1754 | new Word('token'), 1755 | ], 1756 | ], 1757 | 1758 | [ 1759 | 'name' => 
'should not be boosted', 1760 | 'input' => 'token ^5 token', 1761 | 'expected_tokens' => [ 1762 | [T::T_WORD, 'token'], 1763 | [T::T_NUMBER, 5.0], 1764 | [T::T_WORD, 'token'], 1765 | ], 1766 | 'expected_nodes' => [ 1767 | new Word('token'), 1768 | new Numbr(5.0), 1769 | new Word('token'), 1770 | ], 1771 | ], 1772 | 1773 | [ 1774 | 'name' => 'should not have words or phrases without real characters', 1775 | 'input' => 'test taco-spice chester:copperpot :: : ; ;; " " , - -- - ++ "a phrase:" _ [ ] { } | \\ / ` * ~ ! @ ( ) # $ % ^ & = < > ?', 1776 | 'expected_tokens' => [ 1777 | [T::T_WORD, 'test'], 1778 | [T::T_WORD, 'taco-spice'], 1779 | [T::T_FIELD_START, 'chester'], 1780 | [T::T_WORD, 'copperpot'], 1781 | T::T_FIELD_END, 1782 | T::T_PROHIBITED, 1783 | T::T_REQUIRED, 1784 | [T::T_PHRASE, 'a phrase:'], 1785 | T::T_WILDCARD, 1786 | T::T_SUBQUERY_START, 1787 | T::T_SUBQUERY_END, 1788 | ], 1789 | 'expected_nodes' => [ 1790 | new Word('test'), 1791 | new Word('taco-spice'), 1792 | new Field('chester', new Word('copperpot')), 1793 | new Phrase('a phrase:'), 1794 | ], 1795 | ], 1796 | /* 1797 | * END: WEIRD QUERIES 1798 | */ 1799 | ]; 1800 | -------------------------------------------------------------------------------- /tests/QueryParserTest.php: -------------------------------------------------------------------------------- 1 | parser = new QueryParser(); 16 | } 17 | 18 | /** Parses the fixture query and asserts that the resulting nodes equal the expected nodes. 19 | * @dataProvider getTestQueries 20 | * 21 | * @param string $name Fixture label; only used in the assertion failure message. 22 | * @param string $input Raw query string handed to the parser. 23 | * @param mixed $ignored Unused by this test; presumably the expected_tokens value of the fixture entry (third element of each data set) - TODO confirm. 24 | * @param array $expectedNodes Nodes that the parsed query is expected to return from getNodes(). 25 | */ 26 | public function testParse(string $name, string $input, $ignored, array $expectedNodes = []): void 27 | { 28 | $result = $this->parser->parse($input); 29 | $this->assertEquals($expectedNodes, $result->getNodes(), "Test query [{$name}] with input [{$input}] failed."); 30 | } 31 | /** Data provider: returns whatever Fixtures/test-queries.php returns (the shared list of query fixtures). */ 32 | public function getTestQueries(): array 33 | { 34 | return require __DIR__ . 
'/Fixtures/test-queries.php'; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /tests/TokenizerTest.php: -------------------------------------------------------------------------------- 1 | tokenizer = new Tokenizer(); 17 | } 18 | /** Scanning a whitespace-only input is expected to yield an empty token list. */ 19 | public function testOnlyWhitespace(): void 20 | { 21 | $this->assertEquals([], $this->tokenizer->scan(' ')->getTokens()); 22 | } 23 | 24 | /** Scans the fixture query and asserts that the produced tokens equal the expected tokens. 25 | * @dataProvider getTestQueries 26 | * 27 | * @param string $name Fixture label; only used in the assertion failure message. 28 | * @param string $input Raw query string handed to the tokenizer. 29 | * @param array $expectedTokens Fixture shorthand: each element is either a bare token type or a [type, value] pair; both forms are turned into T objects before comparing. 30 | */ 31 | public function testScan(string $name, string $input, array $expectedTokens): void 32 | { 33 | // convert the sample 'expected' into token objects. 34 | foreach ($expectedTokens as $k => $v) { 35 | if (!is_array($v)) { 36 | $expectedTokens[$k] = new T($v); 37 | continue; 38 | } 39 | 40 | $expectedTokens[$k] = new T($v[0], $v[1]); 41 | } 42 | 43 | $tokenStream = $this->tokenizer->scan($input); 44 | $this->assertEquals($expectedTokens, $tokenStream->getTokens(), "Test query [{$name}] with input [{$input}] failed."); 45 | } 46 | /** Data provider: returns whatever Fixtures/test-queries.php returns (the shared list of query fixtures). */ 47 | public function getTestQueries(): array 48 | { 49 | return require __DIR__ . '/Fixtures/test-queries.php'; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /tests/bootstrap.php: -------------------------------------------------------------------------------- 1 |