├── .editorconfig
├── .gitignore
├── CHANGELOG-0.x.md
├── CHANGELOG-1.x.md
├── CHANGELOG-2.x.md
├── CHANGELOG-3.x.md
├── LICENSE.md
├── README.md
├── composer.json
├── examples
├── elastica.php
└── xml.php
├── phpunit.xml.dist
├── src
├── Builder
│ ├── AbstractQueryBuilder.php
│ ├── ElasticaQueryBuilder.php
│ ├── QueryBuilder.php
│ └── XmlQueryBuilder.php
├── Enum
│ ├── BoolOperator.php
│ └── ComparisonOperator.php
├── Node
│ ├── Date.php
│ ├── DateRange.php
│ ├── Emoji.php
│ ├── Emoticon.php
│ ├── Field.php
│ ├── Hashtag.php
│ ├── Mention.php
│ ├── Node.php
│ ├── NumberRange.php
│ ├── Numbr.php
│ ├── Phrase.php
│ ├── Range.php
│ ├── Subquery.php
│ ├── Url.php
│ ├── Word.php
│ └── WordRange.php
├── ParsedQuery.php
├── QueryParser.php
├── Token.php
├── TokenStream.php
└── Tokenizer.php
└── tests
├── Builder
└── XmlQueryBuilderTest.php
├── Fixtures
└── test-queries.php
├── QueryParserTest.php
├── TokenizerTest.php
└── bootstrap.php
/.editorconfig:
--------------------------------------------------------------------------------
1 | # editorconfig.org
2 |
3 | root = true
4 |
5 | [*]
6 | charset = utf-8
7 | end_of_line = lf
8 | indent_size = 2
9 | indent_style = space
10 | insert_final_newline = true
11 | trim_trailing_whitespace = true
12 |
13 | [*.{php,py}]
14 | indent_size = 4
15 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Symfony App Files #
2 | ############
3 | *.cache
4 | bin/
5 | build/
6 | cache/
7 | logs/
8 | tmp/
9 | vendor/
10 |
11 | # Deployment/IDE Tools #
12 | ############
13 | autoload.php
14 | composer.lock
15 | composer.phar
16 | phpunit.xml
17 | .buildpath
18 | *.iml
19 | .idea/
20 | .phpunit.result.cache
21 | .project
22 | .settings
23 |
24 | # Compiled source #
25 | ###################
26 | *.com
27 | *.class
28 | *.dll
29 | *.exe
30 | *.o
31 | *.so
32 |
33 | # Packages #
34 | ############
35 | # it's better to unpack these files and commit the raw source
36 | # git has its own built in compression methods
37 | *.7z
38 | *.dmg
39 | *.gz
40 | *.iso
41 | *.jar
42 | *.rar
43 | *.tar
44 | *.zip
45 |
46 | # Logs and databases #
47 | ######################
48 | *.log
49 | *.sql
50 | *.sqlite
51 |
52 | # OS generated files #
53 | ######################
54 | .DS_Store
55 | .DS_Store?
56 | ._*
57 | .Spotlight-V100
58 | .Trashes
59 | Icon?
60 | ehthumbs.db
61 | Thumbs.db
62 |
63 | # Vagrant provisioning #
64 | ########################
65 | .vagrant/
66 |
--------------------------------------------------------------------------------
/CHANGELOG-0.x.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG for 0.x
2 | This changelog references the relevant changes done in 0.x versions.
3 |
4 |
5 | ## v0.3.2
6 | * BUG :: Add minimum requirement check for word matching `[a-zA-Z0-9]+` and ignore empty phrases.
7 |
8 |
9 | ## v0.3.1
10 | * BUG :: Fix invalid string casting on Numbr in ElasticaQueryBuilder.
11 |
12 |
13 | ## v0.3.0
14 | __BREAKING CHANGES__
15 |
16 | * Update `ElasticaQueryBuilder` to use `"ruflin/elastica": "~5.3"`.
17 | * Require php `>=7.1` in `composer.json`.
18 | * Add php7 type hinting and use `declare(strict_types=1);`.
19 |
20 |
21 | ## v0.2.1
22 | * pull #9: Respect boolean operator preceding subquery.
23 |
24 |
25 | ## v0.2.0
26 | __BREAKING CHANGES__
27 |
28 | * issue #7: Update `ElasticaQueryBuilder` to use 2.x queries/filters. Requires `"ruflin/elastica": "~3.2"`.
29 | * issue #6: Make TimeZone configurable on any builders that use date nodes.
30 | * The `Number` class was renamed to `Numbr` to prevent issue with scalar type hints in php7.
31 |
32 |
33 | ## v0.1.2
34 | * Allow for `gdbots/common` ~0.1 or ~1.0.
35 |
36 |
37 | ## v0.1.1
38 | * issue #4: Adjust ElasticaQueryBuilder to be more "AND" like by default.
39 |
40 |
41 | ## v0.1.0
42 | * Initial version.
43 |
--------------------------------------------------------------------------------
/CHANGELOG-1.x.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG for 1.x
2 | This changelog references the relevant changes done in 1.x versions.
3 |
4 |
5 | ## v1.0.0
6 | * Initial stable version.
7 |
--------------------------------------------------------------------------------
/CHANGELOG-2.x.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG for 2.x
2 | This changelog references the relevant changes done in 2.x versions.
3 |
4 |
5 | ## v2.0.2
6 | * Add support for utf-8 characters when parsing words.
7 |
8 |
9 | ## v2.0.1
10 | * Do not truncate input in `Tokenizer::scan`. Removed the `substr($input, 0, 256)` rule; it is unclear why it was there and it seems safe to remove.
11 |
12 |
13 | ## v2.0.0
14 | __BREAKING CHANGES__
15 |
16 | * Require php `>=7.4`
17 | * Uses php7 type hinting throughout with `declare(strict_types=1);`
18 | * Uses `"ruflin/elastica": "^7.0"`
19 |
--------------------------------------------------------------------------------
/CHANGELOG-3.x.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG for 3.x
2 | This changelog references the relevant changes done in 3.x versions.
3 |
4 |
5 | ## v3.0.0
6 | __BREAKING CHANGES__
7 |
8 | * Require php 8.1.
9 | * Use new php enum instead of the home grown versions.
10 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # Apache License
2 | Version 2.0, January 2004
3 |
4 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
5 |
6 | ## 1. Definitions.
7 |
8 | "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1
9 | through 9 of this document.
10 |
11 | "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the
12 | License.
13 |
14 | "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled
15 | by, or are under common control with that entity. For the purposes of this definition, "control" means
16 | (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract
17 | or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial
18 | ownership of such entity.
19 |
20 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
21 |
22 | "Source" form shall mean the preferred form for making modifications, including but not limited to software
23 | source code, documentation source, and configuration files.
24 |
25 | "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form,
26 | including but not limited to compiled object code, generated documentation, and conversions to other media
27 | types.
28 |
29 | "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License,
30 | as indicated by a copyright notice that is included in or attached to the work (an example is provided in the
31 | Appendix below).
32 |
33 | "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from)
34 | the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent,
35 | as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not
36 | include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work
37 | and Derivative Works thereof.
38 |
39 | "Contribution" shall mean any work of authorship, including the original version of the Work and any
40 | modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to
41 | Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to
42 | submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of
43 | electronic, verbal, or written communication sent to the Licensor or its representatives, including but not
44 | limited to communication on electronic mailing lists, source code control systems, and issue tracking systems
45 | that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but
46 | excluding communication that is conspicuously marked or otherwise designated in writing by the copyright
47 | owner as "Not a Contribution."
48 |
49 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been
50 | received by Licensor and subsequently incorporated within the Work.
51 |
52 | ## 2. Grant of Copyright License.
53 |
54 | Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual,
55 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare
56 | Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such
57 | Derivative Works in Source or Object form.
58 |
59 | ## 3. Grant of Patent License.
60 |
61 | Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual,
62 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent
63 | license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such
64 | license applies only to those patent claims licensable by such Contributor that are necessarily infringed by
65 | their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such
66 | Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim
67 | or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work
68 | constitutes direct or contributory patent infringement, then any patent licenses granted to You under this
69 | License for that Work shall terminate as of the date such litigation is filed.
70 |
71 | ## 4. Redistribution.
72 |
73 | You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without
74 | modifications, and in Source or Object form, provided that You meet the following conditions:
75 |
76 | 1. You must give any other recipients of the Work or Derivative Works a copy of this License; and
77 |
78 | 2. You must cause any modified files to carry prominent notices stating that You changed the files; and
79 |
80 | 3. You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent,
81 | trademark, and attribution notices from the Source form of the Work, excluding those notices that do
82 | not pertain to any part of the Derivative Works; and
83 |
84 | 4. If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that
85 | You distribute must include a readable copy of the attribution notices contained within such NOTICE
86 | file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one
87 | of the following places: within a NOTICE text file distributed as part of the Derivative Works; within
88 | the Source form or documentation, if provided along with the Derivative Works; or, within a display
89 | generated by the Derivative Works, if and wherever such third-party notices normally appear. The
90 | contents of the NOTICE file are for informational purposes only and do not modify the License. You may
91 | add Your own attribution notices within Derivative Works that You distribute, alongside or as an
92 | addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be
93 | construed as modifying the License.
94 |
95 | You may add Your own copyright statement to Your modifications and may provide additional or different license
96 | terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative
97 | Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the
98 | conditions stated in this License.
99 |
100 | ## 5. Submission of Contributions.
101 |
102 | Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by
103 | You to the Licensor shall be under the terms and conditions of this License, without any additional terms or
104 | conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate
105 | license agreement you may have executed with Licensor regarding such Contributions.
106 |
107 | ## 6. Trademarks.
108 |
109 | This License does not grant permission to use the trade names, trademarks, service marks, or product names of
110 | the Licensor, except as required for reasonable and customary use in describing the origin of the Work and
111 | reproducing the content of the NOTICE file.
112 |
113 | ## 7. Disclaimer of Warranty.
114 |
115 | Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor
116 | provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
117 | or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT,
118 | MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the
119 | appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of
120 | permissions under this License.
121 |
122 | ## 8. Limitation of Liability.
123 |
124 | In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless
125 | required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any
126 | Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential
127 | damages of any character arising as a result of this License or out of the use or inability to use the Work
128 | (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or
129 | any and all other commercial damages or losses), even if such Contributor has been advised of the possibility
130 | of such damages.
131 |
132 | ## 9. Accepting Warranty or Additional Liability.
133 |
134 | While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for,
135 | acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this
136 | License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole
137 | responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold
138 | each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason
139 | of your accepting any such warranty or additional liability.
140 |
141 | END OF TERMS AND CONDITIONS
142 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | query-parser-php
2 | =============
3 |
4 | [![Build Status](https://api.travis-ci.org/gdbots/query-parser-php.svg)](https://travis-ci.org/gdbots/query-parser-php)
5 |
6 | PHP library that converts search queries into words, phrases, hashtags, mentions, etc.
7 |
8 | This library supports a simple search query standard. It is meant to support the most common search combinations that a
9 | user would likely enter into your website search box or dashboard application. It intentionally limits the more complex nested capabilities
10 | that you might expect from SQL builders, Lucene, etc.
11 |
12 |
13 | ## Tokenizer
14 | Tokens are split on whitespace unless enclosed in double quotes. The following tokens are extracted by the `Tokenizer`:
15 |
16 | ``` php
17 | class Token implements \JsonSerializable
18 | {
19 | const T_EOI = 0; // end of input
20 | const T_WHITE_SPACE = 1;
21 | const T_IGNORED = 2; // an ignored token, e.g. #, !, etc. when found by themselves, don't do anything with them.
22 | const T_NUMBER = 3; // 10, 0.8, .64, 6.022e23
23 | const T_REQUIRED = 4; // '+'
24 | const T_PROHIBITED = 5; // '-'
25 | const T_GREATER_THAN = 6; // '>'
26 | const T_LESS_THAN = 7; // '<'
27 | const T_EQUALS = 8; // '='
28 | const T_FUZZY = 9; // '~'
29 | const T_BOOST = 10; // '^'
30 | const T_RANGE_INCL_START = 11; // '['
31 | const T_RANGE_INCL_END = 12; // ']'
32 | const T_RANGE_EXCL_START = 13; // '{'
33 | const T_RANGE_EXCL_END = 14; // '}'
34 | const T_SUBQUERY_START = 15; // '('
35 | const T_SUBQUERY_END = 16; // ')'
36 | const T_WILDCARD = 17; // '*'
37 | const T_AND = 18; // 'AND' or '&&'
38 | const T_OR = 19; // 'OR' or '||'
39 | const T_TO = 20; // 'TO' or '..'
40 | const T_WORD = 21;
41 | const T_FIELD_START = 22; // The "field:" portion of "field:value".
42 | const T_FIELD_END = 23; // when a field lexeme ends, i.e. "field:value". This token has no value.
43 | const T_PHRASE = 24; // Phrase (one or more quoted words)
44 | const T_URL = 25; // a valid url
45 | const T_DATE = 26; // date in the format YYYY-MM-DD
46 | const T_HASHTAG = 27; // #hashtag
47 | const T_MENTION = 28; // @mention
48 | const T_EMOTICON = 29; // see https://en.wikipedia.org/wiki/Emoticon
49 | const T_EMOJI = 30; // see https://en.wikipedia.org/wiki/Emoji
50 | ```
51 | The `T_WHITE_SPACE` and `T_IGNORED` tokens are removed before the output is returned by the scan process.
52 |
53 |
54 | ## QueryParser
55 |
56 | The default query parser produces a `ParsedQuery` object which can be used with a builder to produce a query
57 | for a given search service.
58 |
59 |
60 | #### Basic Usage
61 |
62 | ``` php
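63 | <?php
64 |
65 | use Gdbots\QueryParser\Builder\XmlQueryBuilder;
66 | use Gdbots\QueryParser\QueryParser;
67 |
68 | $parser = new QueryParser();
69 | $builder = (new XmlQueryBuilder())->setHashtagFieldName('tags');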
70 |
71 | $result = $parser->parse('hello^5 planet:earth +date:2015-12-25 #omg');
72 | echo $builder->addParsedQuery($result)->toXmlString();
73 | ```
74 | Produces the following xml:
75 | ``` xml
76 |
77 |
78 | hello
79 |
80 | earth
81 |
82 |
83 | 2015-12-25
84 |
85 |
86 | omg
87 |
88 |
89 | ```
90 |
91 |
92 | To get a list of `Node` objects by type, use:
93 |
94 | ``` php
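95 | <?php
96 | use Gdbots\QueryParser\Node\Hashtag;
97 | use Gdbots\QueryParser\QueryParser;
98 | $parser = new QueryParser();
99 | $result = $parser->parse('#hashtag1 AND #hashtag2');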
100 | $hashtags = $result->getNodesOfType(Hashtag::NODE_TYPE);
101 | ```
102 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "gdbots/query-parser",
3 | "homepage": "https://github.com/gdbots/query-parser-php",
4 | "description": "Php library that converts search queries into terms, phrases, hashtags, mentions, etc.",
5 | "type": "library",
6 | "license": "Apache-2.0",
7 | "require": {
8 | "php": ">=8.1"
9 | },
10 | "require-dev": {
11 | "phpunit/phpunit": "^9.5",
12 | "ruflin/elastica": "^7.1"
13 | },
14 | "autoload": {
15 | "psr-4": {
16 | "Gdbots\\QueryParser\\": "src"
17 | }
18 | },
19 | "autoload-dev": {
20 | "psr-4": {
21 | "Gdbots\\Tests\\QueryParser\\": "tests"
22 | }
23 | },
24 | "scripts": {
25 | "test": "vendor/bin/phpunit"
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/examples/elastica.php:
--------------------------------------------------------------------------------
1 | [['host' => $host, 'port' => $port]]]);
39 | $client->setLogger(new EchoLogger());
40 |
41 | $parser = new QueryParser();
42 | /** @var ElasticaQueryBuilder $builder */
43 | $builder = (new ElasticaQueryBuilder())
44 | ->addNestedField('dynamic_fields')
45 | ->setDefaultFieldName('_all')
46 | ->setEmoticonFieldName('emoticons')
47 | ->setHashtagFieldName('hashtags')
48 | ->setMentionFieldName('mentions')
49 | ->addFullTextSearchField('subject')
50 | ->addFullTextSearchField('dynamic_fields.string_val')
51 | ->addFullTextSearchField('dynamic_fields.text_val')
52 | ->setLocalTimeZone(new DateTimeZone('America/Los_Angeles'))
53 | ;
54 |
55 | $qs = isset($argv[1]) ? $argv[1] : 'test';
56 | $parsedQuery = $parser->parse($qs);
57 | if (!$parsedQuery->hasAMatchableNode()) {
58 | echo 'query: '.$qs.PHP_EOL;
59 | echo 'has no matchable nodes.'.str_repeat(PHP_EOL, 3);
60 | exit;
61 | }
62 | $builder->addParsedQuery($parsedQuery);
63 |
64 | $options = [Search::OPTION_FROM => 0, Search::OPTION_SIZE => 5];
65 | $query = $builder->getBoolQuery();
66 | /*
67 | $query = (new FunctionScore())
68 | ->setQuery($query)
69 | ->setBoostMode(FunctionScore::BOOST_MODE_SUM)
70 | ->addFunction('field_value_factor', [
71 | 'field' => 'priority',
72 | 'modifier' => 'none',
73 | ], null, 0.4);
74 | */
75 | $query = \Elastica\Query::create($query);
76 | //$query->setExplain(true);
77 | $query->setSort(['date_sent' => 'desc']);
78 | $results = $client->getIndex($index)->search($query, $options);
79 |
80 | echo 'Total Time (ms) / Records Found:' . PHP_EOL;
81 | echo $results->getTotalTime() . 'ms / ' . $results->getTotalHits() . ' records' . str_repeat(PHP_EOL, 3);
82 | //echo json_encode($results->getResponse()->getData(), JSON_PRETTY_PRINT);
83 |
84 | foreach ($results as $result) {
85 | fgets(STDIN);
86 | echo json_encode($result->getSource(), JSON_PRETTY_PRINT) . PHP_EOL;
87 | echo str_repeat(PHP_EOL, 3).str_repeat('*', 70).str_repeat(PHP_EOL, 3);
88 | }
89 |
--------------------------------------------------------------------------------
/examples/xml.php:
--------------------------------------------------------------------------------
1 | setEmoticonFieldName('emoticons')
14 | ->setHashtagFieldName('tags')
15 | ->setMentionFieldName('mentions')
16 | ;
17 |
18 | $header = str_repeat(PHP_EOL, 4).'#### %s'.PHP_EOL;
19 |
20 | foreach ($tests as $test) {
21 | $result = $parser->parse($test['input']);
22 |
23 | echo sprintf($header, 'START TEST: '.$test['name']);
24 | echo $test['input'];
25 |
26 |
27 | echo sprintf($header, 'RAW NODES AS JSON');
28 | echo json_encode($result, JSON_PRETTY_PRINT);
29 |
30 |
31 | echo sprintf($header, 'NODES AS XML');
32 | $xml = $builder->clear()->addParsedQuery($result)->toXmlString();
33 | echo $xml;
34 |
35 |
36 | echo str_repeat(PHP_EOL, 10).str_repeat('*', 70).str_repeat(PHP_EOL, 5);
37 | fgets(STDIN);
38 | }
39 |
--------------------------------------------------------------------------------
/phpunit.xml.dist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | tests/
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/src/Builder/AbstractQueryBuilder.php:
--------------------------------------------------------------------------------
1 | true,
38 | '_all' => true,
39 | 'title' => true,
40 | 'tiny_title' => true,
41 | 'short_title' => true,
42 | 'excerpt' => true,
43 | 'description' => true,
44 | 'overview' => true,
45 | 'summary' => true,
46 | 'story' => true,
47 | 'html' => true,
48 | 'text' => true,
49 | 'markdown' => true,
50 | 'content' => true,
51 | 'contents' => true,
52 | 'contents-continued' => true,
53 | 'contents-md' => true,
54 | 'contents-mobile' => true,
55 | 'mobile-contents' => true,
56 | 'txt-contents' => true,
57 | 'text-contents' => true,
58 | 'abstract' => true,
59 | 'search_text' => true,
60 | 'cover' => true,
61 | 'bio' => true,
62 | 'mini_bio' => true,
63 | 'meta_title' => true,
64 | 'meta_description' => true,
65 | 'meta_keywords' => true,
66 | 'og_title' => true,
67 | 'og_description' => true,
68 | 'og_keywords' => true,
69 | 'seo_title' => true,
70 | 'seo_description' => true,
71 | 'seo_keywords' => true,
72 | 'img_credit' => true,
73 | 'img_caption' => true,
74 | 'credit' => true,
75 | 'caption' => true,
76 | 'img_credits' => true,
77 | 'img_captions' => true,
78 | 'image_credits' => true,
79 | 'image_captions' => true,
80 | 'credits' => true,
81 | 'captions' => true,
82 | 'full_name' => true,
83 | 'first_name' => true,
84 | 'last_name' => true,
85 | 'street1' => true,
86 | 'street2' => true,
87 | 'city' => true,
88 | 'address.street1' => true,
89 | 'address.street2' => true,
90 | 'address.city' => true,
91 | 'ctx_ip_geo.street1' => true,
92 | 'ctx_ip_geo.street2' => true,
93 | 'ctx_ip_geo.city' => true,
94 | ];
95 |
96 | protected string $defaultFieldName = '_all';
97 | protected ?string $emojiFieldName = null;
98 | protected ?string $emoticonFieldName = null;
99 | protected ?string $hashtagFieldName = null;
100 | protected ?string $mentionFieldName = null;
101 | protected ?\DateTimeZone $localTimeZone = null;
102 |
103 |     /**
104 |      * Hook for resetting the builder between queries.
105 |      *
106 |      * This base implementation resets nothing and only returns the builder
107 |      * so the call can be chained. Unlike most methods here it is not final.
108 |      * NOTE(review): presumably concrete builders override it to discard
109 |      * their accumulated output - confirm against the subclasses.
110 |      *
111 |      * @return self
112 |      */
113 |     public function clear(): self
114 |     {
115 |         return $this;
116 |     }
107 |
108 |     /**
109 |      * Replaces the complete set of fields that support full text search.
110 |      *
111 |      * Every name becomes a key flagged `true`, the same shape the default map
112 |      * and addFullTextSearchField() use. The previous array_flip() stored each
113 |      * name's list position instead, so the first field was mapped to a falsy 0.
114 |      *
115 |      * @param string[] $fields Field names; supportsFullTextSearch() looks them
116 |      *                         up lower-cased and trimmed.
117 |      *
118 |      * @return self
119 |      */
120 |     final public function setFullTextSearchFields(array $fields): self
121 |     {
122 |         $this->fullTextSearchFields = array_fill_keys($fields, true);
123 |         return $this;
124 |     }
113 |
114 |     /**
115 |      * Registers one more field as supporting full text search.
116 |      *
117 |      * The name is stored verbatim, while supportsFullTextSearch() looks names
118 |      * up lower-cased and trimmed, so it should be passed in that form.
119 |      *
120 |      * @param string $fieldName
121 |      *
122 |      * @return self
123 |      */
124 |     final public function addFullTextSearchField(string $fieldName): self
125 |     {
126 |         $this->fullTextSearchFields[$fieldName] = true;
127 |
128 |         return $this;
129 |     }
119 |
120 |     /**
121 |      * Removes a field from the set that supports full text search.
122 |      *
123 |      * The name is matched verbatim against the stored keys; removing a name
124 |      * that is not registered has no effect.
125 |      *
126 |      * @param string $fieldName
127 |      *
128 |      * @return self
129 |      */
130 |     final public function removeFullTextSearchField(string $fieldName): self
131 |     {
132 |         unset($this->fullTextSearchFields[$fieldName]);
133 |
134 |         return $this;
135 |     }
125 |
126 |     /**
127 |      * Lists the names of all fields currently flagged as supporting full
128 |      * text search (the keys of the internal lookup map).
129 |      *
130 |      * @return array
131 |      */
132 |     final public function getFullTextSearchFields(): array
133 |     {
134 |         $fieldNames = array_keys($this->fullTextSearchFields);
135 |
136 |         return $fieldNames;
137 |     }
130 |
131 |     /**
132 |      * Tells whether the given field is registered for full text search.
133 |      *
134 |      * @param string $fieldName Lower-cased and trimmed before the lookup.
135 |      *
136 |      * @return bool
137 |      */
138 |     final public function supportsFullTextSearch(string $fieldName): bool
139 |     {
140 |         $lookupKey = trim(strtolower($fieldName));
141 |
142 |         return isset($this->fullTextSearchFields[$lookupKey]);
143 |     }
135 |
136 |     /**
137 |      * Sets the default field name (initially '_all').
138 |      *
139 |      * NOTE(review): the value is not read in this part of the class;
140 |      * presumably concrete builders use it for nodes outside a field - confirm.
141 |      *
142 |      * @param string $fieldName
143 |      *
144 |      * @return self
145 |      */
146 |     final public function setDefaultFieldName(string $fieldName): self
147 |     {
148 |         $this->defaultFieldName = $fieldName;
149 |
150 |         return $this;
151 |     }
141 |
142 |     /**
143 |      * Sets the field that addEmoji() wraps an emoji in when the emoji is
144 |      * found outside of a field. While unset (null) emojis are handled as
145 |      * plain terms.
146 |      *
147 |      * @param string $fieldName
148 |      *
149 |      * @return self
150 |      */
151 |     final public function setEmojiFieldName(string $fieldName): self
152 |     {
153 |         $this->emojiFieldName = $fieldName;
154 |
155 |         return $this;
156 |     }
147 |
148 |     /**
149 |      * Sets the field that addEmoticon() wraps an emoticon in when the
150 |      * emoticon is found outside of a field. While unset (null) emoticons are
151 |      * handled as plain terms.
152 |      *
153 |      * @param string $fieldName
154 |      *
155 |      * @return self
156 |      */
157 |     final public function setEmoticonFieldName(string $fieldName): self
158 |     {
159 |         $this->emoticonFieldName = $fieldName;
160 |
161 |         return $this;
162 |     }
153 |
154 |     /**
155 |      * Sets the field that addHashtag() wraps a hashtag in when the hashtag
156 |      * is found outside of a field. While unset (null) hashtags are handled
157 |      * as plain terms.
158 |      *
159 |      * @param string $fieldName
160 |      *
161 |      * @return self
162 |      */
163 |     final public function setHashtagFieldName(string $fieldName): self
164 |     {
165 |         $this->hashtagFieldName = $fieldName;
166 |
167 |         return $this;
168 |     }
159 |
160 |     /**
161 |      * Sets the field that addMention() wraps a mention in when the mention
162 |      * is found outside of a field. While unset (null) mentions are handled
163 |      * as plain terms.
164 |      *
165 |      * @param string $fieldName
166 |      *
167 |      * @return self
168 |      */
169 |     final public function setMentionFieldName(string $fieldName): self
170 |     {
171 |         $this->mentionFieldName = $fieldName;
172 |
173 |         return $this;
174 |     }
165 |
166 |     /**
167 |      * Stores the local time zone for this builder.
168 |      *
169 |      * NOTE(review): the value is not read in this part of the class;
170 |      * presumably concrete builders apply it when handling date nodes - confirm.
171 |      *
172 |      * @param \DateTimeZone $timeZone
173 |      *
174 |      * @return self
175 |      */
176 |     final public function setLocalTimeZone(\DateTimeZone $timeZone): self
177 |     {
178 |         $this->localTimeZone = $timeZone;
179 |
180 |         return $this;
181 |     }
171 |
172 |     /**
173 |      * Feeds every node of a parsed query into this builder, in order, by
174 |      * letting each node call back the add* method that fits its type.
175 |      *
176 |      * @param ParsedQuery $parsedQuery
177 |      *
178 |      * @return self
179 |      */
180 |     final public function addParsedQuery(ParsedQuery $parsedQuery): self
181 |     {
182 |         $nodes = $parsedQuery->getNodes();
183 |         foreach ($nodes as $queryNode) {
184 |             $queryNode->acceptBuilder($this);
185 |         }
186 |
187 |         return $this;
188 |     }
180 |
181 |     /**
182 |      * Adds a date node; dates are always handled as exact terms.
183 |      *
184 |      * @param Date $date
185 |      *
186 |      * @return self
187 |      */
188 |     final public function addDate(Date $date): self
189 |     {
190 |         $this->handleTerm($date);
191 |
192 |         return $this;
193 |     }
186 |
187 |     /**
188 |      * Adds an emoji node.
189 |      *
190 |      * Outside of a field, and only when an emoji field name is configured,
191 |      * the emoji is wrapped in a Field of that name which inherits the emoji's
192 |      * bool operator and boost. Otherwise it is handled as a plain term.
193 |      *
194 |      * @param Emoji $emoji
195 |      *
196 |      * @return self
197 |      */
198 |     final public function addEmoji(Emoji $emoji): self
199 |     {
200 |         if (!$this->inField && null !== $this->emojiFieldName) {
201 |             return $this->addField(new Field(
202 |                 $this->emojiFieldName,
203 |                 $emoji,
204 |                 $emoji->getBoolOperator(),
205 |                 $emoji->useBoost(),
206 |                 $emoji->getBoost()
207 |             ));
208 |         }
209 |
210 |         $this->handleTerm($emoji);
211 |
212 |         return $this;
213 |     }
204 |
205 |     /**
206 |      * Adds an emoticon node.
207 |      *
208 |      * Outside of a field, and only when an emoticon field name is configured,
209 |      * the emoticon is wrapped in a Field of that name which inherits the
210 |      * emoticon's bool operator and boost. Otherwise it is handled as a plain
211 |      * term.
212 |      *
213 |      * @param Emoticon $emoticon
214 |      *
215 |      * @return self
216 |      */
217 |     final public function addEmoticon(Emoticon $emoticon): self
218 |     {
219 |         if (!$this->inField && null !== $this->emoticonFieldName) {
220 |             return $this->addField(new Field(
221 |                 $this->emoticonFieldName,
222 |                 $emoticon,
223 |                 $emoticon->getBoolOperator(),
224 |                 $emoticon->useBoost(),
225 |                 $emoticon->getBoost()
226 |             ));
227 |         }
228 |
229 |         $this->handleTerm($emoticon);
230 |
231 |         return $this;
232 |     }
222 |
223 |     /**
224 |      * Processes a field node: opens the field, lets the node it wraps visit
225 |      * this builder, then closes the field.
226 |      *
227 |      * The in-field state is restored in a finally block so that an exception
228 |      * thrown while the field is being built does not leave the builder
229 |      * believing it is still inside a field, which would make every later
230 |      * addField() call fail.
231 |      *
232 |      * @param Field $field
233 |      *
234 |      * @return self
235 |      *
236 |      * @throws \LogicException When called while already in a field or range.
237 |      */
238 |     final public function addField(Field $field): self
239 |     {
240 |         if ($this->inField || $this->inRange) {
241 |             throw new \LogicException('A Field cannot be nested in another Field or Range.');
242 |         }
243 |
244 |         $this->inField = true;
245 |         $this->currentField = $field;
246 |
247 |         try {
248 |             $this->queryOnFieldIsCacheable = $this->queryOnFieldIsCacheable($field);
249 |             $this->startField($field, $this->queryOnFieldIsCacheable);
250 |             $field->getNode()->acceptBuilder($this);
251 |             $this->endField($field, $this->queryOnFieldIsCacheable);
252 |         } finally {
253 |             // Always leave the field context, even when a hook or node throws.
254 |             $this->inField = false;
255 |             $this->currentField = null;
256 |             $this->queryOnFieldIsCacheable = false;
257 |         }
258 |
259 |         return $this;
260 |     }
240 |
241 |     /**
242 |      * Adds a hashtag node.
243 |      *
244 |      * Outside of a field, and only when a hashtag field name is configured,
245 |      * the hashtag is wrapped in a Field of that name which inherits the
246 |      * hashtag's bool operator and boost. Otherwise it is handled as a plain
247 |      * term.
248 |      *
249 |      * @param Hashtag $hashtag
250 |      *
251 |      * @return self
252 |      */
253 |     final public function addHashtag(Hashtag $hashtag): self
254 |     {
255 |         if (!$this->inField && null !== $this->hashtagFieldName) {
256 |             return $this->addField(new Field(
257 |                 $this->hashtagFieldName,
258 |                 $hashtag,
259 |                 $hashtag->getBoolOperator(),
260 |                 $hashtag->useBoost(),
261 |                 $hashtag->getBoost()
262 |             ));
263 |         }
264 |
265 |         $this->handleTerm($hashtag);
266 |
267 |         return $this;
268 |     }
258 |
259 |     /**
260 |      * Adds a mention node.
261 |      *
262 |      * Outside of a field, and only when a mention field name is configured,
263 |      * the mention is wrapped in a Field of that name which inherits the
264 |      * mention's bool operator and boost. Otherwise it is handled as a plain
265 |      * term.
266 |      *
267 |      * @param Mention $mention
268 |      *
269 |      * @return self
270 |      */
271 |     final public function addMention(Mention $mention): self
272 |     {
273 |         if (!$this->inField && null !== $this->mentionFieldName) {
274 |             return $this->addField(new Field(
275 |                 $this->mentionFieldName,
276 |                 $mention,
277 |                 $mention->getBoolOperator(),
278 |                 $mention->useBoost(),
279 |                 $mention->getBoost()
280 |             ));
281 |         }
282 |
283 |         $this->handleTerm($mention);
284 |
285 |         return $this;
286 |     }
276 |
277 |     /**
278 |      * Adds a number node; numbers are always handled as exact terms.
279 |      *
280 |      * @param Numbr $number
281 |      *
282 |      * @return self
283 |      */
284 |     final public function addNumber(Numbr $number): self
285 |     {
286 |         $this->handleTerm($number);
287 |
288 |         return $this;
289 |     }
282 |
283 |     /**
284 |      * Adds a phrase node, handled as text (full text matching unless the
285 |      * current field does not support it).
286 |      *
287 |      * @param Phrase $phrase
288 |      *
289 |      * @return self
290 |      */
291 |     final public function addPhrase(Phrase $phrase): self
292 |     {
293 |         $this->handleText($phrase);
294 |
295 |         return $this;
296 |     }
288 |
289 |     /**
290 |      * Adds a range node, which is only valid directly inside a field.
291 |      *
292 |      * The in-range flag is cleared in a finally block so that an exception
293 |      * from the range handler does not leave the builder stuck "in a range",
294 |      * which would make later addField(), addRange() and addSubquery() calls
295 |      * fail.
296 |      *
297 |      * @param Range $range
298 |      *
299 |      * @return self
300 |      *
301 |      * @throws \LogicException When used outside a field, inside another
302 |      *                         range or inside a subquery.
303 |      */
304 |     final public function addRange(Range $range): self
305 |     {
306 |         if (!$this->inField || $this->inRange || $this->inSubquery) {
307 |             throw new \LogicException('A Range can only be used within a field. e.g. rating:[1..5]');
308 |         }
309 |
310 |         $this->inRange = true;
311 |
312 |         try {
313 |             $this->handleRange($range, $this->currentField, $this->queryOnFieldIsCacheable);
314 |         } finally {
315 |             $this->inRange = false;
316 |         }
317 |
318 |         return $this;
319 |     }
300 |
301 |     /**
302 |      * Adds a subquery: opens it, lets each of its nodes visit this builder,
303 |      * then closes it. The current field, if any, is passed to both hooks.
304 |      *
305 |      * The in-subquery flag is cleared in a finally block so that an
306 |      * exception thrown by a hook or a nested node does not leave the builder
307 |      * stuck "in a subquery", which would make later addSubquery() and
308 |      * addRange() calls fail.
309 |      *
310 |      * @param Subquery $subquery
311 |      *
312 |      * @return self
313 |      *
314 |      * @throws \LogicException When nested in another subquery or in a range.
315 |      */
316 |     final public function addSubquery(Subquery $subquery): self
317 |     {
318 |         if ($this->inRange || $this->inSubquery) {
319 |             throw new \LogicException('A Subquery cannot be nested or within a Range.');
320 |         }
321 |
322 |         $this->inSubquery = true;
323 |
324 |         try {
325 |             $this->startSubquery($subquery, $this->currentField);
326 |
327 |             foreach ($subquery->getNodes() as $node) {
328 |                 $node->acceptBuilder($this);
329 |             }
330 |
331 |             $this->endSubquery($subquery, $this->currentField);
332 |         } finally {
333 |             $this->inSubquery = false;
334 |         }
335 |
336 |         return $this;
337 |     }
319 |
320 |     /**
321 |      * Adds a url node; urls are always handled as exact terms.
322 |      *
323 |      * @param Url $url
324 |      *
325 |      * @return self
326 |      */
327 |     final public function addUrl(Url $url): self
328 |     {
329 |         $this->handleTerm($url);
330 |
331 |         return $this;
332 |     }
325 |
326 |     /**
327 |      * Adds a word node, handled as text (full text matching unless the
328 |      * current field does not support it).
329 |      *
330 |      * @param Word $word
331 |      *
332 |      * @return self
333 |      */
334 |     final public function addWord(Word $word): self
335 |     {
336 |         $this->handleText($word);
337 |
338 |         return $this;
339 |     }
331 |
332 |     /**
333 |      * Tells whether a field is currently being processed by addField().
334 |      *
335 |      * @return bool
336 |      */
337 |     final protected function inField(): bool
338 |     {
339 |         return $this->inField;
340 |     }
336 |
337 |     /**
338 |      * Tells whether a range is currently being processed by addRange().
339 |      *
340 |      * @return bool
341 |      */
342 |     final protected function inRange(): bool
343 |     {
344 |         return $this->inRange;
345 |     }
341 |
342 |     /**
343 |      * Tells whether a subquery is currently being processed by addSubquery().
344 |      *
345 |      * @return bool
346 |      */
347 |     final protected function inSubquery(): bool
348 |     {
349 |         return $this->inSubquery;
350 |     }
346 |
347 |     /**
348 |      * Routes a text node (a word or a phrase) to the should/must/must-not
349 |      * match hook that fits its bool operator.
350 |      *
351 |      * Inside a field that does not support full text search the node is
352 |      * handled as an exact term instead.
353 |      *
354 |      * @param Node $node
355 |      */
356 |     private function handleText(Node $node): void
357 |     {
358 |         $field = $this->currentField;
359 |
360 |         if ($this->inField && !$this->supportsFullTextSearch($field->getName())) {
361 |             $this->handleTerm($node);
362 |             return;
363 |         }
364 |
365 |         // In a simple (non-compound) field the bool operator comes from the
366 |         // field, not from the node inside it: +field:value vs. field:+value
367 |         $operatorSource = ($this->inField && !$field->hasCompoundNode()) ? $field : $node;
368 |         $isOptional = $operatorSource->isOptional();
369 |         $isRequired = $operatorSource->isRequired();
370 |
371 |         // Stop words are only ever matched as "should", whatever the operator.
372 |         if (($node instanceof Word && $node->isStopWord()) || $isOptional) {
373 |             $this->shouldMatch($node, $field);
374 |             return;
375 |         }
376 |
377 |         if ($isRequired) {
378 |             $this->mustMatch($node, $field);
379 |             return;
380 |         }
381 |
382 |         $this->mustNotMatch($node, $field);
383 |     }
380 |
/**
 * Routes a term node to mustMatchTerm(), shouldMatchTerm() or
 * mustNotMatchTerm(), passing the builder's cacheable flag to the
 * required and prohibited variants.
 *
 * @param Node $node
 */
private function handleTerm(Node $node): void
{
    $field = $this->currentField;

    /*
     * In a simple field the bool operator is taken from the field
     * rather than from the node inside it.
     * +field:value vs. field:+value
     */
    $operatorSource = ($this->inField && !$field->hasCompoundNode()) ? $field : $node;
    $isOptional = $operatorSource->isOptional();
    $isRequired = $operatorSource->isRequired();

    if ($isOptional) {
        $this->shouldMatchTerm($node, $field);
    } elseif ($isRequired) {
        $this->mustMatchTerm($node, $field, $this->queryOnFieldIsCacheable);
    } else {
        $this->mustNotMatchTerm($node, $field, $this->queryOnFieldIsCacheable);
    }
}
406 |
/**
 * Decides whether the query on a field may be treated as cacheable.
 *
 * A field qualifies only when it is not optional, not boosted, and its
 * node is an exact value: not fuzzy, not on a full text field, and not a
 * Subquery, WordRange, Phrase or trailing-wildcard Word. A storage/search
 * provider may then cache the result set or optimize the query.
 *
 * Typical use is a required field that prefilters the documents to be
 * searched, e.g. videos matching "cats" that are also "status:active".
 *
 * @param Field $field
 *
 * @return bool
 */
protected function queryOnFieldIsCacheable(Field $field): bool
{
    if ($field->isOptional() || $field->useBoost()) {
        return false;
    }

    $node = $field->getNode();

    if ($node->useFuzzy() || $this->supportsFullTextSearch($field->getName())) {
        return false;
    }

    if ($node instanceof Subquery || $node instanceof WordRange || $node instanceof Phrase) {
        return false;
    }

    return !($node instanceof Word && $node->hasTrailingWildcard());
}
441 |
/**
 * Field-start hook. Does nothing by default; builders that need to emit
 * something when a field opens can override it.
 *
 * @param Field $field
 * @param bool  $cacheable
 */
protected function startField(Field $field, bool $cacheable = false): void
{
}
445 |
/**
 * Field-end hook. Does nothing by default; builders that need to emit
 * something when a field closes can override it.
 *
 * @param Field $field
 * @param bool  $cacheable
 */
protected function endField(Field $field, bool $cacheable = false): void
{
}
449 |
/**
 * Subquery-start hook, called by addSubquery() before the child nodes are
 * visited. Does nothing by default.
 *
 * @param Subquery   $subquery
 * @param Field|null $field the current field, if any
 */
protected function startSubquery(Subquery $subquery, ?Field $field = null): void
{
}
453 |
/**
 * Subquery-end hook, called by addSubquery() after the child nodes have
 * been visited. Does nothing by default.
 *
 * @param Subquery   $subquery
 * @param Field|null $field the current field, if any
 */
protected function endSubquery(Subquery $subquery, ?Field $field = null): void
{
}
457 |
/**
 * Handles a range node inside a field. Called with the builder's current
 * field and its cacheable flag.
 */
abstract protected function handleRange(Range $range, Field $field, bool $cacheable = false): void;

/**
 * Text node that is required. Called by handleText().
 */
abstract protected function mustMatch(Node $node, ?Field $field = null): void;

/**
 * Text node that is optional; also used for stop words. Called by handleText().
 */
abstract protected function shouldMatch(Node $node, ?Field $field = null): void;

/**
 * Text node that is neither optional nor required. Called by handleText().
 */
abstract protected function mustNotMatch(Node $node, ?Field $field = null): void;

/**
 * Term node that is required. Called by handleTerm().
 */
abstract protected function mustMatchTerm(Node $node, ?Field $field = null, bool $cacheable = false): void;

/**
 * Term node that is optional. Called by handleTerm().
 */
abstract protected function shouldMatchTerm(Node $node, ?Field $field = null): void;

/**
 * Term node that is neither optional nor required. Called by handleTerm().
 */
abstract protected function mustNotMatchTerm(Node $node, ?Field $field = null, bool $cacheable = false): void;
471 | }
472 |
--------------------------------------------------------------------------------
/src/Builder/ElasticaQueryBuilder.php:
--------------------------------------------------------------------------------
1 | defaultFieldName = '_all';
68 | $this->qb = new RuflinQueryBuilder();
69 | $this->clear();
70 | }
71 |
/**
 * Resets the builder to a fresh, empty bool query and drops all nested
 * queries collected so far.
 *
 * @return self
 */
public function clear(): self
{
    $this->nestedQueries = [];
    $this->boolQuery = $this->qb->query()->bool();
    $this->outerBoolQuery = $this->boolQuery;

    return $this;
}
79 |
/**
 * Sets whether Emoji nodes are skipped by addTermToQuery().
 *
 * @param bool $ignoreEmojis
 *
 * @return self
 */
public function ignoreEmojis(bool $ignoreEmojis = true): self
{
    $this->ignoreEmojis = $ignoreEmojis;

    return $this;
}
85 |
/**
 * Sets whether Emoticon nodes are skipped by addTermToQuery().
 *
 * @param bool $ignoreEmoticons
 *
 * @return self
 */
public function ignoreEmoticons(bool $ignoreEmoticons = true): self
{
    $this->ignoreEmoticons = $ignoreEmoticons;

    return $this;
}
91 |
/**
 * Sets whether stop words are skipped by addTextToQuery().
 *
 * @param bool $ignoreStopWords
 *
 * @return self
 */
public function ignoreStopWords(bool $ignoreStopWords = true): self
{
    $this->ignoreStopWords = $ignoreStopWords;

    return $this;
}
97 |
/**
 * Sets whether non-numeric term values are lower-cased by addTermToQuery().
 *
 * @param bool $lowerCaseTerms
 *
 * @return self
 */
public function lowerCaseTerms(bool $lowerCaseTerms = true): self
{
    $this->lowerCaseTerms = $lowerCaseTerms;

    return $this;
}
103 |
/**
 * Replaces the set of nested field paths.
 *
 * The paths are stored as array keys for isset() lookups. Each key maps to
 * true, the same representation addNestedField() writes, so the map looks
 * the same however it was populated.
 *
 * @param string[] $fields nested paths
 *
 * @return self
 */
public function setNestedFields(array $fields): self
{
    $this->nestedFields = array_fill_keys($fields, true);

    return $this;
}
109 |
/**
 * Registers one more nested field path.
 *
 * @param string $fieldName
 *
 * @return self
 */
public function addNestedField(string $fieldName): self
{
    $this->nestedFields[$fieldName] = true;

    return $this;
}
115 |
/**
 * Removes a nested field path; a no-op when the path is not registered.
 *
 * @param string $fieldName
 *
 * @return self
 */
public function removeNestedField(string $fieldName): self
{
    unset($this->nestedFields[$fieldName]);

    return $this;
}
121 |
/**
 * @return array the registered nested field paths (the keys of the internal map)
 */
public function getNestedFields(): array
{
    return array_keys($this->nestedFields);
}
126 |
/**
 * Returns the bool query built so far.
 *
 * When a "must" clause is present everything else is assumed to be
 * optional and the query is returned untouched. Otherwise
 * minimum_should_match is set to '2<80%' on it first.
 *
 * @return BoolQuery
 */
public function getBoolQuery(): BoolQuery
{
    return $this->boolQuery->hasParam('must')
        ? $this->boolQuery
        : $this->boolQuery->setMinimumShouldMatch('2<80%');
}
136 |
/**
 * Turns a range node into a "range" query and adds it to the bool query.
 *
 * The field's bool operator chooses the clause (must / must_not / should).
 * Exclusive ranges use gt/lt, inclusive ones gte/lte. For a DateRange the
 * bounds are formatted as Y-m-d and the upper bound is moved forward one
 * day. A cacheable range goes to the filter clause (or must_not when
 * prohibited) and carries no boost.
 *
 * @param Range $range
 * @param Field $field
 * @param bool  $cacheable
 */
protected function handleRange(Range $range, Field $field, bool $cacheable = false): void
{
    $fieldName = $field->getName();

    $method = match ($field->getBoolOperator()) {
        BoolOperator::REQUIRED => 'addMust',
        BoolOperator::PROHIBITED => 'addMustNot',
        default => 'addShould',
    };

    [$lowerOperator, $upperOperator] = $range->isExclusive() ? ['gt', 'lt'] : ['gte', 'lte'];

    $isDateRange = $range instanceof DateRange;
    $data = [];

    if ($range->hasLowerNode()) {
        $lower = $range->getLowerNode();
        $data[$lowerOperator] = $isDateRange
            ? $lower->toDateTime($this->localTimeZone)->format('Y-m-d')
            : $lower->getValue();
    }

    if ($range->hasUpperNode()) {
        $upper = $range->getUpperNode();
        $data[$upperOperator] = $isDateRange
            ? $upper->toDateTime($this->localTimeZone)->modify('+1 day')->format('Y-m-d')
            : $upper->getValue();
    }

    if ($cacheable) {
        // There is no negative filter clause, so prohibited ranges stay in must_not.
        $clause = 'addMustNot' === $method ? $method : 'addFilter';
        $this->addToBoolQuery($clause, $fieldName, $this->qb->query()->range($fieldName, $data));
        return;
    }

    if ($field->useBoost()) {
        $data['boost'] = $field->getBoost();
    }

    $this->addToBoolQuery($method, $fieldName, $this->qb->query()->range($fieldName, $data));
}
198 |
/**
 * Remembers the active bool query as the outer one and starts a fresh
 * bool query that collects the subquery's clauses.
 *
 * @param Subquery   $subquery
 * @param Field|null $field
 */
protected function startSubquery(Subquery $subquery, ?Field $field = null): void
{
    $this->outerBoolQuery = $this->boolQuery;
    $this->boolQuery = $this->qb->query()->bool();
}
204 |
/**
 * Closes the subquery: when it collected any clauses it is attached to the
 * outer bool query, then the outer query becomes the active one again.
 *
 * Boost and bool operator come from the field when inside a field,
 * otherwise from the subquery itself.
 *
 * @param Subquery   $subquery
 * @param Field|null $field
 */
protected function endSubquery(Subquery $subquery, ?Field $field = null): void
{
    $inner = $this->boolQuery;
    $outer = $this->outerBoolQuery;

    if (!empty($inner->getParams())) {
        $inner->setMinimumShouldMatch(1);

        $source = $this->inField() ? $field : $subquery;

        if ($source->useBoost()) {
            $inner->setBoost($source->getBoost());
        }

        $method = match ($source->getBoolOperator()) {
            BoolOperator::REQUIRED => 'addMust',
            BoolOperator::PROHIBITED => 'addMustNot',
            default => 'addShould',
        };

        $outer->$method($inner);
    }

    $this->boolQuery = $outer;
}
236 |
/**
 * Adds the text node as a "must" clause.
 *
 * @param Node       $node
 * @param Field|null $field
 */
protected function mustMatch(Node $node, ?Field $field = null): void
{
    $this->addTextToQuery('addMust', $node, $field);
}
241 |
/**
 * Adds the text node as a "should" clause.
 *
 * @param Node       $node
 * @param Field|null $field
 */
protected function shouldMatch(Node $node, ?Field $field = null): void
{
    $this->addTextToQuery('addShould', $node, $field);
}
246 |
/**
 * Adds the text node as a "must_not" clause.
 *
 * @param Node       $node
 * @param Field|null $field
 */
protected function mustNotMatch(Node $node, ?Field $field = null): void
{
    $this->addTextToQuery('addMustNot', $node, $field);
}
251 |
/**
 * Adds a text node to the active query.
 *
 * Depending on the node this builds a match_phrase (Phrase), fuzzy,
 * wildcard (Word with trailing wildcard) or plain match query. Stop words
 * are dropped when ignoreStopWords is enabled.
 *
 * @param string     $method bool query method: addMust, addShould or addMustNot
 * @param Node       $node
 * @param Field|null $field
 */
protected function addTextToQuery(string $method, Node $node, ?Field $field = null): void
{
    $isPhrase = $node instanceof Phrase;

    if ($this->ignoreStopWords && $node instanceof Word && $node->isStopWord()) {
        return;
    }

    $inField = $this->inField();
    $fieldName = $inField ? $field->getName() : $this->defaultFieldName;

    // Boost/fuzzy come from the field for a plain "field:value", else from the node.
    $source = ($inField && !$this->inSubquery()) ? $field : $node;
    $useBoost = $source->useBoost();
    $boost = $source->getBoost();
    $useFuzzy = $source->useFuzzy();
    $fuzzy = $source->getFuzzy();

    /*
     * An optional phrase containing special chars is forced to fuzzy.
     * todo: review this with more test cases
     */
    if ($isPhrase
        && !$useFuzzy
        && 'addShould' === $method
        && preg_match('/[^a-zA-Z0-9\s\._-]+/', $node->getValue())
    ) {
        $useFuzzy = true;
        $fuzzy = 1;
    }

    if ($isPhrase) {
        $data = ['query' => $node->getValue()];

        if ($useBoost) {
            $data['boost'] = $boost;
        }

        if ($useFuzzy) {
            $data['slop'] = $fuzzy;
        }

        $query = $this->qb->query()->match_phrase($fieldName, $data);
    } elseif ($useFuzzy) {
        $query = $this->qb->query()->fuzzy($fieldName, $node->getValue());
        $query->setFieldOption('fuzziness', $fuzzy);

        if ($useBoost) {
            $query->setFieldOption('boost', $boost);
        }
    } elseif ($node instanceof Word && $node->hasTrailingWildcard()) {
        $pattern = strtolower($node->getValue()) . '*';
        $query = $this->qb->query()->wildcard($fieldName, $pattern, $useBoost ? $boost : Word::DEFAULT_BOOST);
    } else {
        $data = ['query' => $node->getValue(), 'operator' => 'and', 'lenient' => true];

        if ($useBoost) {
            $data['boost'] = $boost;
        }

        $query = $this->qb->query()->match($fieldName, $data);
    }

    $this->addToBoolQuery($method, $fieldName, $query);
}
330 |
/**
 * Adds the term node as a "must" clause (or a filter when cacheable).
 *
 * @param Node       $node
 * @param Field|null $field
 * @param bool       $cacheable
 */
protected function mustMatchTerm(Node $node, ?Field $field = null, bool $cacheable = false): void
{
    $this->addTermToQuery('addMust', $node, $field, $cacheable);
}
335 |
/**
 * Adds the term node as a "should" clause; never cacheable.
 *
 * @param Node       $node
 * @param Field|null $field
 */
protected function shouldMatchTerm(Node $node, ?Field $field = null): void
{
    $this->addTermToQuery('addShould', $node, $field);
}
340 |
/**
 * Adds the term node as a "must_not" clause.
 *
 * @param Node       $node
 * @param Field|null $field
 * @param bool       $cacheable
 */
protected function mustNotMatchTerm(Node $node, ?Field $field = null, bool $cacheable = false): void
{
    $this->addTermToQuery('addMustNot', $node, $field, $cacheable);
}
345 |
/**
 * Adds a term to the bool query, or to its filter clause when the request
 * for that item is cacheable (e.g. documents with hashtag of cats).
 *
 * Special cases: the "_exists_" and "_missing_" field names build an Exists
 * query (forced to must / must_not and always cacheable), Date nodes become
 * a date range, and numbers with a comparison operator become a range.
 * Emoji/Emoticon nodes are dropped when the matching ignore flag is set.
 *
 * @param string     $method bool query method: addMust, addShould or addMustNot
 * @param Node       $node
 * @param Field|null $field
 * @param bool       $cacheable
 */
protected function addTermToQuery(string $method, Node $node, ?Field $field = null, bool $cacheable = false): void
{
    $skipEmoji = $node instanceof Emoji && $this->ignoreEmojis;
    $skipEmoticon = $node instanceof Emoticon && $this->ignoreEmoticons;
    if ($skipEmoji || $skipEmoticon) {
        return;
    }

    $value = $node->getValue();
    if ($this->lowerCaseTerms && !$node instanceof Numbr) {
        $value = strtolower((string)$value);
    }

    $inField = $this->inField();
    $fieldName = $inField ? $field->getName() : $this->defaultFieldName;

    // Boost comes from the field for a plain "field:value", else from the node.
    $source = ($inField && !$this->inSubquery()) ? $field : $node;
    $useBoost = $source->useBoost();
    $boost = $source->getBoost();

    if ('_exists_' === $fieldName) {
        $term = new Exists($value);
        $method = 'addMust';
        $cacheable = true;
    } elseif ('_missing_' === $fieldName) {
        $term = new Exists($value);
        $method = 'addMustNot';
        $cacheable = true;
    } elseif ($node instanceof Date) {
        $term = $this->createDateRangeForSingleNode(
            $fieldName,
            $node,
            $cacheable,
            $useBoost ? $boost : Date::DEFAULT_BOOST
        );
    } elseif ($node instanceof Numbr && $node->useComparisonOperator()) {
        $data = [$node->getComparisonOperator()->value => $value];
        if ($useBoost) {
            $data['boost'] = $boost;
        }
        $term = $this->qb->query()->range($fieldName, $data);
    } else {
        $term = $this->qb->query()->term();
        $term->setTerm($fieldName, $value, $boost);
    }

    // Cacheable terms go to the filter clause, except prohibited ones.
    $clause = ($cacheable && 'addMustNot' !== $method) ? 'addFilter' : $method;
    $this->addToBoolQuery($clause, $fieldName, $term);
}
412 |
/**
 * Builds a range query for a single Date node.
 *
 * Even an exact date needs a range, because the stored value may be a
 * timestamp: asking for 2015-12-01 becomes >=2015-12-01 and <2015-12-02.
 * Any other comparison operator is applied to the date as given.
 *
 * The Date node is a date with no time component. @see Date::toDateTime
 *
 * @param string $fieldName
 * @param Date   $node
 * @param bool   $cacheable when true no boost is added to the range
 * @param float  $boost
 *
 * @return RangeQuery
 */
protected function createDateRangeForSingleNode(
    string $fieldName,
    Date $node,
    bool $cacheable = false,
    float $boost = Date::DEFAULT_BOOST
): RangeQuery {
    $operator = $node->getComparisonOperator();
    $date = $node->toDateTime($this->localTimeZone);
    $day = $date->format('Y-m-d');

    if ($operator === ComparisonOperator::EQ) {
        // $day is captured above, before modify() moves the date forward.
        $data = [
            'gte' => $day,
            'lt' => $date->modify('+1 day')->format('Y-m-d'),
        ];
    } else {
        $data = [$operator->value => $day];
    }

    if (!$cacheable) {
        $data['boost'] = $boost;
    }

    return $this->qb->query()->range($fieldName, $data);
}
453 |
/**
 * Adds a query to the active bool query, wrapping it in a Nested query
 * when the field lives under a registered nested path.
 *
 * The nested path is the field name with ".raw" removed, up to its last
 * dot. One Nested query is created per path and method and reused for
 * later clauses on the same path.
 *
 * @param string        $method bool query method, e.g. addMust or addFilter
 * @param string        $fieldName
 * @param AbstractQuery $query
 */
protected function addToBoolQuery(string $method, string $fieldName, AbstractQuery $query): void
{
    $nestedPath = null;

    if (str_contains($fieldName, '.')) {
        $path = str_replace('.raw', '', $fieldName);
        $lastDot = strrpos($path, '.');

        // "title.raw" collapses to "title", leaving no dot. strrpos() then
        // returns false, which must not be passed to substr() as a length.
        if (false !== $lastDot) {
            $nestedPath = substr($path, 0, $lastDot);
        }
    }

    if (null === $nestedPath || !isset($this->nestedFields[$nestedPath])) {
        $this->boolQuery->$method($query);
        return;
    }

    $nestedQuery = $nestedPath . '-' . $method;
    if (!isset($this->nestedQueries[$nestedQuery])) {
        $this->nestedQueries[$nestedQuery] = (new Nested())
            ->setQuery($this->qb->query()->bool()->setMinimumShouldMatch('2<80%'))
            ->setPath($nestedPath)
            ->setParam('ignore_unmapped', true);
        $this->boolQuery->$method($this->nestedQueries[$nestedQuery]);
    }

    $this->nestedQueries[$nestedQuery]->getParam('query')->$method($query);
}
479 | }
480 |
--------------------------------------------------------------------------------
/src/Builder/QueryBuilder.php:
--------------------------------------------------------------------------------
1 | result = '';
32 | $this->indent = 2;
33 | return $this;
34 | }
35 |
/**
 * Returns the accumulated output as a complete XML document: an XML
 * declaration followed by a single root element wrapping the printed nodes.
 *
 * NOTE(review): the prolog and root element literals were empty in the
 * reviewed source, which cannot be parsed as XML. The "query" root name
 * used here is a reconstruction; confirm it against the test fixtures.
 *
 * @return string
 */
public function toXmlString(): string
{
    return '<?xml version="1.0"?>' . PHP_EOL
        . '<query>' . PHP_EOL
        . rtrim((string)$this->result) . PHP_EOL
        . '</query>';
}
40 |
/**
 * Parses the generated XML into a SimpleXMLElement.
 *
 * When the generated string cannot be parsed, an empty document is
 * returned instead of letting the parse error escape.
 *
 * NOTE(review): the fallback literal was an empty string in the reviewed
 * source, which makes the SimpleXMLElement constructor throw. The "query"
 * root name used here is a reconstruction; confirm it against the fixtures.
 *
 * @return \SimpleXMLElement
 */
public function toSimpleXmlElement(): \SimpleXMLElement
{
    try {
        return new \SimpleXMLElement($this->toXmlString());
    } catch (\Throwable $e) {
        // Deliberate best effort: fall back to a valid, empty document.
        return new \SimpleXMLElement('<?xml version="1.0"?><query></query>');
    }
}
55 |
/**
 * Prints the opening "field" tag with its attributes and indents.
 *
 * Attributes, in order: name, bool_operator (only when the field is not
 * optional), cacheable (only when true), boost (only when the field uses
 * boost).
 *
 * @param Field $field
 * @param bool  $cacheable
 */
protected function startField(Field $field, bool $cacheable = false): void
{
    $attributes = [sprintf('name="%s"', $field->getName())];

    if (!$field->isOptional()) {
        $attributes[] = sprintf('bool_operator="%s"', strtolower($field->getBoolOperator()->name));
    }

    if ($cacheable) {
        $attributes[] = 'cacheable="true"';
    }

    if ($field->useBoost()) {
        $attributes[] = sprintf('boost="%s"', $field->getBoost());
    }

    $this->printLine('<field ' . implode(' ', $attributes) . '>');
    $this->indent();
}
75 |
/**
 * Outdents and prints the closing tag for the "field" element opened by
 * startField().
 *
 * @param Field $field
 * @param bool  $cacheable
 */
protected function endField(Field $field, bool $cacheable = false): void
{
    $this->outdent();
    // The reviewed source printed an empty line here, leaving the element unclosed.
    $this->printLine('</field>');
}
81 |
/**
 * Prints a range element containing a lower and an upper bound section.
 *
 * Each bound is printed by letting the bound node visit this builder.
 * The range element is closed with a proper end tag.
 *
 * NOTE(review): in the reviewed source the bound wrappers and the
 * missing-bound placeholder were empty strings, so a one-sided range gave
 * no way to tell which bound was present. The wrapper names below follow
 * the "lower_node"/"upper_node" keys Range uses in its array form; confirm
 * them against the fixtures. The placeholder for a missing bound is left
 * as it was.
 *
 * @param Range $range
 * @param Field $field
 * @param bool  $cacheable
 */
protected function handleRange(Range $range, Field $field, bool $cacheable = false): void
{
    $type = $range::NODE_TYPE;

    $this->printLine($range->isExclusive() ? '<' . $type . ' exclusive="true">' : '<' . $type . '>');
    $this->indent();

    $this->printLine('<lower_node>');
    $this->indent();
    if ($range->hasLowerNode()) {
        $range->getLowerNode()->acceptBuilder($this);
    } else {
        $this->printLine('');
    }
    $this->outdent();
    $this->printLine('</lower_node>');

    $this->printLine('<upper_node>');
    $this->indent();
    if ($range->hasUpperNode()) {
        $range->getUpperNode()->acceptBuilder($this);
    } else {
        $this->printLine('');
    }
    $this->outdent();
    $this->printLine('</upper_node>');

    $this->outdent();
    $this->printLine('</' . $type . '>');
}
114 |
/**
 * Prints the opening subquery tag and indents. The subquery's own boost is
 * printed only when it is not inside a field.
 *
 * @param Subquery   $subquery
 * @param Field|null $field
 */
protected function startSubquery(Subquery $subquery, ?Field $field = null): void
{
    $tag = $subquery::NODE_TYPE;

    if (!($field instanceof Field) && $subquery->useBoost()) {
        $tag .= sprintf(' boost="%s"', $subquery->getBoost());
    }

    $this->printLine('<' . $tag . '>');
    $this->indent();
}
127 |
/**
 * Outdents and prints the closing tag for the element opened by
 * startSubquery(), which is named after the subquery's NODE_TYPE.
 *
 * @param Subquery   $subquery
 * @param Field|null $field
 */
protected function endSubquery(Subquery $subquery, ?Field $field = null): void
{
    $this->outdent();
    // The reviewed source printed an empty line here, leaving the element unclosed.
    $this->printLine(sprintf('</%s>', $subquery::NODE_TYPE));
}
133 |
/**
 * Prints the node, labelled with this method's name as its rule.
 *
 * @param Node       $node
 * @param Field|null $field
 */
protected function mustMatch(Node $node, ?Field $field = null): void
{
    $this->printSimpleNode(__FUNCTION__, $node, $field);
}
138 |
/**
 * Prints the node, labelled with this method's name as its rule.
 *
 * @param Node       $node
 * @param Field|null $field
 */
protected function shouldMatch(Node $node, ?Field $field = null): void
{
    $this->printSimpleNode(__FUNCTION__, $node, $field);
}
143 |
/**
 * Prints the node, labelled with this method's name as its rule.
 *
 * @param Node       $node
 * @param Field|null $field
 */
protected function mustNotMatch(Node $node, ?Field $field = null): void
{
    $this->printSimpleNode(__FUNCTION__, $node, $field);
}
148 |
/**
 * Prints the node, labelled with this method's name as its rule. The
 * cacheable flag is not used here.
 *
 * @param Node       $node
 * @param Field|null $field
 * @param bool       $cacheable
 */
protected function mustMatchTerm(Node $node, ?Field $field = null, bool $cacheable = false): void
{
    $this->printSimpleNode(__FUNCTION__, $node, $field);
}
153 |
/**
 * Prints the node, labelled with this method's name as its rule.
 *
 * @param Node       $node
 * @param Field|null $field
 */
protected function shouldMatchTerm(Node $node, ?Field $field = null): void
{
    $this->printSimpleNode(__FUNCTION__, $node, $field);
}
158 |
/**
 * Prints the node, labelled with this method's name as its rule. The
 * cacheable flag is not used here.
 *
 * @param Node       $node
 * @param Field|null $field
 * @param bool       $cacheable
 */
protected function mustNotMatchTerm(Node $node, ?Field $field = null, bool $cacheable = false): void
{
    $this->printSimpleNode(__FUNCTION__, $node, $field);
}
163 |
/**
 * Prints a single node as one XML element named after its NODE_TYPE.
 *
 * Inside a range only the bare value is printed. Otherwise the element
 * carries a rule attribute (the snake_cased $rule), optional boost / fuzzy /
 * trailing_wildcard attributes, and a comparison_operator for numbers and
 * dates that use one.
 *
 * Values containing characters outside the plain-text whitelist are
 * wrapped in a CDATA section so they are kept and the document stays
 * well formed. NOTE(review): the reviewed source replaced such values with
 * an empty string and emitted malformed closing tags; confirm the CDATA
 * form against the fixtures.
 *
 * @param string     $rule name of the calling match method, e.g. mustMatchTerm
 * @param Node       $node
 * @param Field|null $field
 */
protected function printSimpleNode(string $rule, Node $node, ?Field $field = null): void
{
    $type = $node::NODE_TYPE;

    if ($this->inRange()) {
        $this->printLine(sprintf('<%s>%s</%s>', $type, $node->getValue(), $type));
        return;
    }

    $tag = $type;
    $hasWildcard = $node instanceof Word && $node->hasTrailingWildcard();

    if ($field instanceof Field) {
        // Inside a field, boost lives on the field element, not on the node.
        if ($hasWildcard) {
            $tag .= ' trailing_wildcard="true"';
        }
    } elseif ($node->useBoost()) {
        $tag .= sprintf(' boost="%s"', $node->getBoost());
    } elseif ($node->useFuzzy()) {
        $tag .= sprintf(' fuzzy="%s"', $node->getFuzzy());
    } elseif ($hasWildcard) {
        $tag .= ' trailing_wildcard="true"';
    }

    $snaked = trim(strtolower(preg_replace('/([A-Z])/', '_$1', $rule)), '_');
    $tag .= sprintf(' rule="%s"', $snaked);

    if ($node instanceof Numbr || $node instanceof Date) {
        $comparisonOperator = match ($node->getComparisonOperator()) {
            ComparisonOperator::GT => 'gt',
            ComparisonOperator::GTE => 'gte',
            ComparisonOperator::LT => 'lt',
            ComparisonOperator::LTE => 'lte',
            default => null,
        };

        if (null !== $comparisonOperator) {
            $tag .= sprintf(' comparison_operator="%s"', $comparisonOperator);
        }
    }

    $value = (string)$node->getValue();
    if (preg_match('/[^a-zA-Z0-9\s!@#$%\^\*\(\)_\-+"\'\\{\}:;\?\.]+/', $value)) {
        // Split any "]]>" in the value so it cannot end the CDATA section early.
        $value = '<![CDATA[' . str_replace(']]>', ']]]]><![CDATA[>', $value) . ']]>';
    }

    $this->printLine(sprintf('<%s>%s</%s>', $tag, $value, $type));
}
211 |
/**
 * Appends a line to the result, prefixed with the current indentation.
 *
 * @param string $line
 * @param bool   $newLine whether to terminate the line with PHP_EOL
 */
protected function printLine(string $line, bool $newLine = true): void
{
    $padding = str_repeat(' ', $this->indent);
    $ending = $newLine ? PHP_EOL : '';

    $this->result .= $padding . $line . $ending;
}
216 |
/**
 * Increases the indentation used by printLine().
 *
 * @param int $step number of spaces to add
 */
protected function indent(int $step = 2): void
{
    $this->indent = $this->indent + $step;
}
221 |
/**
 * Decreases the indentation used by printLine().
 *
 * @param int $step number of spaces to remove
 */
protected function outdent(int $step = 2): void
{
    $this->indent = $this->indent - $step;
}
226 | }
227 |
--------------------------------------------------------------------------------
/src/Enum/BoolOperator.php:
--------------------------------------------------------------------------------
1 | comparisonOperator = $comparisonOperator ?: ComparisonOperator::EQ;
34 | }
35 |
/**
 * Creates the node from its array form.
 *
 * Missing keys fall back to defaults. A bool or comparison operator value
 * that cannot be converted to its enum is treated as absent.
 *
 * @param array $data
 *
 * @return self
 */
public static function fromArray(array $data = []): self
{
    $boolOperator = null;
    if (isset($data['bool_operator'])) {
        try {
            $boolOperator = BoolOperator::from($data['bool_operator']);
        } catch (\Throwable $e) {
            $boolOperator = null;
        }
    }

    $comparisonOperator = null;
    if (isset($data['comparison_operator'])) {
        try {
            $comparisonOperator = ComparisonOperator::from($data['comparison_operator']);
        } catch (\Throwable $e) {
            $comparisonOperator = null;
        }
    }

    return new self(
        $data['value'] ?? '',
        $boolOperator,
        (bool)($data['use_boost'] ?? false),
        (float)($data['boost'] ?? self::DEFAULT_BOOST),
        (bool)($data['use_fuzzy'] ?? false),
        (int)($data['fuzzy'] ?? self::DEFAULT_FUZZY),
        $comparisonOperator
    );
}
58 |
/**
 * Returns the parent's array form, adding "comparison_operator" only when
 * it is something other than EQ.
 *
 * @return array
 */
public function toArray(): array
{
    $array = parent::toArray();

    if ($this->comparisonOperator !== ComparisonOperator::EQ) {
        $array['comparison_operator'] = $this->comparisonOperator->value;
    }

    return $array;
}
69 |
/**
 * @return bool true when the comparison operator is anything but EQ
 */
public function useComparisonOperator(): bool
{
    return ComparisonOperator::EQ !== $this->comparisonOperator;
}
74 |
/**
 * @return ComparisonOperator
 */
public function getComparisonOperator(): ComparisonOperator
{
    return $this->comparisonOperator;
}
79 |
/**
 * Converts the node's Y-m-d value to a DateTime at midnight, in UTC.
 *
 * Pass a time zone to say the stored value is local to that zone; the
 * result is then converted to UTC. A value that cannot be parsed falls
 * back to today's date.
 *
 * @param \DateTimeZone|null $timeZone zone the value is expressed in; UTC when null
 *
 * @return \DateTimeInterface
 */
public function toDateTime(?\DateTimeZone $timeZone = null): \DateTimeInterface
{
    self::$utc ??= new \DateTimeZone('UTC');
    $zone = $timeZone ?? self::$utc;

    // The leading "!" resets all unparsed fields, so the time is 00:00:00.
    $date = \DateTime::createFromFormat('!Y-m-d', $this->getValue(), $zone);

    if (!$date instanceof \DateTimeInterface) {
        $today = (new \DateTime())->format('Y-m-d');
        $date = \DateTime::createFromFormat('!Y-m-d', $today, $zone);
    }

    if (0 !== $date->getOffset()) {
        $date->setTimezone(self::$utc);
    }

    return $date;
}
105 |
/**
 * Visitor entry point: hands this node to the builder's addDate().
 *
 * @param QueryBuilder $builder
 */
public function acceptBuilder(QueryBuilder $builder): void
{
    $builder->addDate($this);
}
110 | }
111 |
--------------------------------------------------------------------------------
/src/Node/DateRange.php:
--------------------------------------------------------------------------------
1 | addEmoji($this);
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/Node/Emoticon.php:
--------------------------------------------------------------------------------
1 | addEmoticon($this);
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/Node/Field.php:
--------------------------------------------------------------------------------
1 | 'real_field_name'].
16 | * For example: plays:>100 should actually be: plays_count:>100.
17 | *
18 | * @var array
19 | */
20 | public static array $aliases = [];
21 | private Node $node;
22 |
/**
 * @param string            $fieldName    replaced by its alias when one is registered in self::$aliases
 * @param Node              $node         the node the field applies to; must not be a Field
 * @param BoolOperator|null $boolOperator
 * @param bool              $useBoost
 * @param float             $boost
 *
 * @throws \LogicException when $node is itself a Field
 */
public function __construct(
    string $fieldName,
    Node $node,
    ?BoolOperator $boolOperator = null,
    bool $useBoost = false,
    float $boost = self::DEFAULT_BOOST
) {
    $resolvedName = self::$aliases[$fieldName] ?? $fieldName;

    parent::__construct($resolvedName, $boolOperator, $useBoost, $boost);
    $this->node = $node;

    if ($node instanceof Field) {
        throw new \LogicException('A Field cannot contain another field.');
    }
}
41 |
/**
 * Creates a field from its array form.
 *
 * The "node" entry is mandatory because a field always wraps a node. A
 * bool operator value that cannot be converted to its enum is treated as
 * absent.
 *
 * @param array $data
 *
 * @return self
 *
 * @throws \InvalidArgumentException when "node" is missing or not an array
 */
public static function fromArray(array $data = []): self
{
    // Fail with a clear message instead of passing null into the
    // non-nullable Node constructor parameter.
    if (!isset($data['node']) || !is_array($data['node'])) {
        throw new \InvalidArgumentException('Field data requires a [node] array.');
    }

    $value = $data['value'] ?? '';
    $useBoost = (bool)($data['use_boost'] ?? false);
    $boost = (float)($data['boost'] ?? self::DEFAULT_BOOST);

    try {
        $boolOperator = isset($data['bool_operator']) ? BoolOperator::from($data['bool_operator']) : null;
    } catch (\Throwable $e) {
        $boolOperator = null;
    }

    $node = self::factory($data['node']);

    return new self($value, $node, $boolOperator, $useBoost, $boost);
}
59 |
/**
 * Returns the parent's array form plus the wrapped node under "node".
 *
 * @return array
 */
public function toArray(): array
{
    $array = parent::toArray();
    $array['node'] = $this->node->toArray();

    return $array;
}
66 |
/**
 * @return string the field name, which is stored as the node's value
 */
public function getName(): string
{
    return $this->getValue();
}
71 |
/**
 * @return Node the node wrapped by this field
 */
public function getNode(): Node
{
    return $this->node;
}
76 |
/**
 * @return bool whether the wrapped node reports itself as a compound node
 */
public function hasCompoundNode(): bool
{
    return $this->node->isCompoundNode();
}
81 |
/**
 * Visitor entry point: hands this field to the builder's addField().
 *
 * @param QueryBuilder $builder
 */
public function acceptBuilder(QueryBuilder $builder): void
{
    $builder->addField($this);
}
86 | }
87 |
--------------------------------------------------------------------------------
/src/Node/Hashtag.php:
--------------------------------------------------------------------------------
1 | addHashtag($this);
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/Node/Mention.php:
--------------------------------------------------------------------------------
1 | addMention($this);
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/Node/Node.php:
--------------------------------------------------------------------------------
1 | value = $value;
44 | $this->boolOperator = $boolOperator ?: BoolOperator::OPTIONAL;
45 |
46 | $this->useBoost = $useBoost && static::SUPPORTS_BOOST && $this->boolOperator === BoolOperator::OPTIONAL;
47 | if ($this->useBoost) {
48 | $this->boost = $boost;
49 | if ($this->boost < static::MIN_BOOST) {
50 | $this->boost = static::MIN_BOOST;
51 | }
52 |
53 | if ($this->boost > static::MAX_BOOST) {
54 | $this->boost = static::MAX_BOOST;
55 | }
56 | }
57 |
58 | $this->useFuzzy = $useFuzzy && static::SUPPORTS_FUZZY && $this->boolOperator === BoolOperator::OPTIONAL;
59 | if ($this->useFuzzy) {
60 | $this->fuzzy = min(max($fuzzy, static::MIN_FUZZY), static::MAX_FUZZY);
61 | }
62 | }
63 |
/**
 * Creates a node from its array form by mapping the "type" entry to a
 * class in this namespace (snake_case to StudlyCase) and delegating to that
 * class's fromArray().
 *
 * @param array $data
 *
 * @return self
 *
 * @throws \InvalidArgumentException when "type" is missing, not a string,
 *                                   or does not map to an existing class
 */
public static function factory(array $data = []): self
{
    $type = $data['type'] ?? null;
    if (!is_string($type) || '' === $type) {
        // Reject early, with the same exception type used for unknown types.
        throw new \InvalidArgumentException('Node data requires a non-empty string [type].');
    }

    // fix for php7 reserved name (scalar type hint)
    if ('number' === $type) {
        $type = 'numbr';
    }

    /** @var Node $class */
    $camel = str_replace(' ', '', ucwords(str_replace('_', ' ', $type)));
    $class = 'Gdbots\QueryParser\Node\\' . $camel;
    if (!class_exists($class)) {
        throw new \InvalidArgumentException(sprintf('Node type [%s] does not exist.', $type));
    }

    return $class::fromArray($data);
}
81 |
/**
 * Returns the array form of the node.
 *
 * "type" is always present. "value", "bool_operator", the boost pair and
 * the fuzzy pair are included only when they carry information (a
 * non-empty value, a non-optional operator, boost / fuzzy in use).
 *
 * @return array
 */
public function toArray(): array
{
    $array = ['type' => static::NODE_TYPE];

    if ($this->hasValue()) {
        $array += ['value' => $this->value];
    }

    if (!$this->isOptional()) {
        $array += ['bool_operator' => $this->boolOperator->value];
    }

    if ($this->useBoost) {
        $array += ['use_boost' => $this->useBoost, 'boost' => $this->boost];
    }

    if ($this->useFuzzy) {
        $array += ['use_fuzzy' => $this->useFuzzy, 'fuzzy' => $this->fuzzy];
    }

    return $array;
}
106 |
/**
 * @return array the same data as toArray()
 */
final public function jsonSerialize(): array
{
    return $this->toArray();
}
111 |
/**
 * @return bool false when the value is null or an empty string, true otherwise
 */
final public function hasValue(): bool
{
    return !(null === $this->value || '' === $this->value);
}
116 |
/**
 * @return mixed the raw value held by the node
 */
final public function getValue()
{
    return $this->value;
}
121 |
/**
 * @return BoolOperator
 */
final public function getBoolOperator(): BoolOperator
{
    return $this->boolOperator;
}
126 |
/**
 * @return bool true when the bool operator is OPTIONAL
 */
final public function isOptional(): bool
{
    return BoolOperator::OPTIONAL === $this->boolOperator;
}
131 |
/**
 * @return bool true when the bool operator is REQUIRED
 */
final public function isRequired(): bool
{
    return BoolOperator::REQUIRED === $this->boolOperator;
}
136 |
/**
 * @return bool true when the bool operator is PROHIBITED
 */
final public function isProhibited(): bool
{
    return BoolOperator::PROHIBITED === $this->boolOperator;
}
141 |
/**
 * @return bool the COMPOUND_NODE constant of the concrete node class
 */
final public function isCompoundNode(): bool
{
    return static::COMPOUND_NODE;
}
146 |
/**
 * Base implementation: always false. Not final, so node classes can
 * override it.
 *
 * @return bool
 */
public function useComparisonOperator(): bool
{
    return false;
}
151 |
/**
 * @return bool whether boost is in use for this node
 */
final public function useBoost(): bool
{
    return $this->useBoost;
}
156 |
/**
 * @return float the boost value held by the node
 */
final public function getBoost(): float
{
    return $this->boost;
}
161 |
/**
 * @return bool whether fuzzy matching is in use for this node
 */
final public function useFuzzy(): bool
{
    return $this->useFuzzy;
}
166 |
/**
 * @return int the fuzzy value held by the node
 */
final public function getFuzzy(): int
{
    return $this->fuzzy;
}
171 |
/**
 * Visitor entry point. The base implementation is intentionally empty;
 * concrete node classes override it to call the matching builder method.
 *
 * @param QueryBuilder $builder
 */
public function acceptBuilder(QueryBuilder $builder): void
{
    // intentionally empty
}
176 | }
177 |
--------------------------------------------------------------------------------
/src/Node/NumberRange.php:
--------------------------------------------------------------------------------
1 | comparisonOperator = $comparisonOperator ?: ComparisonOperator::EQ;
21 | }
22 |
/**
 * Creates the node from its array form.
 *
 * The value is cast to float (0.0 when missing). A comparison operator
 * value that cannot be converted to its enum is treated as absent.
 *
 * @param array $data
 *
 * @return self
 */
public static function fromArray(array $data = []): self
{
    $comparisonOperator = null;
    if (isset($data['comparison_operator'])) {
        try {
            $comparisonOperator = ComparisonOperator::from($data['comparison_operator']);
        } catch (\Throwable $e) {
            $comparisonOperator = null;
        }
    }

    return new self((float)($data['value'] ?? 0.0), $comparisonOperator);
}
35 |
/**
 * Returns the parent's array form, adding "comparison_operator" only
 * when the operator is something other than ComparisonOperator::EQ.
 *
 * @return array
 */
public function toArray(): array
{
    $data = parent::toArray();

    if ($this->comparisonOperator !== ComparisonOperator::EQ) {
        $data['comparison_operator'] = $this->comparisonOperator->value;
    }

    return $data;
}
46 |
/**
 * @return bool true when the comparison operator is anything but ComparisonOperator::EQ
 */
public function useComparisonOperator(): bool
{
    return !($this->comparisonOperator === ComparisonOperator::EQ);
}
51 |
/**
 * @return ComparisonOperator the comparison operator stored on this node
 */
public function getComparisonOperator(): ComparisonOperator
{
    return $this->comparisonOperator;
}
56 |
/**
 * Hands this node to the builder through QueryBuilder::addNumber().
 *
 * @param QueryBuilder $builder
 */
public function acceptBuilder(QueryBuilder $builder): void
{
    $builder->addNumber($this);
}
61 | }
62 |
--------------------------------------------------------------------------------
/src/Node/Phrase.php:
--------------------------------------------------------------------------------
1 | addPhrase($this);
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/src/Node/Range.php:
--------------------------------------------------------------------------------
1 | lowerNode = $lowerNode;
21 | $this->upperNode = $upperNode;
22 | $this->exclusive = $exclusive;
23 |
24 | if (null === $this->lowerNode && null === $this->upperNode) {
25 | throw new \LogicException('Range requires at least a lower or upper node.');
26 | }
27 | }
28 |
/**
 * Builds a range (of the concrete class being called) from its array form.
 *
 * "lower_node" and "upper_node", when set, are passed through
 * self::factory(); "exclusive" is cast to bool and defaults to false.
 *
 * @param array $data
 *
 * @return self
 */
final public static function fromArray(array $data = []): self
{
    $lower = null;
    if (isset($data['lower_node'])) {
        $lower = self::factory($data['lower_node']);
    }

    $upper = null;
    if (isset($data['upper_node'])) {
        $upper = self::factory($data['upper_node']);
    }

    return new static($lower, $upper, (bool)($data['exclusive'] ?? false));
}
36 |
/**
 * Returns the parent's array form extended with the range details.
 *
 * "lower_node" / "upper_node" are added only when the respective bound
 * exists (the Node objects themselves are stored), and "exclusive" is
 * added only when the range is exclusive.
 *
 * @return array
 */
final public function toArray(): array
{
    $data = parent::toArray();

    $bounds = ['lower_node' => $this->lowerNode, 'upper_node' => $this->upperNode];
    foreach ($bounds as $key => $bound) {
        if (null === $bound) {
            continue;
        }

        $data[$key] = $bound;
    }

    if ($this->exclusive) {
        $data['exclusive'] = $this->exclusive;
    }

    return $data;
}
55 |
/**
 * @return bool true when a lower bound node is set
 */
final public function hasLowerNode(): bool
{
    return !(null === $this->lowerNode);
}
60 |
/**
 * @return Node|null the lower bound node, or null when the range has none
 */
public function getLowerNode(): ?Node
{
    return $this->lowerNode;
}
65 |
/**
 * @return bool true when an upper bound node is set
 */
final public function hasUpperNode(): bool
{
    return !(null === $this->upperNode);
}
70 |
/**
 * @return Node|null the upper bound node, or null when the range has none
 */
public function getUpperNode(): ?Node
{
    return $this->upperNode;
}
75 |
/**
 * @return bool the negation of the "exclusive" flag
 */
final public function isInclusive(): bool
{
    return false === (bool)$this->exclusive;
}
80 |
/**
 * @return bool the "exclusive" flag of this range
 */
final public function isExclusive(): bool
{
    return $this->exclusive;
}
85 |
/**
 * Hands this range to the builder through QueryBuilder::addRange().
 *
 * @param QueryBuilder $builder
 */
final public function acceptBuilder(QueryBuilder $builder): void
{
    $builder->addRange($this);
}
90 | }
91 |
--------------------------------------------------------------------------------
/src/Node/Subquery.php:
--------------------------------------------------------------------------------
1 | nodes = $nodes;
25 |
26 | foreach ($this->nodes as $node) {
27 | if ($node->isCompoundNode()) {
28 | throw new \LogicException('A Subquery cannot contain compound nodes. (Field, Range, Subquery)');
29 | }
30 | }
31 | }
32 |
/**
 * Builds a Subquery from its array form.
 *
 * Each entry of "nodes" is passed through self::factory(). A
 * "bool_operator" that BoolOperator::from() cannot resolve is treated
 * as absent (null).
 *
 * @param array $data
 *
 * @return self
 */
public static function fromArray(array $data = []): self
{
    $useBoost = (bool)($data['use_boost'] ?? false);
    $boost = (float)($data['boost'] ?? self::DEFAULT_BOOST);

    $children = [];
    foreach ($data['nodes'] ?? [] as $nodeData) {
        $children[] = self::factory($nodeData);
    }

    $operator = null;
    if (isset($data['bool_operator'])) {
        try {
            $operator = BoolOperator::from($data['bool_operator']);
        } catch (\Throwable $ignored) {
            // an unresolvable operator is deliberately treated as "not provided"
            $operator = null;
        }
    }

    return new self($children, $operator, $useBoost, $boost);
}
53 |
/**
 * Returns the parent's array form plus a "nodes" list holding the
 * array form of every child node, in order.
 *
 * @return array
 */
public function toArray(): array
{
    $data = parent::toArray();

    // array_values() keeps the result a plain list, as appending with [] would
    $data['nodes'] = array_values(array_map(
        static fn ($child) => $child->toArray(),
        $this->nodes
    ));

    return $data;
}
65 |
/**
 * Returns the child nodes of this subquery.
 *
 * @return Node[]
 */
public function getNodes(): array
{
    return $this->nodes;
}
73 |
/**
 * Hands this subquery to the builder through QueryBuilder::addSubquery().
 *
 * @param QueryBuilder $builder
 */
public function acceptBuilder(QueryBuilder $builder): void
{
    $builder->addSubquery($this);
}
78 | }
79 |
--------------------------------------------------------------------------------
/src/Node/Url.php:
--------------------------------------------------------------------------------
1 | addUrl($this);
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/Node/Word.php:
--------------------------------------------------------------------------------
1 | trailingWildcard = $trailingWildcard;
33 | }
34 |
/**
 * Builds a Word from its array form.
 *
 * Missing keys fall back to: '' for "value", false for the flags, and
 * the class DEFAULT_BOOST / DEFAULT_FUZZY constants. A "bool_operator"
 * that BoolOperator::from() cannot resolve is treated as absent (null).
 *
 * @param array $data
 *
 * @return self
 */
public static function fromArray(array $data = []): self
{
    $operator = null;
    if (isset($data['bool_operator'])) {
        try {
            $operator = BoolOperator::from($data['bool_operator']);
        } catch (\Throwable $ignored) {
            // an unresolvable operator is deliberately treated as "not provided"
            $operator = null;
        }
    }

    return new self(
        $data['value'] ?? '',
        $operator,
        (bool)($data['use_boost'] ?? false),
        (float)($data['boost'] ?? self::DEFAULT_BOOST),
        (bool)($data['use_fuzzy'] ?? false),
        (int)($data['fuzzy'] ?? self::DEFAULT_FUZZY),
        (bool)($data['trailing_wildcard'] ?? false)
    );
}
52 |
/**
 * Returns the parent's array form, adding "trailing_wildcard" only
 * when the flag is set.
 *
 * @return array
 */
public function toArray(): array
{
    $data = parent::toArray();

    if ($this->trailingWildcard) {
        $data['trailing_wildcard'] = $this->trailingWildcard;
    }

    return $data;
}
63 |
/**
 * @return bool the "trailing wildcard" flag of this word
 */
public function hasTrailingWildcard(): bool
{
    return $this->trailingWildcard;
}
68 |
/**
 * Checks whether the lower-cased value of this word appears in the
 * class stop word list.
 *
 * The lookup is strict so a numeric-looking value can never match a
 * stop word through PHP's loose numeric string comparison.
 * NOTE(review): assumes self::$stopWords is a list of lower-case
 * strings — confirm against its declaration.
 *
 * @return bool
 */
public function isStopWord(): bool
{
    return in_array(strtolower($this->getValue()), self::$stopWords, true);
}
73 |
/**
 * Hands this word to the builder through QueryBuilder::addWord().
 *
 * @param QueryBuilder $builder
 */
public function acceptBuilder(QueryBuilder $builder): void
{
    $builder->addWord($this);
}
78 | }
79 |
--------------------------------------------------------------------------------
/src/Node/WordRange.php:
--------------------------------------------------------------------------------
1 | addNode(Node::factory($v));
20 | }
21 |
22 | return $obj;
23 | }
24 |
/**
 * Returns the nodes collected so far (the Node objects themselves).
 *
 * @return array
 */
public function toArray(): array
{
    return $this->nodes;
}
29 |
/**
 * Supplies the data used for JSON encoding; identical to toArray().
 *
 * @return array
 */
public function jsonSerialize(): array
{
    return $this->toArray();
}
34 |
/**
 * Adds every given node, one at a time, through addNode().
 *
 * @param Node[] $nodes
 *
 * @return self this instance, for chaining
 */
public function addNodes(array $nodes): self
{
    foreach ($nodes as $each) {
        $this->addNode($each);
    }

    return $this;
}
48 |
/**
 * Appends a node to the query and indexes it under its NODE_TYPE.
 *
 * @param Node $node
 *
 * @return self this instance, for chaining
 */
public function addNode(Node $node): self
{
    $type = $node::NODE_TYPE;

    $this->nodes[] = $node;
    $this->nodesByType[$type][] = $node;

    return $this;
}
60 |
/**
 * Returns every node in the order it was added.
 *
 * @return Node[]
 */
public function getNodes(): array
{
    return $this->nodes;
}
68 |
/**
 * Returns the nodes indexed under the given node type, or an empty
 * array when no node of that type has been added.
 *
 * @param string $type a NODE_TYPE value
 *
 * @return Node[]
 */
public function getNodesOfType(string $type): array
{
    return $this->nodesByType[$type] ?? [];
}
78 |
/**
 * Returns true if the parsed query contains at least one node that is
 * not "prohibited", i.e. at least one request for items matching the
 * query. A query made up solely of prohibited nodes could otherwise
 * end up scanning the entire index.
 *
 * @return bool
 */
public function hasAMatchableNode(): bool
{
    foreach ($this->nodes as $candidate) {
        if ($candidate->isProhibited()) {
            continue;
        }

        return true;
    }

    return false;
}
96 |
/**
 * Returns the distinct field names used in this query, in order of
 * first appearance. e.g. for "status:active" the field name is "status".
 *
 * @return string[]
 */
public function getFieldsUsed(): array
{
    // names are collected as array keys so duplicates collapse naturally
    $seen = [];

    /** @var Field $field */
    foreach ($this->getNodesOfType(Field::NODE_TYPE) as $field) {
        $seen[$field->getName()] = true;
    }

    return array_keys($seen);
}
114 | }
115 |
--------------------------------------------------------------------------------
/src/QueryParser.php:
--------------------------------------------------------------------------------
1 | tokenizer = new Tokenizer();
39 | }
40 |
/**
 * Tokenizes the input and turns the resulting token stream into a
 * ParsedQuery.
 *
 * For each token the leading bool operator is resolved first, then the
 * current token is converted into zero or more nodes. Parsing stops
 * when the stream is exhausted or an end-of-input token is reached.
 *
 * @param string $input
 *
 * @return ParsedQuery
 */
public function parse(string $input): ParsedQuery
{
    $this->stream = $this->tokenizer->scan($input);
    $parsed = new ParsedQuery();

    while (true) {
        if (!$this->stream->next()) {
            break;
        }

        // resolving the operator may advance the stream, so fetch the token afterwards
        $operator = $this->getBoolOperator();
        $current = $this->stream->getCurrent();

        if ($current->typeEquals(Token::T_EOI)) {
            break;
        }

        $parsed->addNodes($this->createNodes($current, $operator));
    }

    return $parsed;
}
58 |
/**
 * Converts a single token into nodes by dispatching on the token type.
 *
 * Token types without a dedicated handler produce no nodes. A handler
 * that returns a single Node has its result wrapped in an array.
 *
 * @param Token                   $token
 * @param BoolOperator            $boolOperator
 * @param ComparisonOperator|null $comparisonOperator only used for date and number tokens
 *
 * @return Node[]
 */
private function createNodes(
    Token $token,
    BoolOperator $boolOperator,
    ?ComparisonOperator $comparisonOperator = null
): array {
    $value = $token->getValue();

    $result = match ($token->getType()) {
        Token::T_WORD => $this->createWord($value, $boolOperator),
        Token::T_DATE => $this->createDate($value, $boolOperator, $comparisonOperator),
        Token::T_EMOJI => $this->createEmoji($value, $boolOperator),
        Token::T_EMOTICON => $this->createEmoticon($value, $boolOperator),
        Token::T_FIELD_START => $this->handleField($value, $boolOperator),
        Token::T_HASHTAG => $this->createHashtag($value, $boolOperator),
        Token::T_MENTION => $this->createMention($value, $boolOperator),
        Token::T_NUMBER => $this->createNumber($value, $comparisonOperator),
        Token::T_PHRASE => $this->createPhrase($value, $boolOperator),
        Token::T_SUBQUERY_START => $this->handleSubquery($boolOperator),
        Token::T_URL => $this->createUrl($value, $boolOperator),
        default => [],
    };

    if ($result instanceof Node) {
        return [$result];
    }

    return $result;
}
123 |
/**
 * Handles a "field:" token by inspecting what follows it.
 *
 * - No lookahead token, or an immediate field end: the field name is
 *   treated as a plain word.
 * - A range or subquery start: delegated to the dedicated handlers.
 * - Anything else: the value token is converted into nodes. No nodes
 *   falls back to a word made from the field name, several nodes are
 *   returned as-is, and exactly one node is wrapped in a Field.
 *
 * @param string       $fieldName
 * @param BoolOperator $boolOperator
 *
 * @return Field|Node[]|Node
 */
private function handleField(string $fieldName, BoolOperator $boolOperator)
{
    $lookahead = $this->stream->getLookahead();
    if (!($lookahead instanceof Token)) {
        return $this->createWord($fieldName, $boolOperator);
    }

    $this->stream->next();
    $lookaheadType = $lookahead->getType();

    if (Token::T_RANGE_INCL_START === $lookaheadType || Token::T_RANGE_EXCL_START === $lookaheadType) {
        return $this->handleFieldWithRange($fieldName, $boolOperator);
    }

    if (Token::T_SUBQUERY_START === $lookaheadType) {
        return $this->handleFieldWithSubquery($fieldName, $boolOperator);
    }

    if (Token::T_FIELD_END === $lookaheadType) {
        return $this->createWord($fieldName, $boolOperator);
    }

    // step over a single operator/modifier token sitting in front of the field value
    $this->stream->nextIfAnyOf([
        Token::T_REQUIRED,
        Token::T_PROHIBITED,
        Token::T_WILDCARD,
        Token::T_FUZZY,
        Token::T_BOOST,
    ]);

    $comparisonOperator = $this->getComparisonOperator();
    $valueToken = $this->stream->getCurrent();
    $valueNodes = $this->createNodes($valueToken, BoolOperator::OPTIONAL, $comparisonOperator);
    $this->stream->skipUntil(Token::T_FIELD_END);

    if (empty($valueNodes)) {
        return $this->createWord($fieldName, $boolOperator);
    }

    if (count($valueNodes) > 1) {
        return $valueNodes;
    }

    $modifiers = $this->getModifiers();

    return new Field($fieldName, $valueNodes[0], $boolOperator, $modifiers['use_boost'], $modifiers['boost']);
}
178 |
/**
 * Handles a field whose value is a range, e.g. "field:[lower TO upper]".
 *
 * The lower and upper bounds are each built from a number, date or
 * word token; any other token type yields no bound. When both bounds
 * exist but the lower one is not an instance of the upper one's class,
 * no range is built and the bounds are wrapped in a Subquery instead.
 * Otherwise a NumberRange, DateRange or WordRange is created depending
 * on the bound type, and if there is no usable bound the field name is
 * returned as a plain word.
 *
 * @param string       $fieldName
 * @param BoolOperator $boolOperator
 *
 * @return Field|Node[]|Node
 */
private function handleFieldWithRange(string $fieldName, BoolOperator $boolOperator)
{
    $exclusive = $this->stream->typeIs(Token::T_RANGE_EXCL_START);
    $this->stream->next();

    $lowerToken = $this->stream->getCurrent();
    $lowerNode = match ($lowerToken->getType()) {
        Token::T_NUMBER => $this->createNumber($lowerToken->getValue()),
        Token::T_DATE => $this->createDate($lowerToken->getValue(), BoolOperator::OPTIONAL),
        Token::T_WORD => $this->createWord($lowerToken->getValue(), BoolOperator::OPTIONAL),
        default => null,
    };

    $this->stream->skipUntil(Token::T_TO);
    $this->stream->nextIf(Token::T_TO);

    $upperToken = $this->stream->getCurrent();
    $upperNode = match ($upperToken->getType()) {
        Token::T_NUMBER => $this->createNumber($upperToken->getValue()),
        Token::T_DATE => $this->createDate($upperToken->getValue(), BoolOperator::OPTIONAL),
        Token::T_WORD => $this->createWord($upperToken->getValue(), BoolOperator::OPTIONAL),
        default => null,
    };

    $this->stream->skipUntil(Token::T_FIELD_END);

    // types can only be compared when both bounds were actually created
    $matchTypes = null !== $lowerNode && null !== $upperNode;

    // todo: add field name and/or nodes that aren't null as words?
    // todo: handle mismatched node
    if ($matchTypes && !($lowerNode instanceof $upperNode)) {
        $nodes = [];
        foreach ([$lowerNode, $upperNode] as $bound) {
            if ($bound instanceof Node) {
                $nodes[] = $bound;
            }
        }

        if (empty($nodes)) {
            return $this->createWord($fieldName, $boolOperator);
        }

        $modifiers = $this->getModifiers();

        if (1 === count($nodes)) {
            return new Field($fieldName, $nodes[0], $boolOperator, $modifiers['use_boost'], $modifiers['boost']);
        }

        $subquery = new Subquery($nodes, null, $modifiers['use_boost'], $modifiers['boost']);

        return new Field($fieldName, $subquery, $boolOperator, $modifiers['use_boost'], $modifiers['boost']);
    }

    $modifiers = $this->getModifiers();

    $range = match (true) {
        $lowerNode instanceof Numbr || $upperNode instanceof Numbr
            => new NumberRange($lowerNode, $upperNode, $exclusive),
        $lowerNode instanceof Date || $upperNode instanceof Date
            => new DateRange($lowerNode, $upperNode, $exclusive),
        $lowerNode instanceof Word || $upperNode instanceof Word
            => new WordRange($lowerNode, $upperNode, $exclusive),
        default => null,
    };

    if (null === $range) {
        return $this->createWord($fieldName, $boolOperator);
    }

    return new Field($fieldName, $range, $boolOperator, $modifiers['use_boost'], $modifiers['boost']);
}
276 |
/**
 * Handles a field whose value is a subquery, e.g. "field:(a b)".
 *
 * An empty subquery result falls back to a plain word made from the
 * field name; anything else (a Subquery or a single node) is wrapped
 * in a Field together with any boost modifier that follows.
 *
 * @param string       $fieldName
 * @param BoolOperator $boolOperator
 *
 * @return Field|Node
 */
private function handleFieldWithSubquery(string $fieldName, BoolOperator $boolOperator): Node
{
    $this->stream->nextIf(Token::T_SUBQUERY_START);
    $inner = $this->handleSubquery($boolOperator);
    $this->stream->skipUntil(Token::T_FIELD_END);

    // objects are never "empty", so this only catches the empty-array result
    if (empty($inner)) {
        return $this->createWord($fieldName, $boolOperator);
    }

    $modifiers = $this->getModifiers();

    return new Field($fieldName, $inner, $boolOperator, $modifiers['use_boost'], $modifiers['boost']);
}
301 |
/**
 * Collects the nodes of a parenthesized subquery.
 *
 * Tokens are consumed until the stream ends or a subquery end token is
 * reached. No nodes yields an empty array, several nodes yield a
 * Subquery, and a single node is unwrapped (see the comment below).
 *
 * @param BoolOperator $queryBoolOperator bool operator that preceded the subquery
 *
 * @return Subquery|Node[]|Node
 */
private function handleSubquery(BoolOperator $queryBoolOperator)
{
    $this->stream->nextIf(Token::T_SUBQUERY_START);

    /** @var Node[] $nodes */
    $nodes = [];

    while (true) {
        $boolOperator = $this->getBoolOperator();
        $comparisonOperator = $this->getComparisonOperator();
        $created = $this->createNodes($this->stream->getCurrent(), $boolOperator, $comparisonOperator);
        $nodes = array_merge($nodes, $created);

        if (!$this->stream->next() || $this->stream->typeIs(Token::T_SUBQUERY_END)) {
            break;
        }
    }

    if (empty($nodes)) {
        return [];
    }

    $modifiers = $this->getModifiers();

    if (count($nodes) !== 1) {
        return new Subquery($nodes, $queryBoolOperator, $modifiers['use_boost'], $modifiers['boost']);
    }

    /*
     * Only one node was found inside the subquery: rebuild that node from its
     * array form, filling in the outer bool operator and the trailing modifiers
     * wherever the node does not define its own. This turns "+(cats)^5" into
     * "+cats^5", "-(+cats)~2" into "+cats~2", etc.
     */
    $only = $nodes[0];
    $data = $only->toArray();
    $data['bool_operator'] ??= $queryBoolOperator->value;

    foreach (['use_boost', 'boost', 'use_fuzzy', 'fuzzy', 'trailing_wildcard'] as $key) {
        $data[$key] ??= $modifiers[$key];
    }

    return $only::fromArray($data);
}
369 |
/**
 * Creates a Date node, consuming any modifiers that follow the token.
 *
 * @param string                  $value
 * @param BoolOperator            $boolOperator
 * @param ComparisonOperator|null $comparisonOperator
 *
 * @return Date
 */
private function createDate(
    string $value,
    BoolOperator $boolOperator,
    ?ComparisonOperator $comparisonOperator = null
): Date {
    $modifiers = $this->getModifiers();
    $useBoost = $modifiers['use_boost'];
    $boost = $modifiers['boost'];
    $useFuzzy = $modifiers['use_fuzzy'];
    $fuzzy = $modifiers['fuzzy'];

    return new Date($value, $boolOperator, $useBoost, $boost, $useFuzzy, $fuzzy, $comparisonOperator);
}
386 |
/**
 * Creates an Emoji node. An OPTIONAL bool operator is promoted to
 * REQUIRED; any other operator is kept.
 *
 * @param string       $value
 * @param BoolOperator $boolOperator
 *
 * @return Emoji
 */
private function createEmoji(string $value, BoolOperator $boolOperator): Emoji
{
    if (BoolOperator::OPTIONAL === $boolOperator) {
        $boolOperator = BoolOperator::REQUIRED;
    }

    $modifiers = $this->getModifiers();

    return new Emoji($value, $boolOperator, $modifiers['use_boost'], $modifiers['boost']);
}
393 |
/**
 * Creates an Emoticon node. An OPTIONAL bool operator is promoted to
 * REQUIRED; any other operator is kept.
 *
 * @param string       $value
 * @param BoolOperator $boolOperator
 *
 * @return Emoticon
 */
private function createEmoticon(string $value, BoolOperator $boolOperator): Emoticon
{
    if (BoolOperator::OPTIONAL === $boolOperator) {
        $boolOperator = BoolOperator::REQUIRED;
    }

    $modifiers = $this->getModifiers();

    return new Emoticon($value, $boolOperator, $modifiers['use_boost'], $modifiers['boost']);
}
400 |
/**
 * Creates a Hashtag node. An OPTIONAL bool operator is promoted to
 * REQUIRED; any other operator is kept.
 *
 * @param string       $value
 * @param BoolOperator $boolOperator
 *
 * @return Hashtag
 */
private function createHashtag(string $value, BoolOperator $boolOperator): Hashtag
{
    if (BoolOperator::OPTIONAL === $boolOperator) {
        $boolOperator = BoolOperator::REQUIRED;
    }

    $modifiers = $this->getModifiers();

    return new Hashtag($value, $boolOperator, $modifiers['use_boost'], $modifiers['boost']);
}
407 |
/**
 * Creates a Mention node. An OPTIONAL bool operator is promoted to
 * REQUIRED; any other operator is kept.
 *
 * @param string       $value
 * @param BoolOperator $boolOperator
 *
 * @return Mention
 */
private function createMention(string $value, BoolOperator $boolOperator): Mention
{
    if (BoolOperator::OPTIONAL === $boolOperator) {
        $boolOperator = BoolOperator::REQUIRED;
    }

    $modifiers = $this->getModifiers();

    return new Mention($value, $boolOperator, $modifiers['use_boost'], $modifiers['boost']);
}
414 |
/**
 * Creates a Numbr node.
 *
 * Modifiers following the number are consumed from the stream but
 * their values are discarded.
 *
 * @param float                   $value
 * @param ComparisonOperator|null $comparisonOperator
 *
 * @return Numbr
 */
private function createNumber(float $value, ?ComparisonOperator $comparisonOperator = null): Numbr
{
    // called only for its side effect of advancing the stream past any modifiers
    $this->getModifiers();

    $number = new Numbr($value, $comparisonOperator);

    return $number;
}
421 |
/**
 * Creates a Phrase node with the boost and fuzzy modifiers that follow it.
 *
 * @param string       $value
 * @param BoolOperator $boolOperator
 *
 * @return Phrase
 */
private function createPhrase(string $value, BoolOperator $boolOperator): Phrase
{
    $modifiers = $this->getModifiers();

    return new Phrase(
        $value,
        $boolOperator,
        $modifiers['use_boost'],
        $modifiers['boost'],
        $modifiers['use_fuzzy'],
        $modifiers['fuzzy']
    );
}
427 |
/**
 * Creates a Url node with the boost modifier that follows it.
 *
 * @param string       $value
 * @param BoolOperator $boolOperator
 *
 * @return Url
 */
private function createUrl(string $value, BoolOperator $boolOperator): Url
{
    $modifiers = $this->getModifiers();
    $url = new Url($value, $boolOperator, $modifiers['use_boost'], $modifiers['boost']);

    return $url;
}
433 |
/**
 * Creates a Word node with the wildcard, boost and fuzzy modifiers
 * that follow it.
 *
 * @param string       $value
 * @param BoolOperator $boolOperator
 *
 * @return Word
 */
private function createWord(string $value, BoolOperator $boolOperator): Word
{
    $modifiers = $this->getModifiers();
    $useBoost = $modifiers['use_boost'];
    $boost = $modifiers['boost'];
    $useFuzzy = $modifiers['use_fuzzy'];
    $fuzzy = $modifiers['fuzzy'];
    $trailingWildcard = $modifiers['trailing_wildcard'];

    return new Word($value, $boolOperator, $useBoost, $boost, $useFuzzy, $fuzzy, $trailingWildcard);
}
447 |
/**
 * Resolves the bool operator that applies at the current stream position.
 *
 * REQUIRED when the current token is a "required" token (which is then
 * consumed), or when the lookahead or the previous token is an AND.
 * PROHIBITED when the current token is a "prohibited" token (also
 * consumed). OPTIONAL otherwise.
 *
 * @return BoolOperator
 */
private function getBoolOperator(): BoolOperator
{
    // the checks run in this exact order because nextIf() advances the stream on a match
    if ($this->stream->nextIf(Token::T_REQUIRED)) {
        return BoolOperator::REQUIRED;
    }

    if ($this->stream->lookaheadTypeIs(Token::T_AND)) {
        return BoolOperator::REQUIRED;
    }

    if ($this->stream->prevTypeIs(Token::T_AND)) {
        return BoolOperator::REQUIRED;
    }

    return $this->stream->nextIf(Token::T_PROHIBITED)
        ? BoolOperator::PROHIBITED
        : BoolOperator::OPTIONAL;
}
463 |
/**
 * Reads an optional comparison operator from the stream.
 *
 * A ">" or "<" token selects GT or LT; a directly following "=" token
 * appends "e" to that operator's value before it is resolved through
 * ComparisonOperator::from(). All matched tokens are consumed.
 *
 * @return ComparisonOperator|null null when the current token is neither ">" nor "<"
 */
private function getComparisonOperator(): ?ComparisonOperator
{
    if ($this->stream->nextIf(Token::T_GREATER_THAN)) {
        $base = ComparisonOperator::GT;
    } elseif ($this->stream->nextIf(Token::T_LESS_THAN)) {
        $base = ComparisonOperator::LT;
    } else {
        return null;
    }

    $orEqual = $this->stream->nextIf(Token::T_EQUALS);

    return ComparisonOperator::from($orEqual ? $base->value . 'e' : $base->value);
}
480 |
/**
 * Consumes the modifier tokens that follow the current token and
 * reports what was found.
 *
 * Checked in order via lookahead: a wildcard, a boost marker followed
 * by a number, then a fuzzy marker optionally followed by a number.
 * A boost marker without a number is consumed but leaves boost unused.
 *
 * @return array with keys trailing_wildcard, use_boost, boost, use_fuzzy, fuzzy
 */
private function getModifiers(): array
{
    $stream = $this->stream;

    $trailingWildcard = $stream->nextIfLookahead(Token::T_WILDCARD);

    $useBoost = false;
    $boost = Node::DEFAULT_BOOST;
    if ($stream->nextIfLookahead(Token::T_BOOST)) {
        if ($stream->nextIfLookahead(Token::T_NUMBER)) {
            $useBoost = true;
            $boost = (float)$stream->getCurrent()->getValue();
        }
    }

    $fuzzy = Node::DEFAULT_FUZZY;
    $useFuzzy = $stream->nextIfLookahead(Token::T_FUZZY);
    if ($useFuzzy && $stream->nextIfLookahead(Token::T_NUMBER)) {
        $fuzzy = (int)$stream->getCurrent()->getValue();
    }

    return [
        'trailing_wildcard' => $trailingWildcard,
        'use_boost' => $useBoost,
        'boost' => $boost,
        'use_fuzzy' => $useFuzzy,
        'fuzzy' => $fuzzy,
    ];
}
505 | }
506 |
--------------------------------------------------------------------------------
/src/Token.php:
--------------------------------------------------------------------------------
1 | '
15 | const T_LESS_THAN = 7; // '<'
16 | const T_EQUALS = 8; // '='
17 | const T_FUZZY = 9; // '~'
18 | const T_BOOST = 10; // '^'
19 | const T_RANGE_INCL_START = 11; // '['
20 | const T_RANGE_INCL_END = 12; // ']'
21 | const T_RANGE_EXCL_START = 13; // '{'
22 | const T_RANGE_EXCL_END = 14; // '}'
23 | const T_SUBQUERY_START = 15; // '('
24 | const T_SUBQUERY_END = 16; // ')'
25 | const T_WILDCARD = 17; // '*'
26 | const T_AND = 18; // 'AND' or '&&'
27 | const T_OR = 19; // 'OR' or '||'
28 | const T_TO = 20; // 'TO' or '..'
29 | const T_WORD = 21;
30 | const T_FIELD_START = 22; // The "field:" portion of "field:value".
31 | const T_FIELD_END = 23; // when a field lexeme ends, i.e. "field:value". This token has no value.
32 | const T_PHRASE = 24; // Phrase (one or more quoted words)
33 | const T_URL = 25; // a valid url
34 | const T_DATE = 26; // date in the format YYYY-MM-DD
35 | const T_HASHTAG = 27; // #hashtag
36 | const T_MENTION = 28; // @mention
37 | const T_EMOTICON = 29; // see https://en.wikipedia.org/wiki/Emoticon
38 | const T_EMOJI = 30; // see https://en.wikipedia.org/wiki/Emoji
39 |
40 | /**
41 | * Array of the type names by id (constants flipped)
42 | *
43 | * @var array
44 | */
45 | private static array $typeNames;
46 |
47 | private int $type;
48 |
49 | /** @var string|float|null */
50 | private $value;
51 |
/**
 * Creates a token of the given type with an optional value.
 *
 * @param int               $type  one of the T_* constants
 * @param string|float|null $value
 */
public function __construct(int $type, $value = null)
{
    $this->value = $value;
    $this->type = $type;
}
61 |
/**
 * Gets the name of the type (a T_FOO constant) by its integer value.
 *
 * The id => name map is built once, on first use, by flipping the
 * class constants obtained through reflection. Unknown ids are
 * returned as their string representation.
 *
 * @param int $type
 *
 * @return string
 */
public static function name(int $type): string
{
    // isset() is required here: the typed static property has no default, and
    // reading it before initialization (e.g. comparing it to null) throws an Error.
    if (!isset(self::$typeNames)) {
        self::$typeNames = array_flip((new \ReflectionClass(__CLASS__))->getConstants());
    }

    return self::$typeNames[$type] ?? (string)$type;
}
77 |
/**
 * Supplies the data used for JSON encoding.
 *
 * @return array with the keys "type" and "value"
 */
public function jsonSerialize(): array
{
    $data = [];
    $data['type'] = $this->type;
    $data['value'] = $this->value;

    return $data;
}
82 |
/**
 * @return string the name of this token's type, as resolved by name()
 */
public function getTypeName(): string
{
    $typeName = self::name($this->type);

    return $typeName;
}
87 |
/**
 * @return int the token type (one of the T_* constants)
 */
public function getType(): int
{
    return $this->type;
}
92 |
/**
 * Returns the value this token was created with.
 *
 * @return string|float|null
 */
public function getValue()
{
    return $this->value;
}
100 |
/**
 * @param int $type
 *
 * @return bool true when this token's type is identical to the given type
 */
public function typeEquals(int $type): bool
{
    return $this->type === $type;
}
105 |
/**
 * Checks this token's type against a list of types (strict comparison).
 *
 * @param int[] $types
 *
 * @return bool true when at least one entry is identical to this token's type
 */
public function typeEqualsAnyOf(array $types): bool
{
    foreach ($types as $candidate) {
        if ($candidate === $this->type) {
            return true;
        }
    }

    return false;
}
115 |
/**
 * @return bool true when this token's type is T_WHITE_SPACE
 */
public function isWhiteSpace(): bool
{
    return $this->type === self::T_WHITE_SPACE;
}
120 |
/**
 * @return bool true when this token's type is T_IGNORED
 */
public function isIgnored(): bool
{
    return $this->type === self::T_IGNORED;
}
125 |
/**
 * @return bool true when this token's type is T_EOI (end of input)
 */
public function isEndOfInput(): bool
{
    return $this->type === self::T_EOI;
}
130 | }
131 |
--------------------------------------------------------------------------------
/src/TokenStream.php:
--------------------------------------------------------------------------------
1 | tokens = $tokens;
26 | $this->reset();
27 | }
28 |
/**
 * Resets the stream: the position goes back to 0 and the current token
 * becomes the first token (or the shared EOI token when there is none).
 *
 * @return self this instance, for chaining
 */
public function reset(): self
{
    $this->position = 0;

    if (isset($this->tokens[0])) {
        $this->current = $this->tokens[0];
    } else {
        $this->current = self::$eoi;
    }

    return $this;
}
40 |
/**
 * Makes the token at the current position the "current" token and then
 * advances the position by one. When no token exists at the position,
 * the current token becomes EOI and the position is left untouched.
 *
 * @return bool true if the new "current" token is not EOI
 */
public function next(): bool
{
    if (isset($this->tokens[$this->position])) {
        $this->current = $this->tokens[$this->position];
        $this->position++;
    } else {
        $this->current = self::$eoi;
    }

    return !$this->current->typeEquals(Token::T_EOI);
}
52 |
/**
 * Advances the stream until the current token has the given type or is
 * EOI. Does nothing when the current token already matches.
 *
 * @param int $type
 */
public function skipUntil(int $type): void
{
    while (true) {
        if ($this->current->typeEquals($type) || $this->current->typeEquals(Token::T_EOI)) {
            return;
        }

        $this->next();
    }
}
64 |
/**
 * Moves to the next token, but only if the current token has the given type.
 *
 * @param int $type
 *
 * @return bool true if the stream was advanced
 */
public function nextIf(int $type): bool
{
    $matches = $this->current->typeEquals($type);

    if ($matches) {
        $this->next();
    }

    return $matches;
}
82 |
/**
 * Moves to the next token, but only if the current token's type is one
 * of the given types.
 *
 * @param int[] $types
 *
 * @return bool true if the stream was advanced
 */
public function nextIfAnyOf(array $types): bool
{
    $matches = $this->current->typeEqualsAnyOf($types);

    if ($matches) {
        $this->next();
    }

    return $matches;
}
100 |
/**
 * Moves to the next token, but only if the lookahead token (the one at
 * the current position) exists and has the given type.
 *
 * @param int $type
 *
 * @return bool true if the stream was advanced
 */
public function nextIfLookahead(int $type): bool
{
    $lookahead = $this->tokens[$this->position] ?? null;

    if (null === $lookahead || !$lookahead->typeEquals($type)) {
        return false;
    }

    $this->next();

    return true;
}
117 |
/**
 * Moves to the next token, but only if the lookahead token (the one at
 * the current position) exists and its type is one of the given types.
 *
 * @param int[] $types
 *
 * @return bool true if the stream was advanced
 */
public function nextIfLookaheadAnyOf(array $types): bool
{
    $lookahead = $this->tokens[$this->position] ?? null;

    if (null === $lookahead || !$lookahead->typeEqualsAnyOf($types)) {
        return false;
    }

    $this->next();

    return true;
}
134 |
/**
 * Tells whether the current token has the given type.
 *
 * @param int $type
 *
 * @return bool
 */
public function typeIs(int $type): bool
{
    $current = $this->current;

    return $current->typeEquals($type);
}
146 |
/**
 * Tells whether the current token's type is one of the given types.
 *
 * @param int[] $types
 *
 * @return bool
 */
public function typeIsAnyOf(array $types): bool
{
    $current = $this->current;

    return $current->typeEqualsAnyOf($types);
}
158 |
/**
 * Tells whether a lookahead token exists and has the given type.
 *
 * @param int $type
 *
 * @return bool
 */
public function lookaheadTypeIs(int $type): bool
{
    $lookahead = $this->tokens[$this->position] ?? null;

    return null !== $lookahead && $lookahead->typeEquals($type);
}
170 |
/**
 * Tells whether a lookahead token exists and its type is one of the
 * given types.
 *
 * @param int[] $types
 *
 * @return bool
 */
public function lookaheadTypeIsAnyOf(array $types): bool
{
    $lookahead = $this->tokens[$this->position] ?? null;

    return null !== $lookahead && $lookahead->typeEqualsAnyOf($types);
}
182 |
/**
 * Tells whether the previous token (two slots behind the position,
 * i.e. the one before the current token) exists and has the given type.
 *
 * @param int $type
 *
 * @return bool
 */
public function prevTypeIs(int $type): bool
{
    $previous = $this->tokens[$this->position - 2] ?? null;

    return null !== $previous && $previous->typeEquals($type);
}
194 |
/**
 * Tells whether the previous token (two slots behind the position,
 * i.e. the one before the current token) exists and its type is one of
 * the given types.
 *
 * @param int[] $types
 *
 * @return bool
 */
public function prevTypeIsAnyOf(array $types): bool
{
    $previous = $this->tokens[$this->position - 2] ?? null;

    return null !== $previous && $previous->typeEqualsAnyOf($types);
}
206 |
/**
 * @return Token the current token of the stream
 */
public function getCurrent(): Token
{
    return $this->current;
}
211 |
/**
 * Returns the lookahead token (the token at the current position)
 * without advancing the stream.
 *
 * The null coalescing operator is used so that asking for the lookahead
 * at the end of the stream simply yields null instead of reading an
 * undefined offset and raising an "Undefined array key" warning.
 *
 * @return Token|null null when there is no further token
 */
public function getLookahead(): ?Token
{
    return $this->tokens[$this->position] ?? null;
}
216 |
/**
 * Returns every token held by this stream, regardless of position.
 *
 * @return Token[]
 */
public function getTokens(): array
{
    return $this->tokens;
}
226 |
/**
 * Supplies the data used for JSON encoding: the full token list.
 *
 * @return array
 */
public function jsonSerialize(): array
{
    return $this->tokens;
}
231 | }
232 |
--------------------------------------------------------------------------------
/src/Tokenizer.php:
--------------------------------------------------------------------------------
1 | :\-?\(|:\-?\)|<3|:\'\(|:\-?\|:\-?\/|:\-?\(|:\-?\*|:\-?\||:o\)|:\-?o|=\-?\)|:\-?D|:\-?p|:\-?P|:\-?b|;\-?p|;\-?P|;\-?b|;\-?\))';
9 | const REGEX_EMOJI = '[\x{2712}\x{2714}\x{2716}\x{271d}\x{2721}\x{2728}\x{2733}\x{2734}\x{2744}\x{2747}\x{274c}\x{274e}\x{2753}-\x{2755}\x{2757}\x{2763}\x{2764}\x{2795}-\x{2797}\x{27a1}\x{27b0}\x{27bf}\x{2934}\x{2935}\x{2b05}-\x{2b07}\x{2b1b}\x{2b1c}\x{2b50}\x{2b55}\x{3030}\x{303d}\x{1f004}\x{1f0cf}\x{1f170}\x{1f171}\x{1f17e}\x{1f17f}\x{1f18e}\x{1f191}-\x{1f19a}\x{1f201}\x{1f202}\x{1f21a}\x{1f22f}\x{1f232}-\x{1f23a}\x{1f250}\x{1f251}\x{1f300}-\x{1f321}\x{1f324}-\x{1f393}\x{1f396}\x{1f397}\x{1f399}-\x{1f39b}\x{1f39e}-\x{1f3f0}\x{1f3f3}-\x{1f3f5}\x{1f3f7}-\x{1f4fd}\x{1f4ff}-\x{1f53d}\x{1f549}-\x{1f54e}\x{1f550}-\x{1f567}\x{1f56f}\x{1f570}\x{1f573}-\x{1f579}\x{1f587}\x{1f58a}-\x{1f58d}\x{1f590}\x{1f595}\x{1f596}\x{1f5a5}\x{1f5a8}\x{1f5b1}\x{1f5b2}\x{1f5bc}\x{1f5c2}-\x{1f5c4}\x{1f5d1}-\x{1f5d3}\x{1f5dc}-\x{1f5de}\x{1f5e1}\x{1f5e3}\x{1f5ef}\x{1f5f3}\x{1f5fa}-\x{1f64f}\x{1f680}-\x{1f6c5}\x{1f6cb}-\x{1f6d0}\x{1f6e0}-\x{1f6e5}\x{1f6e9}\x{1f6eb}\x{1f6ec}\x{1f6f0}\x{1f6f3}\x{1f910}-\x{1f918}\x{1f980}-\x{1f984}\x{1f9c0}\x{3297}\x{3299}\x{a9}\x{ae}\x{203c}\x{2049}\x{2122}\x{2139}\x{2194}-\x{2199}\x{21a9}\x{21aa}\x{231a}\x{231b}\x{2328}\x{2388}\x{23cf}\x{23e9}-\x{23f3}\x{23f8}-\x{23fa}\x{24c2}\x{25aa}\x{25ab}\x{25b6}\x{25c0}\x{25fb}-\x{25fe}\x{2600}-\x{2604}\x{260e}\x{2611}\x{2614}\x{2615}\x{2618}\x{261d}\x{2620}\x{2622}\x{2623}\x{2626}\x{262a}\x{262e}\x{262f}\x{2638}-\x{263a}\x{2648}-\x{2653}\x{2660}\x{2663}\x{2665}\x{2666}\x{2668}\x{267b}\x{267f}\x{2692}-\x{2694}\x{2696}\x{2697}\x{2699}\x{269b}\x{269c}\x{26a0}\x{26a1}\x{26aa}\x{26ab}\x{26b0}\x{26b1}\x{26bd}\x{26be}\x{26c4}\x{26c5}\x{26c8}\x{26ce}\x{26cf}\x{26d1}\x{26d3}\x{26d4}\x{26e9}\x{26ea}\x{26f0}-\x{26f5}\x{26f7}-\x{26fa}\x{26fd}\x{2702}\x{2705}\x{2708}-\x{270d}\x{270f}]|\x{23}\x{20e3}|\x{2a}\x{20e3}|\x{30}\x{20e3}|\x{31}\x{20e3}|\x{32}\x{20e3}|\x{33}\x{20e3}|\x{34}\x{20e3}|\x{35}\x{20e3}|\x{36}\x{20e3}|\x{37}\x{20e3}|\x{38}\x{20e3}|\x{39}\x{20e3}|\x{1f1e6}[\x{1f1e8}-\x{1f1ec}\x{1f1ee}\x{1f1f1}\x{1f1f2}\x{1f1f4}\x{1f1f
6}-\x{1f1fa}\x{1f1fc}\x{1f1fd}\x{1f1ff}]|\x{1f1e7}[\x{1f1e6}\x{1f1e7}\x{1f1e9}-\x{1f1ef}\x{1f1f1}-\x{1f1f4}\x{1f1f6}-\x{1f1f9}\x{1f1fb}\x{1f1fc}\x{1f1fe}\x{1f1ff}]|\x{1f1e8}[\x{1f1e6}\x{1f1e8}\x{1f1e9}\x{1f1eb}-\x{1f1ee}\x{1f1f0}-\x{1f1f5}\x{1f1f7}\x{1f1fa}-\x{1f1ff}]|\x{1f1e9}[\x{1f1ea}\x{1f1ec}\x{1f1ef}\x{1f1f0}\x{1f1f2}\x{1f1f4}\x{1f1ff}]|\x{1f1ea}[\x{1f1e6}\x{1f1e8}\x{1f1ea}\x{1f1ec}\x{1f1ed}\x{1f1f7}-\x{1f1fa}]|\x{1f1eb}[\x{1f1ee}-\x{1f1f0}\x{1f1f2}\x{1f1f4}\x{1f1f7}]|\x{1f1ec}[\x{1f1e6}\x{1f1e7}\x{1f1e9}-\x{1f1ee}\x{1f1f1}-\x{1f1f3}\x{1f1f5}-\x{1f1fa}\x{1f1fc}\x{1f1fe}]|\x{1f1ed}[\x{1f1f0}\x{1f1f2}\x{1f1f3}\x{1f1f7}\x{1f1f9}\x{1f1fa}]|\x{1f1ee}[\x{1f1e8}-\x{1f1ea}\x{1f1f1}-\x{1f1f4}\x{1f1f6}-\x{1f1f9}]|\x{1f1ef}[\x{1f1ea}\x{1f1f2}\x{1f1f4}\x{1f1f5}]|\x{1f1f0}[\x{1f1ea}\x{1f1ec}-\x{1f1ee}\x{1f1f2}\x{1f1f3}\x{1f1f5}\x{1f1f7}\x{1f1fc}\x{1f1fe}\x{1f1ff}]|\x{1f1f1}[\x{1f1e6}-\x{1f1e8}\x{1f1ee}\x{1f1f0}\x{1f1f7}-\x{1f1fb}\x{1f1fe}]|\x{1f1f2}[\x{1f1e6}\x{1f1e8}-\x{1f1ed}\x{1f1f0}-\x{1f1ff}]|\x{1f1f3}[\x{1f1e6}\x{1f1e8}\x{1f1ea}-\x{1f1ec}\x{1f1ee}\x{1f1f1}\x{1f1f4}\x{1f1f5}\x{1f1f7}\x{1f1fa}\x{1f1ff}]|\x{1f1f4}\x{1f1f2}|\x{1f1f5}[\x{1f1e6}\x{1f1ea}-\x{1f1ed}\x{1f1f0}-\x{1f1f3}\x{1f1f7}-\x{1f1f9}\x{1f1fc}\x{1f1fe}]|\x{1f1f6}\x{1f1e6}|\x{1f1f7}[\x{1f1ea}\x{1f1f4}\x{1f1f8}\x{1f1fa}\x{1f1fc}]|\x{1f1f8}[\x{1f1e6}-\x{1f1ea}\x{1f1ec}-\x{1f1f4}\x{1f1f7}-\x{1f1f9}\x{1f1fb}\x{1f1fd}-\x{1f1ff}]|\x{1f1f9}[\x{1f1e6}\x{1f1e8}\x{1f1e9}\x{1f1eb}-\x{1f1ed}\x{1f1ef}-\x{1f1f4}\x{1f1f7}\x{1f1f9}\x{1f1fb}\x{1f1fc}\x{1f1ff}]|\x{1f1fa}[\x{1f1e6}\x{1f1ec}\x{1f1f2}\x{1f1f8}\x{1f1fe}\x{1f1ff}]|\x{1f1fb}[\x{1f1e6}\x{1f1e8}\x{1f1ea}\x{1f1ec}\x{1f1ee}\x{1f1f3}\x{1f1fa}]|\x{1f1fc}[\x{1f1eb}\x{1f1f8}]|\x{1f1fd}\x{1f1f0}|\x{1f1fe}[\x{1f1ea}\x{1f1f9}]|\x{1f1ff}[\x{1f1e6}\x{1f1f2}\x{1f1fc}]';
10 | const REGEX_URL = '[+-]?[\w-]+:\/\/[^\s\/$.?#].[^\s\^~]*'; // optional +/-, a scheme and colon-slash-slash, then the rest up to white space, a caret or a tilde
11 | const REGEX_PHRASE = '[+-]?"(?:""|[^"])*"'; // optional +/-, then double quoted text (a doubled quote inside is allowed)
12 | const REGEX_HASHTAG = '[+-]?#+[a-zA-Z0-9_]+'; // optional +/-, one or more hash signs, then ASCII letters, digits or underscores
13 | const REGEX_MENTION = '[+-]?@+[a-zA-Z0-9_]+(?:[a-zA-Z0-9_\.\-]+)?'; // optional +/-, one or more at signs, then a name that may contain dots and dashes after its first char
14 | const REGEX_NUMBER = '(?:[+-]?[0-9]+(?:[\.][0-9]+)*)(?:[eE][+-]?[0-9]+)?'; // signed digits, optional dot-digits groups and exponent. NOTE(review): not referenced by the code visible here, which detects numbers with is_numeric() - confirm usage elsewhere
15 | const REGEX_DATE = '[+-]?\d{4}-\d{2}-\d{2}'; // optional +/-, then the shape YYYY-MM-DD (digits only, the pattern does not validate the date)
16 | const REGEX_FIELD = '[+-]?[a-zA-Z\_]+(?:[a-zA-Z0-9_\.\-]+)?:'; // optional +/-, a name that starts with letters or underscores, ending in a colon
17 | const REGEX_WORD = '[+-]?[^\s\(\)\\\\^\<\>\[\]\{\}~=]*'; // optional +/-, then any run (even an empty one) of chars other than white space ( ) \ ^ < > [ ] { } ~ =
18 | const REGEX_WORD_MINIMUM = '[a-zA-Z0-9\pL]+'; // at least one ASCII letter/digit or unicode letter, anything without it is ignored by extractTokens()
19 | const IGNORED_LEAD_TRAIL_CHARS = "#@,.!?;|&+-^~*\\\"' \t\n\r ";
20 |
21 | /**
22 | * True while a field is open, i.e. after a T_FIELD_START was added and before its T_FIELD_END.
23 | * Comparison operators and range starts are only tokenized while this is on.
24 | *
25 | * @var bool
26 | */
27 | private bool $inField = false;
28 |
29 | /**
30 | * True while a sub query is open. Only one level of sub query is supported (for now), so
31 | * a "(" found inside a sub query is ignored. We only want to take a query from a user like
32 | * "funny #cats plays:>500" and parse that to a simple object which can be translated to other query formats.
33 | *
34 | * @var bool
35 | */
36 | private bool $inSubquery = false;
37 |
38 | /**
39 | * This tokenizer only supports one range to be open at a time (excl or incl).
40 | * Starting a new range of any type is ignored if one is already open and
41 | * closing a range that never started is also ignored.
42 | *
43 | * The value is Token::T_RANGE_INCL_START or Token::T_RANGE_EXCL_START while a range is open, otherwise 0.
44 | *
45 | * @var int
46 | */
47 | private int $inRange = 0;
48 |
49 | /**
50 | * The regex used to split the initial input into chunks that will be
51 | * checked for tokens during scan/tokenization. It is built once by the constructor.
52 | *
53 | * @var string
54 | */
55 | private string $splitRegex;
56 |
57 | /** @var Token[] Tokens added by the current scan, in the order they were found. */
58 | private array $tokens = [];
59 |
60 | /**
61 | * The last token that was scanned. Every scan starts with a T_WHITE_SPACE token here.
62 | *
63 | * @var Token
64 | */
65 | private Token $lastToken;
66 |
67 |     public function __construct()
68 |     {
69 |         // every pattern ends up as its own capture group in one case insensitive, unicode aware alternation.
70 |         $patterns = [
71 |             self::REGEX_EMOTICON,
72 |             self::REGEX_URL,
73 |             self::REGEX_PHRASE,
74 |             self::REGEX_FIELD,
75 |             self::REGEX_WORD,
76 |         ];
77 |
78 |         $this->splitRegex = '/(' . implode(')|(', $patterns) . ')/iu';
79 |         $this->lastToken = new Token(Token::T_WHITE_SPACE);
80 |     }
81 |
82 |     /**
83 |      * Tokenizes the input from a clean slate: all scanner state is reset
84 |      * first, so tokens left over from an earlier scan are discarded.
85 |      *
86 |      * @param string $input
87 |      *
88 |      * @return TokenStream
89 |      */
90 |     public function scan(string $input): TokenStream
91 |     {
92 |         // squash white space runs, lead with one space and pry apart back to back quotes.
93 |         $prepared = str_replace('""', '" "', preg_replace('/\s+/', ' ', ' ' . $input));
94 |         $this->tokens = [];
95 |         $this->lastToken = new Token(Token::T_WHITE_SPACE);
96 |         $this->inField = $this->inSubquery = false;
97 |         $this->inRange = 0;
98 |
99 |         foreach ($this->splitInput($prepared) as [$chunk]) {
100 |             $this->extractTokens(trim($chunk));
101 |
102 |             // the field stays open unless white space was hit outside of any range or subquery.
103 |             if (!$this->lastToken->isWhiteSpace() || !$this->inField || $this->inRange || $this->inSubquery) {
104 |                 continue;
105 |             }
106 |
107 |             $this->inField = false;
108 |             $this->addOperatorToken(Token::T_FIELD_END);
109 |         }
110 |
111 |         // the input ended with a field and/or subquery still open, so close them.
112 |         if ($this->inField) {
113 |             $this->inField = false;
114 |             $this->addOperatorToken(Token::T_FIELD_END);
115 |         }
116 |         if ($this->inSubquery) {
117 |             $this->inSubquery = false;
118 |             $this->addOperatorToken(Token::T_SUBQUERY_END);
119 |         }
120 |
121 |         $isKept = static fn (Token $token): bool => !($token->isWhiteSpace() || $token->isIgnored());
122 |         $this->tokens = array_values(array_filter($this->tokens, $isKept));
123 |         return new TokenStream($this->tokens);
124 |     }
125 |
126 |     /**
127 |      * Splits the input into chunks that will be scanned for tokens.
128 |      *
129 |      * @param string $input
130 |      *
131 |      * @return array Chunks as [string, offset] pairs. Empty when preg_split() fails (e.g. malformed UTF-8).
132 |      */
133 |     private function splitInput(string $input): array
134 |     {
135 |         $flags = PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_OFFSET_CAPTURE;
136 |         return preg_split($this->splitRegex, $input, -1, $flags) ?: []; // false on failure would break the array return type.
137 |     }
138 |
139 |     /**
140 |      * Adds an operator token (a token without a value) unless the last
141 |      * scanned token is already of that type, so operators never repeat
142 |      * back to back.
143 |      *
144 |      * @param int $type
145 |      */
146 |     private function addOperatorToken(int $type): void
147 |     {
148 |         $isRepeat = $this->lastToken->typeEquals($type);
149 |
150 |         if (!$isRepeat) {
151 |             $this->lastToken = new Token($type);
152 |             $this->tokens[] = $this->lastToken;
153 |         }
154 |     }
155 |
156 |     private function addToken(int $type, float|string|null $value): void
157 |     {
158 |         // the new token is remembered as the last token scanned and appended to the list.
159 |         $this->lastToken = new Token($type, $value);
160 |         $this->tokens[] = $this->lastToken;
161 |     }
162 |
163 |     /**
164 |      * Turns one chunk of the split input into tokens. The checks run in a fixed
165 |      * order and the first one that matches wins. Whatever matches nothing is
166 |      * added as T_IGNORED, which scan() filters out of the final stream.
167 |      *
168 |      * @param string $value A chunk of the input, an empty string stands for white space.
169 |      */
170 |     private function extractTokens(string $value): void
171 |     {
172 |         if ('' === $value) {
173 |             if ($this->lastToken->typeEqualsAnyOf([Token::T_REQUIRED, Token::T_PROHIBITED, Token::T_IGNORED])) {
174 |                 // todo: review the process of bool operators following ignored values.
175 |                 array_pop($this->tokens);
176 |             }
177 |             $this->addOperatorToken(Token::T_WHITE_SPACE);
178 |             return;
179 |         }
180 |
181 |         if (is_numeric($value)) {
182 |             $this->addToken(Token::T_NUMBER, (float)$value);
183 |             return;
184 |         }
185 |
186 |         if ($this->extractSymbolOrKeyword($value)) {
187 |             return;
188 |         }
189 |
190 |         // a leading +/- becomes its own bool operator token, the remainder is checked below.
191 |         if ('+' === $value[0] || '-' === $value[0]) {
192 |             $this->addOperatorToken('+' === $value[0] ? Token::T_REQUIRED : Token::T_PROHIBITED);
193 |             $value = substr($value, 1);
194 |         }
195 |
196 |         if (preg_match('/^' . self::REGEX_EMOTICON . '$/', $value)) {
197 |             $this->addToken(Token::T_EMOTICON, trim($value, self::IGNORED_LEAD_TRAIL_CHARS));
198 |             return;
199 |         }
200 |
201 |         if (preg_match('/^' . self::REGEX_EMOJI . '$/u', $value)) {
202 |             $this->addToken(Token::T_EMOJI, trim($value, self::IGNORED_LEAD_TRAIL_CHARS));
203 |             return;
204 |         }
205 |
206 |         if (preg_match('/^' . self::REGEX_URL . '$/', $value)) {
207 |             $this->addToken(Token::T_URL, trim($value, self::IGNORED_LEAD_TRAIL_CHARS));
208 |             return;
209 |         }
210 |
211 |         if (!$this->inField && !$this->inSubquery
212 |             && preg_match('/^' . self::REGEX_FIELD . '$/', $value)
213 |             && $this->lastToken->typeEqualsAnyOf([
214 |                 Token::T_WHITE_SPACE,
215 |                 Token::T_REQUIRED,
216 |                 Token::T_PROHIBITED,
217 |                 Token::T_FIELD_END,
218 |                 Token::T_SUBQUERY_START,
219 |             ])
220 |         ) {
221 |             $this->inField = true;
222 |             $this->addToken(Token::T_FIELD_START, rtrim($value, ':'));
223 |             return;
224 |         }
225 |
226 |         if (preg_match('/^' . self::REGEX_PHRASE . '$/', $value)) {
227 |             $value = trim(trim($value, '"'));
228 |             // compare with '' because empty() is also true for "0", which is a valid phrase.
229 |             if ('' !== $value) {
230 |                 $this->addToken(Token::T_PHRASE, $value);
231 |             } else {
232 |                 $this->addToken(Token::T_IGNORED, $value);
233 |             }
234 |             return;
235 |         }
236 |
237 |         if (str_contains($value, '..')) {
238 |             $parts = explode('..', $value, 2);
239 |             $this->extractTokens($parts[0]);
240 |             $this->extractSymbolOrKeyword('..');
241 |             $this->extractTokens($parts[1] ?? '');
242 |             return;
243 |         }
244 |
245 |         if (preg_match('/^' . self::REGEX_HASHTAG . '$/', rtrim($value, self::IGNORED_LEAD_TRAIL_CHARS))) {
246 |             $this->addToken(Token::T_HASHTAG, trim($value, self::IGNORED_LEAD_TRAIL_CHARS));
247 |             return;
248 |         }
249 |
250 |         if (preg_match('/^' . self::REGEX_MENTION . '$/', rtrim($value, self::IGNORED_LEAD_TRAIL_CHARS))) {
251 |             $this->addToken(Token::T_MENTION, trim($value, self::IGNORED_LEAD_TRAIL_CHARS));
252 |             return;
253 |         }
254 |
255 |         if (preg_match('/^' . self::REGEX_DATE . '$/', rtrim($value, self::IGNORED_LEAD_TRAIL_CHARS))) {
256 |             $this->addToken(Token::T_DATE, trim($value, self::IGNORED_LEAD_TRAIL_CHARS));
257 |             return;
258 |         }
259 |
260 |         if (preg_match('/' . self::REGEX_WORD . '/', $value)) {
261 |             $hasTrailingWildcard = str_ends_with($value, '*');
262 |             $value2 = trim($value, self::IGNORED_LEAD_TRAIL_CHARS . '/');
263 |             // compare with '' because empty() is also true for "0", e.g. what is left of "0!".
264 |             if ('' !== $value2) {
265 |                 /*
266 |                  * When in a field or subquery you can get a value which itself looks like the start
267 |                  * of a field, e.g. "field:vevo:video". We don't want two words here so
268 |                  * merge the last "word" token value with this one.
269 |                  */
270 |                 if ($this->lastToken->typeEquals(Token::T_WORD)
271 |                     && str_ends_with($this->lastToken->getValue(), ':')
272 |                 ) {
273 |                     $value2 = array_pop($this->tokens)->getValue() . $value2;
274 |                 }
275 |
276 |                 if (!preg_match('/' . self::REGEX_WORD_MINIMUM . '/u', $value2)) {
277 |                     $this->addToken(Token::T_IGNORED, $value2);
278 |                     return;
279 |                 }
280 |
281 |                 $this->addToken(Token::T_WORD, $value2);
282 |
283 |                 if ($hasTrailingWildcard) {
284 |                     $this->addOperatorToken(Token::T_WILDCARD);
285 |                 }
286 |
287 |                 return;
288 |             }
289 |         }
290 |
291 |         $this->addToken(Token::T_IGNORED, $value);
292 |     }
293 |
294 | /**
295 | * Extracts a symbol or keyword from the string. A symbol that breaks the rules of this
296 | * lib is consumed without adding a token, e.g. you can't boost whitespace " ^5" so that
297 | * boost is ignored. Values longer than 3 bytes are never treated as a symbol or keyword.
298 | *
299 | * @param string $value The whole chunk, it has to equal a symbol or keyword exactly (case sensitive).
300 | *
301 | * @return bool True if the value was consumed, even when no token was added for it.
302 | */
303 | private function extractSymbolOrKeyword(string $value): bool
304 | {
305 | $len = strlen($value);
306 | if ($len > 3) {
307 | return false;
308 | }
309 |
310 | switch ($value) {
311 | case '+':
312 | $this->addOperatorToken(Token::T_REQUIRED);
313 | return true;
314 |
315 | case '-':
316 | $this->addOperatorToken(Token::T_PROHIBITED);
317 | return true;
318 | // comparison operators are only tokenized inside a field and outside of a range.
319 | case '>':
320 | if ($this->inField && 0 === $this->inRange) {
321 | $this->addOperatorToken(Token::T_GREATER_THAN);
322 | }
323 | return true;
324 |
325 | case '<':
326 | if ($this->inField && 0 === $this->inRange) {
327 | $this->addOperatorToken(Token::T_LESS_THAN);
328 | }
329 | return true;
330 | // an equals sign is only kept right after a greater than or less than token.
331 | case '=':
332 | if ($this->lastToken->typeEquals(Token::T_GREATER_THAN)
333 | || $this->lastToken->typeEquals(Token::T_LESS_THAN)
334 | ) {
335 | $this->addOperatorToken(Token::T_EQUALS);
336 | }
337 | return true;
338 |
339 | case '~':
340 | // can't fuzzy parts of a range or sub query. In a field, the field is closed before fuzzy is added.
341 | if ($this->inSubquery || 0 !== $this->inRange) {
342 | // fuzzy is ignored
343 | return true;
344 | }
345 | // you can't fuzzy white space, nothing is added in that case.
346 | if (!$this->lastToken->isWhiteSpace()) {
347 | if ($this->inField) {
348 | $this->inField = false;
349 | $this->addOperatorToken(Token::T_FIELD_END);
350 | }
351 | $this->addOperatorToken(Token::T_FUZZY);
352 | }
353 | return true;
354 |
355 | case '^':
356 | // can't boost parts of a range or sub query. In a field, the field is closed before boost is added.
357 | if ($this->inSubquery || 0 !== $this->inRange) {
358 | // boost is ignored
359 | return true;
360 | }
361 | // you can't boost white space, nothing is added in that case.
362 | if (!$this->lastToken->isWhiteSpace()) {
363 | if ($this->inField) {
364 | $this->inField = false;
365 | $this->addOperatorToken(Token::T_FIELD_END);
366 | }
367 | $this->addOperatorToken(Token::T_BOOST);
368 | }
369 | return true;
370 | // a range can only be opened inside a field and only while no other range is open.
371 | case '[':
372 | if ($this->inField && 0 === $this->inRange) {
373 | $this->inRange = Token::T_RANGE_INCL_START;
374 | $this->addOperatorToken(Token::T_RANGE_INCL_START);
375 | }
376 | return true;
377 |
378 | case '{':
379 | if ($this->inField && 0 === $this->inRange) {
380 | $this->inRange = Token::T_RANGE_EXCL_START;
381 | $this->addOperatorToken(Token::T_RANGE_EXCL_START);
382 | }
383 | return true;
384 | // either closing char ends the open range with the end type matching how it was opened, the field ends too.
385 | case ']':
386 | case '}':
387 | if (0 !== $this->inRange) {
388 | if (Token::T_RANGE_INCL_START === $this->inRange) {
389 | $this->addOperatorToken(Token::T_RANGE_INCL_END);
390 | } else {
391 | $this->addOperatorToken(Token::T_RANGE_EXCL_END);
392 | }
393 |
394 | $this->inRange = 0;
395 | $this->inField = false;
396 | $this->addOperatorToken(Token::T_FIELD_END);
397 | }
398 | return true;
399 |
400 | case '(':
401 | // sub queries can't be nested or exist in a range.
402 | if (!$this->inSubquery && 0 === $this->inRange) {
403 | $this->addOperatorToken(Token::T_SUBQUERY_START);
404 | $this->inSubquery = true;
405 | }
406 | return true;
407 |
408 | case ')':
409 | if ($this->inSubquery && 0 === $this->inRange) {
410 | $this->inSubquery = false;
411 | $this->addOperatorToken(Token::T_SUBQUERY_END);
412 |
413 | if ($this->inField) {
414 | $this->addOperatorToken(Token::T_FIELD_END);
415 | $this->inField = false;
416 | }
417 | }
418 | return true;
419 |
420 | case '*':
421 | $this->addOperatorToken(Token::T_WILDCARD);
422 | return true;
423 |
424 | case '||':
425 | case 'OR':
426 | $this->addOperatorToken(Token::T_OR);
427 | return true;
428 |
429 | case '&&':
430 | case 'AND':
431 | $this->addOperatorToken(Token::T_AND);
432 | return true;
433 | // outside of a range the two dots are consumed without adding a token.
434 | case '..':
435 | if (0 !== $this->inRange) {
436 | $this->addOperatorToken(Token::T_TO);
437 | }
438 | return true;
439 | // outside of a range TO is added as an ordinary word.
440 | case 'TO':
441 | if (0 !== $this->inRange) {
442 | $this->addOperatorToken(Token::T_TO);
443 | return true;
444 | }
445 |
446 | $this->addToken(Token::T_WORD, $value);
447 | return true;
448 |
449 | default:
450 | if (1 === $len) {
451 | if (ctype_alpha($value)) {
452 | /*
453 | * A word, followed ":", followed by a single char "thing:a".
454 | * can be made into one token.
455 | * todo: review words that look like fields. seems wonky.
456 | */
457 | if ($this->lastToken->typeEquals(Token::T_WORD)
458 | && ':' === strrev($this->lastToken->getValue())[0]
459 | ) {
460 | $value = array_pop($this->tokens)->getValue() . $value;
461 | }
462 |
463 | $this->addToken(Token::T_WORD, $value);
464 | return true;
465 | }
466 | // any other single char is added as an ignored token.
467 | $this->addToken(Token::T_IGNORED, $value);
468 | return true;
469 | }
470 | break;
471 | }
472 |
473 | return false;
474 | }
475 | }
476 |
--------------------------------------------------------------------------------
/tests/Builder/XmlQueryBuilderTest.php:
--------------------------------------------------------------------------------
1 | parser = new QueryParser();
19 | $this->builder = new XmlQueryBuilder();
20 | }
21 |
22 |     /**
23 |      * Builds the xml for the input and compares each element name with the expected node types.
24 |      *
25 |      * @dataProvider getTestQueries
26 |      *
27 |      * @param string $name
28 |      * @param string $input
29 |      * @param null $ignored
30 |      * @param Node[] $expectedNodes
31 |      */
32 |     public function testToSimpleXmlElement(string $name, string $input, $ignored, array $expectedNodes = []): void
33 |     {
34 |         $this->builder->addParsedQuery($this->parser->parse($input));
35 |         $xml = $this->builder->toSimpleXmlElement();
36 |         $expectedCount = count($expectedNodes);
37 |         $actualCount = $xml->count();
38 |
39 |         // missing nodes get a failure message that names the input.
40 |         if ($expectedCount > 0 && $actualCount < $expectedCount) {
41 |             $this->fail('Failed to generate SimpleXmlElement from: ' . $input);
42 |         }
43 |         $this->assertSame($expectedCount, $actualCount);
44 |
45 |         $failure = "Test query [{$name}] with input [{$input}] failed.";
46 |         $position = 0;
47 |
48 |         /** @var \SimpleXmlElement $child */
49 |         foreach ($xml->children() as $child) {
50 |             $expectedNode = $expectedNodes[$position++] ?? null;
51 |             if (null === $expectedNode) {
52 |                 $this->fail('Xml contains unexpected nodes');
53 |             }
54 |
55 |             // the element must be named after the type of the node expected at this position.
56 |             $this->assertEquals($expectedNode::NODE_TYPE, $child->getName(), $failure);
57 |         }
58 |     }
59 |
60 |     public function getTestQueries(): array
61 |     {
62 |         // the test cases are kept in a fixture file which returns them as an array.
63 |         $fixtureFile = __DIR__ . '/../Fixtures/test-queries.php';
64 |         return require $fixtureFile;
65 |     }
64 | }
65 |
--------------------------------------------------------------------------------
/tests/Fixtures/test-queries.php:
--------------------------------------------------------------------------------
1 | 'url',
26 | 'input' => 'http://test.com/1_2.html?a=b%20&c=1+2#test',
27 | 'expected_tokens' => [
28 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'],
29 | ],
30 | 'expected_nodes' => [
31 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test'),
32 | ],
33 | ],
34 |
35 | [
36 | 'name' => 'required url',
37 | 'input' => '+http://test.com/1_2.html?a=b%20&c=1+2#test',
38 | 'expected_tokens' => [
39 | T::T_REQUIRED,
40 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'],
41 | ],
42 | 'expected_nodes' => [
43 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test', BoolOperator::REQUIRED),
44 | ],
45 | ],
46 |
47 | [
48 | 'name' => 'prohibited url',
49 | 'input' => '-http://test.com/1_2.html?a=b%20&c=1+2#test',
50 | 'expected_tokens' => [
51 | T::T_PROHIBITED,
52 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'],
53 | ],
54 | 'expected_nodes' => [
55 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test', BoolOperator::PROHIBITED),
56 | ],
57 | ],
58 |
59 | [
60 | 'name' => 'url with boost int',
61 | 'input' => 'http://test.com/1_2.html?a=b%20&c=1+2#test^5',
62 | 'expected_tokens' => [
63 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'],
64 | T::T_BOOST,
65 | [T::T_NUMBER, 5.0],
66 | ],
67 | 'expected_nodes' => [
68 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test', null, true, 5.0),
69 | ],
70 | ],
71 |
72 | [
73 | 'name' => 'url with boost float',
74 | 'input' => 'http://test.com/1_2.html?a=b%20&c=1+2#test^15.5',
75 | 'expected_tokens' => [
76 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'],
77 | T::T_BOOST,
78 | [T::T_NUMBER, 15.5],
79 | ],
80 | 'expected_nodes' => [
81 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test', null, true, Url::MAX_BOOST),
82 | ],
83 | ],
84 |
85 | [
86 | 'name' => 'url with fuzzy int',
87 | 'input' => 'http://test.com/1_2.html?a=b%20&c=1+2#test~5',
88 | 'expected_tokens' => [
89 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'],
90 | T::T_FUZZY,
91 | [T::T_NUMBER, 5.0],
92 | ],
93 | 'expected_nodes' => [
94 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test'),
95 | ],
96 | ],
97 |
98 | [
99 | 'name' => 'url with fuzzy float',
100 | 'input' => 'http://test.com/1_2.html?a=b%20&c=1+2#test~5.5',
101 | 'expected_tokens' => [
102 | [T::T_URL, 'http://test.com/1_2.html?a=b%20&c=1+2#test'],
103 | T::T_FUZZY,
104 | [T::T_NUMBER, 5.5],
105 | ],
106 | 'expected_nodes' => [
107 | new Url('http://test.com/1_2.html?a=b%20&c=1+2#test'),
108 | ],
109 | ],
110 | /*
111 | * END: URLS
112 | */
113 |
114 |
115 | /*
116 | * START: EMOTICONS
117 | * todo: need more emoticon tests
118 | */
119 | [
120 | 'name' => 'simple emoticons',
121 | 'input' => ':) :(',
122 | 'expected_tokens' => [
123 | [T::T_EMOTICON, ':)'],
124 | [T::T_EMOTICON, ':('],
125 | ],
126 | 'expected_nodes' => [
127 | new Emoticon(':)', BoolOperator::REQUIRED),
128 | new Emoticon(':(', BoolOperator::REQUIRED),
129 | ],
130 | ],
131 | /*
132 | * END: EMOTICONS
133 | */
134 |
135 |
136 | /*
137 | * START: EMOJIS
138 | */
139 | [
140 | 'name' => 'simple emoji',
141 | 'input' => 'ice 🍦 poop 💩 doh 😳',
142 | 'expected_tokens' => [
143 | [T::T_WORD, 'ice'],
144 | [T::T_EMOJI, '🍦'],
145 | [T::T_WORD, 'poop'],
146 | [T::T_EMOJI, '💩'],
147 | [T::T_WORD, 'doh'],
148 | [T::T_EMOJI, '😳'],
149 | ],
150 | 'expected_nodes' => [
151 | new Word('ice'),
152 | new Emoji('🍦', BoolOperator::REQUIRED),
153 | new Word('poop'),
154 | new Emoji('💩', BoolOperator::REQUIRED),
155 | new Word('doh'),
156 | new Emoji('😳', BoolOperator::REQUIRED),
157 | ],
158 | ],
159 | /*
160 | * END: EMOJIS
161 | */
162 |
163 |
164 | /*
165 | * START: BOOST AND FUZZY
166 | */
167 | [
168 | 'name' => 'boost and fuzzy in filter',
169 | 'input' => 'f:b^5 f:f~5',
170 | 'expected_tokens' => [
171 | [T::T_FIELD_START, 'f'],
172 | [T::T_WORD, 'b'],
173 | T::T_FIELD_END,
174 | T::T_BOOST,
175 | [T::T_NUMBER, 5.0],
176 | [T::T_FIELD_START, 'f'],
177 | [T::T_WORD, 'f'],
178 | T::T_FIELD_END,
179 | T::T_FUZZY,
180 | [T::T_NUMBER, 5.0],
181 | ],
182 | 'expected_nodes' => [
183 | new Field('f', new Word('b'), null, true, 5.0),
184 | new Field('f', new Word('f'), null, false, Field::DEFAULT_BOOST),
185 | ],
186 | ],
187 |
188 | [
189 | 'name' => 'boost and fuzzy in range',
190 | 'input' => 'f:[1^5..5]^5 f:[1~5..5]~5',
191 | 'expected_tokens' => [
192 | [T::T_FIELD_START, 'f'],
193 | T::T_RANGE_INCL_START,
194 | [T::T_NUMBER, 1.0],
195 | [T::T_NUMBER, 5.0],
196 | T::T_TO,
197 | [T::T_NUMBER, 5.0],
198 | T::T_RANGE_INCL_END,
199 | T::T_FIELD_END,
200 | T::T_BOOST,
201 | [T::T_NUMBER, 5.0],
202 | [T::T_FIELD_START, 'f'],
203 | T::T_RANGE_INCL_START,
204 | [T::T_NUMBER, 1.0],
205 | [T::T_NUMBER, 5.0],
206 | T::T_TO,
207 | [T::T_NUMBER, 5.0],
208 | T::T_RANGE_INCL_END,
209 | T::T_FIELD_END,
210 | T::T_FUZZY,
211 | [T::T_NUMBER, 5.0],
212 | ],
213 | 'expected_nodes' => [
214 | new Field(
215 | 'f',
216 | new NumberRange(
217 | new Numbr(1.0),
218 | new Numbr(5.0)
219 | ),
220 | null,
221 | true,
222 | 5.0
223 | ),
224 | new Field(
225 | 'f',
226 | new NumberRange(
227 | new Numbr(1.0),
228 | new Numbr(5.0)
229 | ),
230 | null,
231 | false,
232 | Field::DEFAULT_BOOST
233 | ),
234 | ],
235 | ],
236 | /*
237 | * END: BOOST AND FUZZY
238 | */
239 |
240 |
241 | /*
242 | * START: PHRASES
243 | */
244 | [
245 | 'name' => 'simple phrase',
246 | 'input' => 'a "simple phrase"',
247 | 'expected_tokens' => [
248 | [T::T_WORD, 'a'],
249 | [T::T_PHRASE, 'simple phrase'],
250 | ],
251 | 'expected_nodes' => [
252 | new Word('a'),
253 | new Phrase('simple phrase'),
254 | ],
255 | ],
256 |
257 | [
258 | 'name' => 'required phrase',
259 | 'input' => 'a +"simple phrase"',
260 | 'expected_tokens' => [
261 | [T::T_WORD, 'a'],
262 | T::T_REQUIRED,
263 | [T::T_PHRASE, 'simple phrase'],
264 | ],
265 | 'expected_nodes' => [
266 | new Word('a'),
267 | new Phrase('simple phrase', BoolOperator::REQUIRED),
268 | ],
269 | ],
270 |
271 | [
272 | 'name' => 'prohibited phrase',
273 | 'input' => 'a -"simple phrase"',
274 | 'expected_tokens' => [
275 | [T::T_WORD, 'a'],
276 | T::T_PROHIBITED,
277 | [T::T_PHRASE, 'simple phrase'],
278 | ],
279 | 'expected_nodes' => [
280 | new Word('a'),
281 | new Phrase('simple phrase', BoolOperator::PROHIBITED),
282 | ],
283 | ],
284 |
285 | [
286 | 'name' => 'boosted phrase int',
287 | 'input' => 'a "simple phrase"^1',
288 | 'expected_tokens' => [
289 | [T::T_WORD, 'a'],
290 | [T::T_PHRASE, 'simple phrase'],
291 | T::T_BOOST,
292 | [T::T_NUMBER, 1.0],
293 | ],
294 | 'expected_nodes' => [
295 | new Word('a'),
296 | new Phrase('simple phrase', null, true, 1.0),
297 | ],
298 | ],
299 |
300 | [
301 | 'name' => 'boosted phrase float',
302 | 'input' => 'a "simple phrase"^0.1',
303 | 'expected_tokens' => [
304 | [T::T_WORD, 'a'],
305 | [T::T_PHRASE, 'simple phrase'],
306 | T::T_BOOST,
307 | [T::T_NUMBER, 0.1],
308 | ],
309 | 'expected_nodes' => [
310 | new Word('a'),
311 | new Phrase('simple phrase', null, true, 0.1),
312 | ],
313 | ],
314 |
315 | [
316 | 'name' => 'fuzzy phrase int',
317 | 'input' => 'a "simple phrase"~1',
318 | 'expected_tokens' => [
319 | [T::T_WORD, 'a'],
320 | [T::T_PHRASE, 'simple phrase'],
321 | T::T_FUZZY,
322 | [T::T_NUMBER, 1.0],
323 | ],
324 | 'expected_nodes' => [
325 | new Word('a'),
326 | new Phrase('simple phrase', null, false, Phrase::DEFAULT_BOOST, true, Phrase::MIN_FUZZY),
327 | ],
328 | ],
329 |
330 | [
331 | 'name' => 'fuzzy phrase float',
332 | 'input' => 'a "simple phrase"~0.1',
333 | 'expected_tokens' => [
334 | [T::T_WORD, 'a'],
335 | [T::T_PHRASE, 'simple phrase'],
336 | T::T_FUZZY,
337 | [T::T_NUMBER, 0.1],
338 | ],
339 | 'expected_nodes' => [
340 | new Word('a'),
341 | new Phrase('simple phrase', null, false, Phrase::DEFAULT_BOOST, true, Phrase::MIN_FUZZY),
342 | ],
343 | ],
344 |
345 | [
346 | 'name' => 'phrase with embedded emoticons',
347 | 'input' => '"a smiley :)"',
348 | 'expected_tokens' => [
349 | [T::T_PHRASE, 'a smiley :)'],
350 | ],
351 | 'expected_nodes' => [
352 | new Phrase('a smiley :)'),
353 | ],
354 | ],
355 |
356 | [
357 | 'name' => 'phrase with embedded emojis',
358 | 'input' => '"ice cream 🍦"',
359 | 'expected_tokens' => [
360 | [T::T_PHRASE, 'ice cream 🍦'],
361 | ],
362 | 'expected_nodes' => [
363 | new Phrase('ice cream 🍦'),
364 | ],
365 | ],
366 |
367 | [
368 | 'name' => 'phrase with embedded punctation, boosting, etc.',
369 | 'input' => '"boosted^51.50 .. field:test~5"',
370 | 'expected_tokens' => [
371 | [T::T_PHRASE, 'boosted^51.50 .. field:test~5'],
372 | ],
373 | 'expected_nodes' => [
374 | new Phrase('boosted^51.50 .. field:test~5'),
375 | ],
376 | ],
377 |
378 | [
379 | 'name' => 'phrase with dates',
380 | 'input' => '"in the year >=2000-01-01"',
381 | 'expected_tokens' => [
382 | [T::T_PHRASE, 'in the year >=2000-01-01'],
383 | ],
384 | 'expected_nodes' => [
385 | new Phrase('in the year >=2000-01-01'),
386 | ],
387 | ],
388 |
389 | [
390 | 'name' => 'phrase on phrase',
391 | 'input' => '"p1""p2""p3',
392 | 'expected_tokens' => [
393 | [T::T_PHRASE, 'p1'],
394 | [T::T_PHRASE, 'p2'],
395 | [T::T_WORD, 'p3'],
396 | ],
397 | 'expected_nodes' => [
398 | new Phrase('p1'),
399 | new Phrase('p2'),
400 | new Word('p3'),
401 | ],
402 | ],
403 | /*
404 | * END: PHRASES
405 | */
406 |
407 |
408 | /*
409 | * START: HASHTAGS
410 | */
411 | [
412 | 'name' => 'simple hashtags',
413 | 'input' => 'a #Cat in a #hat',
414 | 'expected_tokens' => [
415 | [T::T_WORD, 'a'],
416 | [T::T_HASHTAG, 'Cat'],
417 | [T::T_WORD, 'in'],
418 | [T::T_WORD, 'a'],
419 | [T::T_HASHTAG, 'hat'],
420 | ],
421 | 'expected_nodes' => [
422 | new Word('a'),
423 | new Hashtag('Cat', BoolOperator::REQUIRED),
424 | new Word('in'),
425 | new Word('a'),
426 | new Hashtag('hat', BoolOperator::REQUIRED),
427 | ],
428 | ],
429 |
430 | [
431 | 'name' => 'required/prohibited hashtags with boost',
432 | 'input' => '+#Cat -#hat^100',
433 | 'expected_tokens' => [
434 | T::T_REQUIRED,
435 | [T::T_HASHTAG, 'Cat'],
436 | T::T_PROHIBITED,
437 | [T::T_HASHTAG, 'hat'],
438 | T::T_BOOST,
439 | [T::T_NUMBER, 100.0],
440 | ],
441 | 'expected_nodes' => [
442 | new Hashtag('Cat', BoolOperator::REQUIRED),
443 | new Hashtag('hat', BoolOperator::PROHIBITED, true, Hashtag::MAX_BOOST),
444 | ],
445 | ],
446 |
447 | [
448 | 'name' => 'required/prohibited hashtags with fuzzy',
449 | 'input' => '#hat~100 #hat~100.1',
450 | 'expected_tokens' => [
451 | [T::T_HASHTAG, 'hat'],
452 | T::T_FUZZY,
453 | [T::T_NUMBER, 100.0],
454 | [T::T_HASHTAG, 'hat'],
455 | T::T_FUZZY,
456 | [T::T_NUMBER, 100.1],
457 | ],
458 | 'expected_nodes' => [
459 | new Hashtag('hat', BoolOperator::REQUIRED),
460 | new Hashtag('hat', BoolOperator::REQUIRED),
461 | ],
462 | ],
463 |
464 | [
465 | 'name' => 'required/prohibited hashtags with boost',
466 | 'input' => '+#Cat -#hat^100 #_cat #2015cat__',
467 | 'expected_tokens' => [
468 | T::T_REQUIRED,
469 | [T::T_HASHTAG, 'Cat'],
470 | T::T_PROHIBITED,
471 | [T::T_HASHTAG, 'hat'],
472 | T::T_BOOST,
473 | [T::T_NUMBER, 100.0],
474 | [T::T_HASHTAG, '_cat'],
475 | [T::T_HASHTAG, '2015cat__'],
476 | ],
477 | 'expected_nodes' => [
478 | new Hashtag('Cat', BoolOperator::REQUIRED),
479 | new Hashtag('hat', BoolOperator::PROHIBITED, true, Hashtag::MAX_BOOST),
480 | new Hashtag('_cat', BoolOperator::REQUIRED),
481 | new Hashtag('2015cat__', BoolOperator::REQUIRED),
482 | ],
483 | ],
484 |
485 | // todo: should we refactor to catch #hashtag#hashtag or @mention#tag or #tag@mention?
486 | [
487 | 'name' => 'hashtag on hashtag and double hashtag',
488 | 'input' => '#cat#cat ##cat #####cat',
489 | 'expected_tokens' => [
490 | [T::T_WORD, 'cat#cat'],
491 | [T::T_HASHTAG, 'cat'],
492 | [T::T_HASHTAG, 'cat'],
493 | ],
494 | 'expected_nodes' => [
495 | new Word('cat#cat'),
496 | new Hashtag('cat', BoolOperator::REQUIRED),
497 | new Hashtag('cat', BoolOperator::REQUIRED),
498 | ],
499 | ],
500 | /*
501 | * END: HASHTAGS
502 | */
503 |
504 |
505 | /*
506 | * START: MENTIONS
507 | */
508 | [
509 | 'name' => 'simple mentions',
510 | 'input' => '@user @user_name @user.name @user-name',
511 | 'expected_tokens' => [
512 | [T::T_MENTION, 'user'],
513 | [T::T_MENTION, 'user_name'],
514 | [T::T_MENTION, 'user.name'],
515 | [T::T_MENTION, 'user-name'],
516 | ],
517 | 'expected_nodes' => [
518 | new Mention('user', BoolOperator::REQUIRED),
519 | new Mention('user_name', BoolOperator::REQUIRED),
520 | new Mention('user.name', BoolOperator::REQUIRED),
521 | new Mention('user-name', BoolOperator::REQUIRED),
522 |
523 | ],
524 | ],
525 |
526 | [
527 | 'name' => 'required mentions',
528 | 'input' => '+@user +@user_name +@user.name +@user-name',
529 | 'expected_tokens' => [
530 | T::T_REQUIRED,
531 | [T::T_MENTION, 'user'],
532 | T::T_REQUIRED,
533 | [T::T_MENTION, 'user_name'],
534 | T::T_REQUIRED,
535 | [T::T_MENTION, 'user.name'],
536 | T::T_REQUIRED,
537 | [T::T_MENTION, 'user-name'],
538 | ],
539 | 'expected_nodes' => [
540 | new Mention('user', BoolOperator::REQUIRED),
541 | new Mention('user_name', BoolOperator::REQUIRED),
542 | new Mention('user.name', BoolOperator::REQUIRED),
543 | new Mention('user-name', BoolOperator::REQUIRED),
544 | ],
545 | ],
546 |
547 | [
548 | 'name' => 'prohibited mentions',
549 | 'input' => '-@user -@user_name -@user.name -@user-name',
550 | 'expected_tokens' => [
551 | T::T_PROHIBITED,
552 | [T::T_MENTION, 'user'],
553 | T::T_PROHIBITED,
554 | [T::T_MENTION, 'user_name'],
555 | T::T_PROHIBITED,
556 | [T::T_MENTION, 'user.name'],
557 | T::T_PROHIBITED,
558 | [T::T_MENTION, 'user-name'],
559 | ],
560 | 'expected_nodes' => [
561 | new Mention('user', BoolOperator::PROHIBITED),
562 | new Mention('user_name', BoolOperator::PROHIBITED),
563 | new Mention('user.name', BoolOperator::PROHIBITED),
564 | new Mention('user-name', BoolOperator::PROHIBITED),
565 | ],
566 | ],
567 |
568 | [
569 | 'name' => 'mentions with emails and hashtags',
570 | 'input' => '@john@doe.com @john#doe',
571 | 'expected_tokens' => [
572 | [T::T_WORD, 'john@doe.com'],
573 | [T::T_WORD, 'john#doe'],
574 | ],
575 | 'expected_nodes' => [
576 | new Word('john@doe.com'),
577 | new Word('john#doe'),
578 | ],
579 | ],
580 |
581 | [
582 | 'name' => 'mentions with punctuation',
583 | 'input' => '@john. @wtf! @who?',
584 | 'expected_tokens' => [
585 | [T::T_MENTION, 'john'],
586 | [T::T_MENTION, 'wtf'],
587 | [T::T_MENTION, 'who'],
588 | ],
589 | 'expected_nodes' => [
590 | new Mention('john', BoolOperator::REQUIRED),
591 | new Mention('wtf', BoolOperator::REQUIRED),
592 | new Mention('who', BoolOperator::REQUIRED),
593 | ],
594 | ],
595 |
596 | [
597 | 'name' => 'mentions with special chars',
598 | 'input' => '@john^doe @john!doe',
599 | 'expected_tokens' => [
600 | [T::T_MENTION, 'john'],
601 | T::T_BOOST,
602 | [T::T_WORD, 'doe'],
603 | [T::T_WORD, 'john!doe'],
604 | ],
605 | 'expected_nodes' => [
606 | new Mention('john', BoolOperator::REQUIRED),
607 | new Word('doe'),
608 | new Word('john!doe'),
609 | ],
610 | ],
611 | /*
612 | * END: MENTIONS
613 | */
614 |
615 |
616 | /*
617 | * START: NUMBERS
618 | */
619 | [
620 | 'name' => 'integers, decimals and exponential form',
621 | 'input' => '100 3.1415926535898 2.2E-5',
622 | 'expected_tokens' => [
623 | [T::T_NUMBER, 100.0],
624 | [T::T_NUMBER, 3.1415926535898],
625 | [T::T_NUMBER, 2.2E-5],
626 | ],
627 | 'expected_nodes' => [
628 | new Numbr(100.0),
629 | new Numbr(3.1415926535898),
630 | new Numbr(2.2E-5),
631 | ],
632 | ],
633 |
634 | [
635 | 'name' => 'negative integers, decimals and exponential form',
636 | 'input' => '-100 -3.1415926535898 -2.2E-5',
637 | 'expected_tokens' => [
638 | [T::T_NUMBER, -100.0],
639 | [T::T_NUMBER, -3.1415926535898],
640 | [T::T_NUMBER, -2.2E-5],
641 | ],
642 | 'expected_nodes' => [
643 | new Numbr(-100.0),
644 | new Numbr(-3.1415926535898),
645 | new Numbr(-2.2E-5),
646 | ],
647 | ],
648 |
649 | [
650 | 'name' => 'words with boosted numbers',
651 | 'input' => 'word^100 word^3.1415926535898 word^2.2E-5',
652 | 'expected_tokens' => [
653 | [T::T_WORD, 'word'],
654 | T::T_BOOST,
655 | [T::T_NUMBER, 100.0],
656 | [T::T_WORD, 'word'],
657 | T::T_BOOST,
658 | [T::T_NUMBER, 3.1415926535898],
659 | [T::T_WORD, 'word'],
660 | T::T_BOOST,
661 | [T::T_NUMBER, 2.2E-5],
662 | ],
663 | 'expected_nodes' => [
664 | new Word('word', null, true, 10.0),
665 | new Word('word', null, true, 3.1415926535898),
666 | new Word('word', null, true, 2.2E-5),
667 | ],
668 | ],
669 |
670 | [
671 | 'name' => 'words with boosted negative numbers',
672 | 'input' => 'word^-100 word^-3.1415926535898 word^-2.2E-5',
673 | 'expected_tokens' => [
674 | [T::T_WORD, 'word'],
675 | T::T_BOOST,
676 | [T::T_NUMBER, -100.0],
677 | [T::T_WORD, 'word'],
678 | T::T_BOOST,
679 | [T::T_NUMBER, -3.1415926535898],
680 | [T::T_WORD, 'word'],
681 | T::T_BOOST,
682 | [T::T_NUMBER, -2.2E-5],
683 | ],
684 | 'expected_nodes' => [
685 | new Word('word', null, true, 0.0),
686 | new Word('word', null, true, 0.0),
687 | new Word('word', null, true, 0.0),
688 | ],
689 | ],
690 |
691 | [
692 | 'name' => 'words with fuzzy numbers',
693 | 'input' => 'word~100 word~3.1415926535898 word~2.2E-5',
694 | 'expected_tokens' => [
695 | [T::T_WORD, 'word'],
696 | T::T_FUZZY,
697 | [T::T_NUMBER, 100.0],
698 | [T::T_WORD, 'word'],
699 | T::T_FUZZY,
700 | [T::T_NUMBER, 3.1415926535898],
701 | [T::T_WORD, 'word'],
702 | T::T_FUZZY,
703 | [T::T_NUMBER, 2.2E-5],
704 | ],
705 | 'expected_nodes' => [
706 | new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MAX_FUZZY),
707 | new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MAX_FUZZY),
708 | new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MIN_FUZZY),
709 | ],
710 | ],
711 |
712 | [
713 | 'name' => 'words with fuzzy negative numbers',
714 | 'input' => 'word~-100 word~-3.1415926535898 word~-2.2E-5',
715 | 'expected_tokens' => [
716 | [T::T_WORD, 'word'],
717 | T::T_FUZZY,
718 | [T::T_NUMBER, -100.0],
719 | [T::T_WORD, 'word'],
720 | T::T_FUZZY,
721 | [T::T_NUMBER, -3.1415926535898],
722 | [T::T_WORD, 'word'],
723 | T::T_FUZZY,
724 | [T::T_NUMBER, -2.2E-5],
725 | ],
726 | 'expected_nodes' => [
727 | new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MIN_FUZZY),
728 | new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MIN_FUZZY),
729 | new Word('word', null, false, Word::DEFAULT_BOOST, true, Word::MIN_FUZZY),
730 | ],
731 | ],
732 | /*
733 | * END: NUMBERS
734 | */
735 |
736 |
737 | /*
738 | * START: FIELDS
739 | */
740 | [
741 | 'name' => 'fields with hypen, underscore and dot',
742 | 'input' => '+first-name:homer -last_name:simpson job.performance:poor^5',
743 | 'expected_tokens' => [
744 | T::T_REQUIRED,
745 | [T::T_FIELD_START, 'first-name'],
746 | [T::T_WORD, 'homer'],
747 | T::T_FIELD_END,
748 | T::T_PROHIBITED,
749 | [T::T_FIELD_START, 'last_name'],
750 | [T::T_WORD, 'simpson'],
751 | T::T_FIELD_END,
752 | [T::T_FIELD_START, 'job.performance'],
753 | [T::T_WORD, 'poor'],
754 | T::T_FIELD_END,
755 | T::T_BOOST,
756 | [T::T_NUMBER, 5.0],
757 | ],
758 | 'expected_nodes' => [
759 | new Field('first-name', new Word('homer'), BoolOperator::REQUIRED, false, Field::DEFAULT_BOOST),
760 | new Field('last_name', new Word('simpson'), BoolOperator::PROHIBITED, false, Field::DEFAULT_BOOST),
761 | new Field('job.performance', new Word('poor'), null, true, 5.0),
762 | ],
763 | ],
764 |
765 | [
766 | 'name' => 'field with field in it',
767 | 'input' => 'field:subfield:what',
768 | 'expected_tokens' => [
769 | [T::T_FIELD_START, 'field'],
770 | [T::T_WORD, 'subfield:what'],
771 | T::T_FIELD_END,
772 | ],
773 | 'expected_nodes' => [
774 | new Field('field', new Word('subfield:what'), null, false, Field::DEFAULT_BOOST),
775 | ],
776 | ],
777 |
778 | [
779 | 'name' => 'field with no value',
780 | 'input' => 'field:',
781 | 'expected_tokens' => [
782 | [T::T_FIELD_START, 'field'],
783 | T::T_FIELD_END,
784 | ],
785 | 'expected_nodes' => [
786 | new Word('field'),
787 | ],
788 | ],
789 |
790 | [
791 | 'name' => 'field with phrases',
792 | 'input' => 'field:"boosted^5 +required"^1 -field:"[1..5]"~4',
793 | 'expected_tokens' => [
794 | [T::T_FIELD_START, 'field'],
795 | [T::T_PHRASE, 'boosted^5 +required'],
796 | T::T_FIELD_END,
797 | T::T_BOOST,
798 | [T::T_NUMBER, 1.0],
799 | T::T_PROHIBITED,
800 | [T::T_FIELD_START, 'field'],
801 | [T::T_PHRASE, '[1..5]'],
802 | T::T_FIELD_END,
803 | T::T_FUZZY,
804 | [T::T_NUMBER, 4.0],
805 | ],
806 | 'expected_nodes' => [
807 | new Field('field', new Phrase('boosted^5 +required'), null, true, 1.0),
808 | new Field('field', new Phrase('[1..5]'), BoolOperator::PROHIBITED, false, Field::DEFAULT_BOOST),
809 | ],
810 | ],
811 |
812 | [
813 | 'name' => 'field with greater/less than',
814 | 'input' => 'field:>100 field:>=100.1 field:<100 field:<=100.1',
815 | 'expected_tokens' => [
816 | [T::T_FIELD_START, 'field'],
817 | T::T_GREATER_THAN,
818 | [T::T_NUMBER, 100.0],
819 | T::T_FIELD_END,
820 | [T::T_FIELD_START, 'field'],
821 | T::T_GREATER_THAN,
822 | T::T_EQUALS,
823 | [T::T_NUMBER, 100.1],
824 | T::T_FIELD_END,
825 | [T::T_FIELD_START, 'field'],
826 | T::T_LESS_THAN,
827 | [T::T_NUMBER, 100.0],
828 | T::T_FIELD_END,
829 | [T::T_FIELD_START, 'field'],
830 | T::T_LESS_THAN,
831 | T::T_EQUALS,
832 | [T::T_NUMBER, 100.1],
833 | T::T_FIELD_END,
834 | ],
835 | 'expected_nodes' => [
836 | new Field('field', new Numbr(100, ComparisonOperator::GT), null, false, Field::DEFAULT_BOOST),
837 | new Field('field', new Numbr(100.1, ComparisonOperator::GTE), null, false, Field::DEFAULT_BOOST),
838 | new Field('field', new Numbr(100, ComparisonOperator::LT), null, false, Field::DEFAULT_BOOST),
839 | new Field('field', new Numbr(100.1, ComparisonOperator::LTE), null, false, Field::DEFAULT_BOOST),
840 | ],
841 | ],
842 |
843 | [
844 | 'name' => 'field with a hashtag or mention',
845 | 'input' => 'field:#cats field:@user.name',
846 | 'expected_tokens' => [
847 | [T::T_FIELD_START, 'field'],
848 | [T::T_HASHTAG, 'cats'],
849 | T::T_FIELD_END,
850 | [T::T_FIELD_START, 'field'],
851 | [T::T_MENTION, 'user.name'],
852 | T::T_FIELD_END,
853 | ],
854 | 'expected_nodes' => [
855 | new Field('field', new Hashtag('cats', BoolOperator::REQUIRED), null, false, Field::DEFAULT_BOOST),
856 | new Field('field', new Mention('user.name', BoolOperator::REQUIRED), null, false, Field::DEFAULT_BOOST),
857 | ],
858 | ],
859 |
860 | [
861 | 'name' => 'field with inclusive range',
862 | 'input' => 'field:[1..5] +field:[1 TO 5]',
863 | 'expected_tokens' => [
864 | [T::T_FIELD_START, 'field'],
865 | T::T_RANGE_INCL_START,
866 | [T::T_NUMBER, 1.0],
867 | T::T_TO,
868 | [T::T_NUMBER, 5.0],
869 | T::T_RANGE_INCL_END,
870 | T::T_FIELD_END,
871 | T::T_REQUIRED,
872 | [T::T_FIELD_START, 'field'],
873 | T::T_RANGE_INCL_START,
874 | [T::T_NUMBER, 1.0],
875 | T::T_TO,
876 | [T::T_NUMBER, 5.0],
877 | T::T_RANGE_INCL_END,
878 | T::T_FIELD_END,
879 | ],
880 | 'expected_nodes' => [
881 | new Field(
882 | 'field',
883 | new NumberRange(
884 | new Numbr(1),
885 | new Numbr(5)
886 | ),
887 | null,
888 | false,
889 | Field::DEFAULT_BOOST
890 | ),
891 | new Field(
892 | 'field',
893 | new NumberRange(
894 | new Numbr(1),
895 | new Numbr(5)
896 | ),
897 | BoolOperator::REQUIRED,
898 | false,
899 | Field::DEFAULT_BOOST
900 | ),
901 | ],
902 | ],
903 |
904 | [
905 | 'name' => 'field with exclusive range',
906 | 'input' => 'field:{1.1..5.5} +field:{1.1 TO 5.5}',
907 | 'expected_tokens' => [
908 | [T::T_FIELD_START, 'field'],
909 | T::T_RANGE_EXCL_START,
910 | [T::T_NUMBER, 1.1],
911 | T::T_TO,
912 | [T::T_NUMBER, 5.5],
913 | T::T_RANGE_EXCL_END,
914 | T::T_FIELD_END,
915 | T::T_REQUIRED,
916 | [T::T_FIELD_START, 'field'],
917 | T::T_RANGE_EXCL_START,
918 | [T::T_NUMBER, 1.1],
919 | T::T_TO,
920 | [T::T_NUMBER, 5.5],
921 | T::T_RANGE_EXCL_END,
922 | T::T_FIELD_END,
923 | ],
924 | 'expected_nodes' => [
925 | new Field(
926 | 'field',
927 | new NumberRange(
928 | new Numbr(1.1),
929 | new Numbr(5.5),
930 | true
931 | ),
932 | null,
933 | false,
934 | Field::DEFAULT_BOOST
935 | ),
936 | new Field(
937 | 'field',
938 | new NumberRange(
939 | new Numbr(1.1),
940 | new Numbr(5.5),
941 | true
942 | ),
943 | BoolOperator::REQUIRED,
944 | false,
945 | Field::DEFAULT_BOOST
946 | ),
947 | ],
948 | ],
949 |
950 | [
951 | 'name' => 'field with subquery',
952 | 'input' => 'field:(cat OR dog) test',
953 | 'expected_tokens' => [
954 | [T::T_FIELD_START, 'field'],
955 | T::T_SUBQUERY_START,
956 | [T::T_WORD, 'cat'],
957 | T::T_OR,
958 | [T::T_WORD, 'dog'],
959 | T::T_SUBQUERY_END,
960 | T::T_FIELD_END,
961 | [T::T_WORD, 'test'],
962 | ],
963 | 'expected_nodes' => [
964 | new Field(
965 | 'field',
966 | new Subquery([
967 | new Word('cat'),
968 | new Word('dog'),
969 | ]),
970 | null,
971 | false,
972 | Field::DEFAULT_BOOST
973 | ),
974 | new Word('test'),
975 | ],
976 | ],
977 |
978 | [
979 | 'name' => 'field with range in subquery',
980 | 'input' => 'field:(cat OR 1..5)',
981 | 'expected_tokens' => [
982 | [T::T_FIELD_START, 'field'],
983 | T::T_SUBQUERY_START,
984 | [T::T_WORD, 'cat'],
985 | T::T_OR,
986 | [T::T_NUMBER, 1.0],
987 | [T::T_NUMBER, 5.0],
988 | T::T_SUBQUERY_END,
989 | T::T_FIELD_END,
990 | ],
991 | 'expected_nodes' => [
992 | new Field(
993 | 'field',
994 | new Subquery([
995 | new Word('cat'),
996 | new Numbr(1.0),
997 | new Numbr(5.0),
998 | ]),
999 | null,
1000 | false,
1001 | Field::DEFAULT_BOOST
1002 | ),
1003 | ],
1004 | ],
1005 |
1006 | [
1007 | 'name' => 'field with dates',
1008 | 'input' => 'field:2015-12-18 field:>2015-12-18 field:<2015-12-18 field:>=2015-12-18 field:<=2015-12-18',
1009 | 'expected_tokens' => [
1010 | [T::T_FIELD_START, 'field'],
1011 | [T::T_DATE, '2015-12-18'],
1012 | T::T_FIELD_END,
1013 | [T::T_FIELD_START, 'field'],
1014 | T::T_GREATER_THAN,
1015 | [T::T_DATE, '2015-12-18'],
1016 | T::T_FIELD_END,
1017 | [T::T_FIELD_START, 'field'],
1018 | T::T_LESS_THAN,
1019 | [T::T_DATE, '2015-12-18'],
1020 | T::T_FIELD_END,
1021 | [T::T_FIELD_START, 'field'],
1022 | T::T_GREATER_THAN,
1023 | T::T_EQUALS,
1024 | [T::T_DATE, '2015-12-18'],
1025 | T::T_FIELD_END,
1026 | [T::T_FIELD_START, 'field'],
1027 | T::T_LESS_THAN,
1028 | T::T_EQUALS,
1029 | [T::T_DATE, '2015-12-18'],
1030 | T::T_FIELD_END,
1031 | ],
1032 | 'expected_nodes' => [
1033 | new Field(
1034 | 'field',
1035 | new Date('2015-12-18'),
1036 | null,
1037 | false,
1038 | Field::DEFAULT_BOOST
1039 | ),
1040 | new Field(
1041 | 'field',
1042 | new Date(
1043 | '2015-12-18',
1044 | null,
1045 | false,
1046 | Date::DEFAULT_BOOST,
1047 | false,
1048 | Date::DEFAULT_FUZZY,
1049 | ComparisonOperator::GT
1050 | ),
1051 | null,
1052 | false,
1053 | Field::DEFAULT_BOOST
1054 | ),
1055 | new Field(
1056 | 'field',
1057 | new Date(
1058 | '2015-12-18',
1059 | null,
1060 | false,
1061 | Date::DEFAULT_BOOST,
1062 | false,
1063 | Date::DEFAULT_FUZZY,
1064 | ComparisonOperator::LT
1065 | ),
1066 | null,
1067 | false,
1068 | Field::DEFAULT_BOOST
1069 | ),
1070 | new Field(
1071 | 'field',
1072 | new Date(
1073 | '2015-12-18',
1074 | null,
1075 | false,
1076 | Date::DEFAULT_BOOST,
1077 | false,
1078 | Date::DEFAULT_FUZZY,
1079 | ComparisonOperator::GTE
1080 | ),
1081 | null,
1082 | false,
1083 | Field::DEFAULT_BOOST
1084 | ),
1085 | new Field(
1086 | 'field',
1087 | new Date(
1088 | '2015-12-18',
1089 | null,
1090 | false,
1091 | Date::DEFAULT_BOOST,
1092 | false,
1093 | Date::DEFAULT_FUZZY,
1094 | ComparisonOperator::LTE
1095 | ),
1096 | null,
1097 | false,
1098 | Field::DEFAULT_BOOST
1099 | ),
1100 | ],
1101 | ],
1102 |
1103 | [
1104 | 'name' => 'field leading _ and uuid',
1105 | 'input' => '_id:a9fc3e46-150a-45cd-ad39-c80f93119900^5',
1106 | 'expected_tokens' => [
1107 | [T::T_FIELD_START, '_id'],
1108 | [T::T_WORD, 'a9fc3e46-150a-45cd-ad39-c80f93119900'],
1109 | T::T_FIELD_END,
1110 | T::T_BOOST,
1111 | [T::T_NUMBER, 5.0],
1112 | ],
1113 | 'expected_nodes' => [
1114 | new Field('_id', new Word('a9fc3e46-150a-45cd-ad39-c80f93119900'), null, true, 5.0),
1115 | ],
1116 | ],
1117 |
1118 | [
1119 | 'name' => 'field with mentions and emails',
1120 | 'input' => 'email:john@doe.com -user:@twitterz',
1121 | 'expected_tokens' => [
1122 | [T::T_FIELD_START, 'email'],
1123 | [T::T_WORD, 'john@doe.com'],
1124 | T::T_FIELD_END,
1125 | T::T_PROHIBITED,
1126 | [T::T_FIELD_START, 'user'],
1127 | [T::T_MENTION, 'twitterz'],
1128 | T::T_FIELD_END,
1129 | ],
1130 | 'expected_nodes' => [
1131 | new Field('email', new Word('john@doe.com'), null, false, Field::DEFAULT_BOOST),
1132 | new Field(
1133 | 'user',
1134 | new Mention('twitterz', BoolOperator::REQUIRED),
1135 | BoolOperator::PROHIBITED,
1136 | false,
1137 | Field::DEFAULT_BOOST
1138 | ),
1139 | ],
1140 | ],
1141 |
1142 | [
1143 | 'name' => 'field with hashtags',
1144 | 'input' => 'tags:#cats tags:(#cats || #dogs)',
1145 | 'expected_tokens' => [
1146 | [T::T_FIELD_START, 'tags'],
1147 | [T::T_HASHTAG, 'cats'],
1148 | T::T_FIELD_END,
1149 | [T::T_FIELD_START, 'tags'],
1150 | T::T_SUBQUERY_START,
1151 | [T::T_HASHTAG, 'cats'],
1152 | T::T_OR,
1153 | [T::T_HASHTAG, 'dogs'],
1154 | T::T_SUBQUERY_END,
1155 | T::T_FIELD_END,
1156 | ],
1157 | 'expected_nodes' => [
1158 | new Field(
1159 | 'tags',
1160 | new Hashtag('cats', BoolOperator::REQUIRED),
1161 | null,
1162 | false,
1163 | Field::DEFAULT_BOOST
1164 | ),
1165 | new Field(
1166 | 'tags',
1167 | new Subquery([
1168 | new Hashtag('cats', BoolOperator::REQUIRED),
1169 | new Hashtag('dogs', BoolOperator::REQUIRED),
1170 | ]),
1171 | null,
1172 | false,
1173 | Field::DEFAULT_BOOST
1174 | ),
1175 | ],
1176 | ],
1177 | /*
1178 | * END: FIELDS
1179 | */
1180 |
1181 |
1182 | /*
1183 | * START: WORDS
1184 | */
1185 | [
1186 | 'name' => 'word with hashtag or mention in it',
1187 | 'input' => 'omg#lol omg@user @mention#tag #tag@mention',
1188 | 'expected_tokens' => [
1189 | [T::T_WORD, 'omg#lol'],
1190 | [T::T_WORD, 'omg@user'],
1191 | [T::T_WORD, 'mention#tag'],
1192 | [T::T_WORD, 'tag@mention'],
1193 | ],
1194 | 'expected_nodes' => [
1195 | new Word('omg#lol'),
1196 | new Word('omg@user'),
1197 | new Word('mention#tag'),
1198 | new Word('tag@mention'),
1199 | ],
1200 | ],
1201 |
1202 | [
1203 | 'name' => 'required/prohibited words',
1204 | 'input' => '+c.h.u.d. -zombieland +ac/dc^5',
1205 | 'expected_tokens' => [
1206 | T::T_REQUIRED,
1207 | [T::T_WORD, 'c.h.u.d'],
1208 | T::T_PROHIBITED,
1209 | [T::T_WORD, 'zombieland'],
1210 | T::T_REQUIRED,
1211 | [T::T_WORD, 'ac/dc'],
1212 | T::T_BOOST,
1213 | [T::T_NUMBER, 5.0],
1214 | ],
1215 | 'expected_nodes' => [
1216 | new Word('c.h.u.d', BoolOperator::REQUIRED),
1217 | new Word('zombieland', BoolOperator::PROHIBITED),
1218 | new Word('ac/dc', BoolOperator::REQUIRED, true, 5.0),
1219 | ],
1220 | ],
1221 |
1222 | [
1223 | 'name' => 'words that have embedded operators',
1224 | 'input' => 'cANDy AND OReos || dANDy && chORes^5',
1225 | 'expected_tokens' => [
1226 | [T::T_WORD, 'cANDy'],
1227 | T::T_AND,
1228 | [T::T_WORD, 'OReos'],
1229 | T::T_OR,
1230 | [T::T_WORD, 'dANDy'],
1231 | T::T_AND,
1232 | [T::T_WORD, 'chORes'],
1233 | T::T_BOOST,
1234 | [T::T_NUMBER, 5.0],
1235 | ],
1236 | 'expected_nodes' => [
1237 | new Word('cANDy', BoolOperator::REQUIRED),
1238 | new Word('OReos', BoolOperator::REQUIRED),
1239 | new Word('dANDy', BoolOperator::REQUIRED),
1240 | new Word('chORes', BoolOperator::REQUIRED, true, 5.0),
1241 | ],
1242 | ],
1243 | /*
1244 | * END: WORDS
1245 | */
1246 |
1247 |
1248 | /*
1249 | * START: DATES
1250 | */
1251 | [
1252 | 'name' => 'dates in string',
1253 | 'input' => '2000-01-01 >=2000-01-01 (+2015-12-18) -2015-12-18',
1254 | 'expected_tokens' => [
1255 | [T::T_DATE, '2000-01-01'],
1256 | [T::T_DATE, '2000-01-01'],
1257 | T::T_SUBQUERY_START,
1258 | T::T_REQUIRED,
1259 | [T::T_DATE, '2015-12-18'],
1260 | T::T_SUBQUERY_END,
1261 | T::T_PROHIBITED,
1262 | [T::T_DATE, '2015-12-18'],
1263 | ],
1264 | 'expected_nodes' => [
1265 | new Date('2000-01-01'),
1266 | new Date('2000-01-01'),
1267 | new Date('2015-12-18', BoolOperator::REQUIRED),
1268 | new Date('2015-12-18', BoolOperator::PROHIBITED),
1269 | ],
1270 | ],
1271 |
1272 | [
1273 | 'name' => 'dates on dates',
1274 | 'input' => '2000-01-012000-01-01 2000-01-01^2000-01-01',
1275 | 'expected_tokens' => [
1276 | [T::T_WORD, '2000-01-012000-01-01'],
1277 | [T::T_DATE, '2000-01-01'],
1278 | T::T_BOOST,
1279 | [T::T_DATE, '2000-01-01'],
1280 | ],
1281 | 'expected_nodes' => [
1282 | new Word('2000-01-012000-01-01'),
1283 | new Date('2000-01-01'),
1284 | new Date('2000-01-01'),
1285 | ],
1286 | ],
1287 | /*
1288 | * END: DATES
1289 | */
1290 |
1291 |
1292 | /*
1293 | * START: ACCENTED CHARS
1294 | */
1295 | [
1296 | 'name' => 'accents and hyphens',
1297 | 'input' => '+Beyoncé Giselle Knowles-Carter',
1298 | 'expected_tokens' => [
1299 | T::T_REQUIRED,
1300 | [T::T_WORD, 'Beyoncé'],
1301 | [T::T_WORD, 'Giselle'],
1302 | [T::T_WORD, 'Knowles-Carter'],
1303 | ],
1304 | 'expected_nodes' => [
1305 | new Word('Beyoncé', BoolOperator::REQUIRED),
1306 | new Word('Giselle'),
1307 | new Word('Knowles-Carter'),
1308 | ],
1309 | ],
1310 |
1311 | [
1312 | 'name' => 'accents and hyphen spice',
1313 | 'input' => 'J. Lo => Emme Maribel Muñiz $p0rty-spicé',
1314 | 'expected_tokens' => [
1315 | [T::T_WORD, 'J'],
1316 | [T::T_WORD, 'Lo'],
1317 | [T::T_WORD, 'Emme'],
1318 | [T::T_WORD, 'Maribel'],
1319 | [T::T_WORD, 'Muñiz'],
1320 | [T::T_WORD, '$p0rty-spicé'],
1321 | ],
1322 | 'expected_nodes' => [
1323 | new Word('J'),
1324 | new Word('Lo'),
1325 | new Word('Emme'),
1326 | new Word('Maribel'),
1327 | new Word('Muñiz'),
1328 | new Word('$p0rty-spicé'),
1329 | ],
1330 | ],
1331 |
1332 | [
1333 | 'name' => 'utf chars',
1334 | 'input' => '测试 測試',
1335 | 'expected_tokens' => [
1336 | [T::T_WORD, '测试'],
1337 | [T::T_WORD, '測試'],
1338 | ],
1339 | 'expected_nodes' => [
1340 | new Word('测试'),
1341 | new Word('測試'),
1342 | ],
1343 | ],
1344 | /*
1345 | * END: ACCENTED CHARS
1346 | */
1347 |
1348 |
1349 | /*
1350 | * START: RAPPERS and POP STARS
1351 | */
1352 | [
1353 | 'name' => 'crazy a$$ names',
1354 | 'input' => 'p!nk AND K$sha in a tr33 with 50¢',
1355 | 'expected_tokens' => [
1356 | [T::T_WORD, 'p!nk'],
1357 | T::T_AND,
1358 | [T::T_WORD, 'K$sha'],
1359 | [T::T_WORD, 'in'],
1360 | [T::T_WORD, 'a'],
1361 | [T::T_WORD, 'tr33'],
1362 | [T::T_WORD, 'with'],
1363 | [T::T_WORD, '50¢'],
1364 | ],
1365 | 'expected_nodes' => [
1366 | new Word('p!nk', BoolOperator::REQUIRED),
1367 | new Word('K$sha', BoolOperator::REQUIRED),
1368 | new Word('in'),
1369 | new Word('a'),
1370 | new Word('tr33'),
1371 | new Word('with'),
1372 | new Word('50¢'),
1373 | ],
1374 | ],
1375 |
1376 | [
1377 | 'name' => 'my name is math(ish)',
1378 | 'input' => '+florence+machine ac/dc^11 Stellastarr* T\'Pau ¡Forward, Russia! "¡Forward, Russia!"~',
1379 | 'expected_tokens' => [
1380 | T::T_REQUIRED,
1381 | [T::T_WORD, 'florence+machine'],
1382 | [T::T_WORD, 'ac/dc'],
1383 | T::T_BOOST,
1384 | [T::T_NUMBER, 11.0],
1385 | [T::T_WORD, 'Stellastarr'],
1386 | T::T_WILDCARD,
1387 | [T::T_WORD, 'T\'Pau'],
1388 | [T::T_WORD, '¡Forward'],
1389 | [T::T_WORD, 'Russia'],
1390 | [T::T_PHRASE, '¡Forward, Russia!'],
1391 | T::T_FUZZY,
1392 | ],
1393 | 'expected_nodes' => [
1394 | new Word('florence+machine', BoolOperator::REQUIRED),
1395 | new Word('ac/dc', null, true, Word::MAX_BOOST),
1396 | new Word('Stellastarr', null, false, Word::DEFAULT_BOOST, false, Word::DEFAULT_FUZZY, true),
1397 | new Word('T\'Pau'),
1398 | new Word('¡Forward'),
1399 | new Word('Russia'),
1400 | new Phrase('¡Forward, Russia!', null, false, Phrase::DEFAULT_BOOST, true, Phrase::DEFAULT_FUZZY),
1401 | ],
1402 | ],
1403 | /*
1404 | * END: RAPPERS and POP STARS
1405 | */
1406 |
1407 |
1408 | /*
1409 | * START: SUBQUERIES
1410 | */
1411 | [
1412 | 'name' => 'mismatched subqueries',
1413 | 'input' => ') test (123 (abc f:a)',
1414 | 'expected_tokens' => [
1415 | [T::T_WORD, 'test'],
1416 | T::T_SUBQUERY_START,
1417 | [T::T_NUMBER, 123.0],
1418 | [T::T_WORD, 'abc'],
1419 | [T::T_WORD, 'f:a'],
1420 | T::T_SUBQUERY_END,
1421 | ],
1422 | 'expected_nodes' => [
1423 | new Word('test'),
1424 | new Subquery([new Numbr(123.0), new Word('abc'), new Word('f:a')]),
1425 | ],
1426 | ],
1427 |
1428 | [
1429 | 'name' => 'filter inside of subquery',
1430 | 'input' => 'word(word:a>(#hashtag:b)',
1431 | 'expected_tokens' => [
1432 | [T::T_WORD, 'word'],
1433 | T::T_SUBQUERY_START,
1434 | [T::T_WORD, 'word:a'],
1435 | [T::T_WORD, 'hashtag:b'],
1436 | T::T_SUBQUERY_END,
1437 | ],
1438 | 'expected_nodes' => [
1439 | new Word('word'),
1440 | new Subquery([new Word('word:a'), new Word('hashtag:b')]),
1441 | ],
1442 | ],
1443 |
1444 | [
1445 | 'name' => 'booleans before and in subqueries',
1446 | 'input' => '"ipad pro" AND (gold OR silver)',
1447 | 'expected_tokens' => [
1448 | [T::T_PHRASE, 'ipad pro'],
1449 | T::T_AND,
1450 | T::T_SUBQUERY_START,
1451 | [T::T_WORD, 'gold'],
1452 | T::T_OR,
1453 | [T::T_WORD, 'silver'],
1454 | T::T_SUBQUERY_END,
1455 | ],
1456 | 'expected_nodes' => [
1457 | new Phrase('ipad pro', BoolOperator::REQUIRED),
1458 | new Subquery([new Word('gold'), new Word('silver')], BoolOperator::REQUIRED),
1459 | ],
1460 | ],
1461 |
1462 | [
1463 | 'name' => 'booleans before and in subqueries 2',
1464 | 'input' => '"iphone 7" -(16gb OR 32gb)',
1465 | 'expected_tokens' => [
1466 | [T::T_PHRASE, 'iphone 7'],
1467 | T::T_PROHIBITED,
1468 | T::T_SUBQUERY_START,
1469 | [T::T_WORD, '16gb'],
1470 | T::T_OR,
1471 | [T::T_WORD, '32gb'],
1472 | T::T_SUBQUERY_END,
1473 | ],
1474 | 'expected_nodes' => [
1475 | new Phrase('iphone 7'),
1476 | new Subquery([new Word('16gb'), new Word('32gb')], BoolOperator::PROHIBITED),
1477 | ],
1478 | ],
1479 | /*
1480 | * END: SUBQUERIES
1481 | */
1482 |
1483 |
1484 | /*
1485 | * START: WEIRD QUERIES
1486 | */
1487 | [
1488 | 'name' => 'whip nae nae',
1489 | 'input' => 'Watch Me (Whip/Nae Nae)',
1490 | 'expected_tokens' => [
1491 | [T::T_WORD, 'Watch'],
1492 | [T::T_WORD, 'Me'],
1493 | T::T_SUBQUERY_START,
1494 | [T::T_WORD, 'Whip/Nae'],
1495 | [T::T_WORD, 'Nae'],
1496 | T::T_SUBQUERY_END,
1497 | ],
1498 | 'expected_nodes' => [
1499 | new Word('Watch'),
1500 | new Word('Me'),
1501 | new Subquery([new Word('Whip/Nae'), new Word('Nae')]),
1502 | ],
1503 | ],
1504 |
1505 | [
1506 | 'name' => 'epic or fail',
1507 | 'input' => 'epic or fail',
1508 | 'expected_tokens' => [
1509 | [T::T_WORD, 'epic'],
1510 | [T::T_WORD, 'or'],
1511 | [T::T_WORD, 'fail'],
1512 | ],
1513 | 'expected_nodes' => [
1514 | new Word('epic'),
1515 | new Word('or'),
1516 | new Word('fail'),
1517 | ],
1518 | ],
1519 |
1520 | [
1521 | 'name' => 'use of || then and required subquery',
1522 | 'input' => 'test || AND what (+test)',
1523 | 'expected_tokens' => [
1524 | [T::T_WORD, 'test'],
1525 | T::T_OR,
1526 | T::T_AND,
1527 | [T::T_WORD, 'what'],
1528 | T::T_SUBQUERY_START,
1529 | T::T_REQUIRED,
1530 | [T::T_WORD, 'test'],
1531 | T::T_SUBQUERY_END,
1532 | ],
1533 | 'expected_nodes' => [
1534 | new Word('test'),
1535 | new Word('what', BoolOperator::REQUIRED),
1536 | new Word('test', BoolOperator::REQUIRED),
1537 | ],
1538 | ],
1539 |
1540 | [
1541 | 'name' => 'mega subqueries, all non-sensical',
1542 | 'input' => 'test OR ( ( 1 ) OR ( ( 2 ) ) OR ( ( ( 3.14 ) ) ) OR a OR +b ) OR +field:>1',
1543 | 'expected_tokens' => [
1544 | [T::T_WORD, 'test'],
1545 | T::T_OR,
1546 | T::T_SUBQUERY_START,
1547 | [T::T_NUMBER, 1.0],
1548 | T::T_SUBQUERY_END,
1549 | T::T_OR,
1550 | T::T_SUBQUERY_START,
1551 | [T::T_NUMBER, 2.0],
1552 | T::T_SUBQUERY_END,
1553 | T::T_OR,
1554 | T::T_SUBQUERY_START,
1555 | [T::T_NUMBER, 3.14],
1556 | T::T_SUBQUERY_END,
1557 | T::T_OR,
1558 | [T::T_WORD, 'a'],
1559 | T::T_OR,
1560 | T::T_REQUIRED,
1561 | [T::T_WORD, 'b'],
1562 | T::T_OR,
1563 | T::T_REQUIRED,
1564 | [T::T_FIELD_START, 'field'],
1565 | T::T_GREATER_THAN,
1566 | [T::T_NUMBER, 1.0],
1567 | T::T_FIELD_END,
1568 | ],
1569 | 'expected_nodes' => [
1570 | new Word('test'),
1571 | new Numbr(1),
1572 | new Numbr(2),
1573 | new Numbr(3.14),
1574 | new Word('a'),
1575 | new Word('b', BoolOperator::REQUIRED),
1576 | new Field(
1577 | 'field',
1578 | new Numbr(1.0, ComparisonOperator::GT),
1579 | BoolOperator::REQUIRED,
1580 | false,
1581 | Field::DEFAULT_BOOST
1582 | ),
1583 | ],
1584 | ],
1585 |
1586 | [
1587 | 'name' => 'common dotted things',
1588 | 'input' => 'R.I.P. Motörhead',
1589 | 'expected_tokens' => [
1590 | [T::T_WORD, 'R.I.P'],
1591 | [T::T_WORD, 'Motörhead'],
1592 | ],
1593 | 'expected_nodes' => [
1594 | new Word('R.I.P'),
1595 | new Word('Motörhead'),
1596 | ],
1597 | ],
1598 |
1599 | [
1600 | 'name' => 'ignored chars',
1601 | 'input' => '!!! ! $ _ . ; %',
1602 | 'expected_tokens' => [],
1603 | 'expected_nodes' => [],
1604 | ],
1605 |
1606 | [
1607 | 'name' => 'elastic search example 1',
1608 | 'input' => '"john smith"^2 (foo bar)^4',
1609 | 'expected_tokens' => [
1610 | [T::T_PHRASE, 'john smith'],
1611 | T::T_BOOST,
1612 | [T::T_NUMBER, 2.0],
1613 | T::T_SUBQUERY_START,
1614 | [T::T_WORD, 'foo'],
1615 | [T::T_WORD, 'bar'],
1616 | T::T_SUBQUERY_END,
1617 | T::T_BOOST,
1618 | [T::T_NUMBER, 4.0],
1619 | ],
1620 | 'expected_nodes' => [
1621 | new Phrase('john smith', null, true, 2.0),
1622 | new Subquery([new Word('foo'), new Word('bar')], null, true, 4.0),
1623 | ],
1624 | ],
1625 |
1626 | [
1627 | 'name' => 'intentionally mutant',
1628 | 'input' => '[blah "[[shortcode]]" akd_ -gj% ! @* (+=} --> ;\' [
1704 | [T::T_WORD, 'a"b"#c"#d'],
1705 | [T::T_WORD, 'e'],
1706 | ],
1707 | 'expected_nodes' => [
1708 | new Word('a"b"#c"#d'),
1709 | new Word('e'),
1710 | ],
1711 | ],
1712 |
1713 | [
1714 | 'name' => 'xss1',
1715 | 'input' => '
',
1716 | 'expected_tokens' => [
1717 | [T::T_WORD, 'IMG'],
1718 | [T::T_WORD, 'SRC'],
1719 | [T::T_WORD, 'jAvascript:alert'],
1720 | T::T_SUBQUERY_START,
1721 | [T::T_WORD, 'test2'],
1722 | T::T_SUBQUERY_END,
1723 | ],
1724 | 'expected_nodes' => [
1725 | new Word('IMG'),
1726 | new Word('SRC'),
1727 | new Word('jAvascript:alert'),
1728 | new Word('test2'),
1729 | ],
1730 | ],
1731 |
1732 | [
1733 | 'name' => 'should not be required',
1734 | 'input' => 'token + token',
1735 | 'expected_tokens' => [
1736 | [T::T_WORD, 'token'],
1737 | [T::T_WORD, 'token'],
1738 | ],
1739 | 'expected_nodes' => [
1740 | new Word('token'),
1741 | new Word('token'),
1742 | ],
1743 | ],
1744 |
1745 | [
1746 | 'name' => 'should not be prohibited',
1747 | 'input' => 'token - token',
1748 | 'expected_tokens' => [
1749 | [T::T_WORD, 'token'],
1750 | [T::T_WORD, 'token'],
1751 | ],
1752 | 'expected_nodes' => [
1753 | new Word('token'),
1754 | new Word('token'),
1755 | ],
1756 | ],
1757 |
1758 | [
1759 | 'name' => 'should not be boosted',
1760 | 'input' => 'token ^5 token',
1761 | 'expected_tokens' => [
1762 | [T::T_WORD, 'token'],
1763 | [T::T_NUMBER, 5.0],
1764 | [T::T_WORD, 'token'],
1765 | ],
1766 | 'expected_nodes' => [
1767 | new Word('token'),
1768 | new Numbr(5.0),
1769 | new Word('token'),
1770 | ],
1771 | ],
1772 |
1773 | [
1774 | 'name' => 'should not have words or phrases without real characters',
1775 | 'input' => 'test taco-spice chester:copperpot :: : ; ;; " " , - -- - ++ "a phrase:" _ [ ] { } | \\ / ` * ~ ! @ ( ) # $ % ^ & = < > ?',
1776 | 'expected_tokens' => [
1777 | [T::T_WORD, 'test'],
1778 | [T::T_WORD, 'taco-spice'],
1779 | [T::T_FIELD_START, 'chester'],
1780 | [T::T_WORD, 'copperpot'],
1781 | T::T_FIELD_END,
1782 | T::T_PROHIBITED,
1783 | T::T_REQUIRED,
1784 | [T::T_PHRASE, 'a phrase:'],
1785 | T::T_WILDCARD,
1786 | T::T_SUBQUERY_START,
1787 | T::T_SUBQUERY_END,
1788 | ],
1789 | 'expected_nodes' => [
1790 | new Word('test'),
1791 | new Word('taco-spice'),
1792 | new Field('chester', new Word('copperpot')),
1793 | new Phrase('a phrase:'),
1794 | ],
1795 | ],
1796 | /*
1797 | * END: WEIRD QUERIES
1798 | */
1799 | ];
1800 |
--------------------------------------------------------------------------------
/tests/QueryParserTest.php:
--------------------------------------------------------------------------------
1 | parser = new QueryParser();
16 | }
17 |
/**
 * Parses each fixture query and compares the produced nodes with the
 * fixture's expected nodes.
 *
 * The data provider yields the fixture entries in the order
 * name, input, expected_tokens, expected_nodes; the token list is only
 * relevant to the tokenizer test and is deliberately unused here.
 *
 * @dataProvider getTestQueries
 *
 * @param string $name          Human readable label of the fixture, used in the failure message.
 * @param string $input         Raw query string handed to the parser.
 * @param mixed  $ignored       The fixture's expected tokens (not used by this test).
 * @param array  $expectedNodes Nodes the parser is expected to produce for $input.
 */
public function testParse(string $name, string $input, $ignored, array $expectedNodes = []): void
{
    $failureMessage = "Test query [{$name}] with input [{$input}] failed.";
    $actualNodes = $this->parser->parse($input)->getNodes();

    $this->assertEquals($expectedNodes, $actualNodes, $failureMessage);
}
31 |
/**
 * Data provider: loads the shared query fixtures.
 *
 * Declared static because PHPUnit 10+ only accepts static data providers;
 * older PHPUnit versions accept static providers as well.
 *
 * @return array[] One entry per fixture with the keys
 *                 name, input, expected_tokens and expected_nodes (in that order).
 */
public static function getTestQueries(): array
{
    // Plain `require` (not `require_once`) on purpose: the fixture file returns
    // its array, and `require_once` would only return `true` on a repeated load.
    return require __DIR__ . '/Fixtures/test-queries.php';
}
36 | }
37 |
--------------------------------------------------------------------------------
/tests/TokenizerTest.php:
--------------------------------------------------------------------------------
1 | tokenizer = new Tokenizer();
17 | }
18 |
/**
 * Scanning an input made only of whitespace must yield no tokens.
 */
public function testOnlyWhitespace(): void
{
    $tokens = $this->tokenizer->scan(' ')->getTokens();

    $this->assertEquals([], $tokens);
}
23 |
/**
 * Tokenizes each fixture query and compares the resulting tokens with
 * the fixture's expected tokens.
 *
 * Fixtures describe a token either as a bare token type or as a
 * [type, value] pair; both shapes are turned into token objects before
 * the comparison.
 *
 * @dataProvider getTestQueries
 *
 * @param string $name           Human readable label of the fixture, used in the failure message.
 * @param string $input          Raw query string handed to the tokenizer.
 * @param array  $expectedTokens Token descriptions (type or [type, value]) expected for $input.
 */
public function testScan(string $name, string $input, array $expectedTokens): void
{
    // array_map() with a single array keeps the original keys, matching
    // the in-place replacement the fixtures were written against.
    $expected = array_map(
        static function ($spec) {
            if (is_array($spec)) {
                return new T($spec[0], $spec[1]);
            }

            return new T($spec);
        },
        $expectedTokens
    );

    $actual = $this->tokenizer->scan($input)->getTokens();

    $this->assertEquals($expected, $actual, "Test query [{$name}] with input [{$input}] failed.");
}
46 |
/**
 * Data provider: loads the shared query fixtures.
 *
 * Declared static because PHPUnit 10+ only accepts static data providers;
 * older PHPUnit versions accept static providers as well.
 *
 * @return array[] One entry per fixture with the keys
 *                 name, input, expected_tokens and expected_nodes (in that order).
 */
public static function getTestQueries(): array
{
    // Plain `require` (not `require_once`) on purpose: the fixture file returns
    // its array, and `require_once` would only return `true` on a repeated load.
    return require __DIR__ . '/Fixtures/test-queries.php';
}
51 | }
52 |
--------------------------------------------------------------------------------
/tests/bootstrap.php:
--------------------------------------------------------------------------------
1 |