├── .github
│   └── workflows
│       └── test.yml
├── .gitignore
├── .readthedocs.yml
├── .travis.yml
├── CHANGELOG.rst
├── LICENSE
├── LICENSE.Apache2
├── LICENSE.LGPLv3
├── MANIFEST.in
├── Makefile
├── README.rst
├── docs
│   ├── Makefile
│   ├── make.bat
│   └── source
│       ├── _static
│       │   └── luqum-logo.png
│       ├── about.rst
│       ├── api.rst
│       ├── conf.py
│       ├── index.rst
│       ├── install.rst
│       └── quick_start.rst
├── luqum-logo.png
├── luqum
│   ├── __init__.py
│   ├── auto_head_tail.py
│   ├── check.py
│   ├── deprecated_utils.py
│   ├── elasticsearch
│   │   ├── __init__.py
│   │   ├── nested.py
│   │   ├── schema.py
│   │   ├── tree.py
│   │   └── visitor.py
│   ├── exceptions.py
│   ├── head_tail.py
│   ├── naming.py
│   ├── parser.py
│   ├── parsetab.py
│   ├── pretty.py
│   ├── thread.py
│   ├── tree.py
│   ├── utils.py
│   └── visitor.py
├── pyproject.toml
├── requirements-dev.txt
├── requirements.txt
├── setup.cfg
├── setup.py
└── tests
    ├── __init__.py
    ├── alternative_lexer.py
    ├── test_auto_head_tail.py
    ├── test_check.py
    ├── test_deprecated_utils.py
    ├── test_elasticsearch
    │   ├── __init__.py
    │   ├── book.json
    │   ├── es_integration_utils.py
    │   ├── test_es_integration.py
    │   ├── test_es_naming.py
    │   ├── test_estree.py
    │   ├── test_naming.py
    │   ├── test_nested.py
    │   ├── test_schema.py
    │   └── tests.py
    ├── test_headtail.py
    ├── test_naming.py
    ├── test_parser.py
    ├── test_pretty.py
    ├── test_quick_start.rst
    ├── test_thread.py
    ├── test_tree.py
    ├── test_utils.py
    └── test_visitor.py
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: code quality checks and unit tests
2 |
3 | on:
4 | pull_request:
5 |
6 |
7 | jobs:
8 | # quality
9 | quality_checks:
10 | runs-on: ubuntu-latest
11 | strategy:
12 | matrix:
13 | python-version:
14 | - "3.10"
15 | - "3.11"
16 | - "3.12"
17 | - "3.13"
18 | es-version:
19 | - "8.17.1"
20 | es-dsl-version:
21 | - "8.17.1"
22 | include:
23 |           # only test older ES versions with python 3.10
24 | - python-version: "3.10"
25 | es-version: "6.8.23"
26 | es-dsl-version: "6.4.0"
27 | - python-version: "3.10"
28 | es-version: "7.17.6"
29 | es-dsl-version: "7.4.0"
30 |           # but also run python 3.10 with the newest ES version
31 | - python-version: "3.10"
32 | es-version: "8.17.1"
33 | es-dsl-version: "8.17.1"
34 |
35 | env:
36 | ES_VERSION: "${{ matrix.es-version }}"
37 | steps:
38 | #----------------------------------------------
39 | # check-out repo and set-up python
40 | #----------------------------------------------
41 | - name: Check out repository
42 | uses: actions/checkout@v3
43 | - name: Setup python
44 | uses: actions/setup-python@v4
45 | with:
46 | python-version: "${{ matrix.python-version }}"
47 | - name: install project
48 | run: |
49 | pip install -r requirements-dev.txt
50 | pip install .
51 |           # coveralls is not yet compatible with python 3.13
52 | [[ ${{ matrix.python-version }} != 3.13 ]] && pip install coveralls
53 | pip install elasticsearch-dsl==${{ matrix.es-dsl-version }}
54 | - name: run tests
55 | run: |
56 | make quality && \
57 | make es_tests
58 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # PLY
7 | parser.out
8 | parsetab.py
9 |
10 | # coverage
11 | cover/
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | env/
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | .venv
51 | nosetests.xml
52 | coverage.xml
53 | *,cover
54 | .hypothesis/
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 |
60 | # Django stuff:
61 | *.log
62 |
63 | # Sphinx documentation
64 | docs/build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # nose
70 | .noseids
71 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 |
4 | # Required
5 | version: 2
6 |
7 | # Set the version of Python and other tools you might need
8 | build:
9 | os: ubuntu-20.04
10 | tools:
11 | python: "3.9"
12 |
13 | # Build documentation in the docs/ directory with Sphinx
14 | sphinx:
15 | configuration: docs/source/conf.py
16 |
17 | # If using Sphinx, optionally build your docs in additional formats such as PDF
18 | formats:
19 | - pdf
20 | - epub
21 |
22 | # Optionally declare the Python requirements required to build your docs
23 | python:
24 | install:
25 | - requirements: requirements-dev.txt
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | matrix:
4 | include:
5 | - env: ES_APT_URL=https://packages.elastic.co/elasticsearch/2.x/debian ES_DSL_VERS=2.2.0 ES_VERS=2.2.1
6 | python: 3.7
7 | sudo: true
8 | dist: bionic
9 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0
10 | python: 3.7
11 | dist: bionic
12 | sudo: true
13 | - env: ES_APT_URL=https://packages.elastic.co/elasticsearch/2.x/debian ES_DSL_VERS=2.2.0 ES_VERS=2.2.1
14 | python: 3.8
15 | sudo: true
16 | dist: bionic
17 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0
18 | python: 3.8
19 | dist: bionic
20 | sudo: true
21 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0
22 | python: 3.9
23 | dist: bionic
24 | sudo: true
25 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/6.x/apt ES_DSL_VERS=6.3.1 ES_VERS=6.4.3
26 |       python: "3.10"
27 | dist: bionic
28 | sudo: true
29 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0
30 |       python: "3.10"
31 | dist: bionic
32 | sudo: true
33 |
34 |
35 | before_install:
36 | - sudo rm /etc/apt/sources.list; sudo touch /etc/apt/sources.list
37 | - wget -qO - https://packages.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add -
38 | - echo "deb $ES_APT_URL stable main" | sudo tee -a /etc/apt/sources.list.d/elastic.list
39 | - sudo apt-get update && sudo apt-get install elasticsearch=$ES_VERS -y --allow-downgrades
40 | - sudo chown -R elasticsearch:elasticsearch /etc/default/elasticsearch
41 | - sudo systemctl start elasticsearch.service
42 | - while ! curl -XGET "localhost:9200";do sleep 1; done
43 | - curl localhost:9200
44 |
45 | install:
46 | - pip install .
47 | - pip install -r requirements-dev.txt
48 | - pip install coveralls
49 | - pip install elasticsearch-dsl==$ES_DSL_VERS
50 |
51 | script:
52 | - make tests
53 | - make quality
54 |
55 | after_success:
56 | # coveralls only for python3.8 and ES 7
57 | - python --version |grep 3.8 && [ $ES_DSL_VERS == "7.2.1" ] && coveralls
58 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This project is dual licensed.
2 |
3 | See LICENSE.LGPLv3 and LICENSE.Apache2
4 |
--------------------------------------------------------------------------------
/LICENSE.Apache2:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/LICENSE.LGPLv3:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include *.rst
3 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | ES_VERSION ?= 8.17.1
2 |
3 | tests:
4 | pytest
5 |
6 | # integration test with ES using docker
7 | es_tests:
8 | ( docker ps |grep luqum_test_es ) || \
9 | docker run --name luqum_test_es --rm -d -ti -p 127.0.0.1:9200:9200 \
10 | -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \
11 | -e "xpack.security.enabled=false" \
12 | elasticsearch:${ES_VERSION}
13 | # wait for ES to be ready
14 | @echo "waiting for ES to be ready"
15 | @while ! curl -XGET "localhost:9200" >/dev/null 2>&1;do sleep 1; echo -n "."; done
16 | pytest
17 | docker stop luqum_test_es
18 |
19 | quality:
20 | flake8 luqum tests
21 |
22 | # To upload files, you need to have a ~/.pypirc file locally.
23 | # This file should contain all the necessary passwords and API-tokens.
24 | distribute:
25 | rm -r build
26 | rm dist/*
27 | python -m build --wheel
28 | python -m build --sdist
29 | python -m twine upload --verbose --repository luqum dist/*
30 |
31 | .PHONY: tests es_tests quality distribute
32 |
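33 | # Example invocations (es_tests needs docker and curl available):
34 | #   make tests
35 | #   make es_tests ES_VERSION=8.17.1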
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | luqum - A Lucene query parser in Python, using PLY
2 | #########################################################
3 |
4 | |pypi-version| |readthedocs| |travis| |coveralls|
5 |
6 | |logo|
7 |
8 | "luqum" (as in LUcene QUery Manipolator) is a tool to parse queries
9 | written in the `Lucene Query DSL`_ and build an abstract syntax tree
10 | to inspect, analyze or otherwise manipulate search queries.
11 |
12 | It makes it possible to enrich the Lucene Query DSL semantics
13 | (for example, to support nested object searches or to apply special treatment to some fields)
14 | and to transform Lucene DSL queries into the native `ElasticSearch JSON DSL`_.
15 |
16 | Thanks to luqum, your users may continue to write queries like:
17 | `author.last_name:Smith OR author:(age:[25 TO 34] AND first_name:John)`
18 | and you will be able to leverage the ElasticSearch query DSL
19 | and control the precise meaning of each search term.
20 |
21 | Luqum is dual licensed under Apache2.0 and LGPLv3.
22 |
23 | Compatible with Python 3.10+
24 |
25 | Installation
26 | ============
27 |
28 | ``pip install luqum``
29 |
30 |
31 | Dependencies
32 | ============
33 |
34 | `PLY`_ >= 3.11
35 |
36 |
37 | Full documentation
38 | ==================
39 |
40 | http://luqum.readthedocs.org/en/latest/
41 |
42 |
43 | .. _`Lucene Query DSL` : https://lucene.apache.org/core/3_6_0/queryparsersyntax.html
44 | .. _`ElasticSearch JSON DSL`: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
45 |
46 | .. _`PLY`: http://www.dabeaz.com/ply/
47 |
48 | .. |logo| image:: https://raw.githubusercontent.com/jurismarches/luqum/master/luqum-logo.png
49 |
50 | .. |pypi-version| image:: https://img.shields.io/pypi/v/luqum.svg
51 | :target: https://pypi.python.org/pypi/luqum
52 | :alt: Latest PyPI version
53 | .. |travis| image:: http://img.shields.io/travis/jurismarches/luqum/master.svg?style=flat
54 | :target: https://travis-ci.org/jurismarches/luqum
55 | .. |coveralls| image:: http://img.shields.io/coveralls/jurismarches/luqum/master.svg?style=flat
56 | :target: https://coveralls.io/r/jurismarches/luqum
57 | .. |readthedocs| image:: https://readthedocs.org/projects/luqum/badge/?version=latest
58 | :target: http://luqum.readthedocs.org/en/latest/?badge=latest
59 | :alt: Documentation Status
60 |
61 |
62 |
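63 | Quick usage sketch
64 | ==================
65 |
66 | A minimal sketch of going from a Lucene query string to an Elasticsearch
67 | JSON DSL query (the ``not_analyzed_fields`` value below is only illustrative;
68 | see the full documentation for real configuration)::
69 |
70 |     from luqum.parser import parser
71 |     from luqum.elasticsearch import ElasticsearchQueryBuilder
72 |
73 |     # parse the Lucene query into a syntax tree
74 |     tree = parser.parse('author.last_name:Smith AND age:[25 TO 34]')
75 |
76 |     # illustrative option: declare fields that are not analyzed
77 |     es_builder = ElasticsearchQueryBuilder(not_analyzed_fields=["author.last_name"])
78 |     print(es_builder(tree))  # a dict, ready to serialize as JSON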
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
21 |
22 | .PHONY: help
23 | help:
24 | 	@echo "Please use \`make <target>' where <target> is one of"
25 | @echo " html to make standalone HTML files"
26 | @echo " dirhtml to make HTML files named index.html in directories"
27 | @echo " singlehtml to make a single large HTML file"
28 | @echo " pickle to make pickle files"
29 | @echo " json to make JSON files"
30 | @echo " htmlhelp to make HTML files and a HTML help project"
31 | @echo " qthelp to make HTML files and a qthelp project"
32 | @echo " applehelp to make an Apple Help Book"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " epub3 to make an epub3"
36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
37 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
39 | @echo " text to make text files"
40 | @echo " man to make manual pages"
41 | @echo " texinfo to make Texinfo files"
42 | @echo " info to make Texinfo files and run them through makeinfo"
43 | @echo " gettext to make PO message catalogs"
44 | @echo " changes to make an overview of all changed/added/deprecated items"
45 | @echo " xml to make Docutils-native XML files"
46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
47 | @echo " linkcheck to check all external links for integrity"
48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
49 | @echo " coverage to run coverage check of the documentation (if enabled)"
50 | @echo " dummy to check syntax errors of document sources"
51 |
52 | .PHONY: clean
53 | clean:
54 | rm -rf $(BUILDDIR)/*
55 |
56 | .PHONY: html
57 | html:
58 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
59 | @echo
60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
61 |
62 | .PHONY: dirhtml
63 | dirhtml:
64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
65 | @echo
66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
67 |
68 | .PHONY: singlehtml
69 | singlehtml:
70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
71 | @echo
72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
73 |
74 | .PHONY: pickle
75 | pickle:
76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
77 | @echo
78 | @echo "Build finished; now you can process the pickle files."
79 |
80 | .PHONY: json
81 | json:
82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
83 | @echo
84 | @echo "Build finished; now you can process the JSON files."
85 |
86 | .PHONY: htmlhelp
87 | htmlhelp:
88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
89 | @echo
90 | @echo "Build finished; now you can run HTML Help Workshop with the" \
91 | ".hhp project file in $(BUILDDIR)/htmlhelp."
92 |
93 | .PHONY: qthelp
94 | qthelp:
95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
96 | @echo
97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/luqum.qhcp"
100 | @echo "To view the help file:"
101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/luqum.qhc"
102 |
103 | .PHONY: applehelp
104 | applehelp:
105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
106 | @echo
107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
108 | @echo "N.B. You won't be able to view it unless you put it in" \
109 | "~/Library/Documentation/Help or install it in your application" \
110 | "bundle."
111 |
112 | .PHONY: devhelp
113 | devhelp:
114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
115 | @echo
116 | @echo "Build finished."
117 | @echo "To view the help file:"
118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/luqum"
119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/luqum"
120 | @echo "# devhelp"
121 |
122 | .PHONY: epub
123 | epub:
124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
125 | @echo
126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
127 |
128 | .PHONY: epub3
129 | epub3:
130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
131 | @echo
132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
133 |
134 | .PHONY: latex
135 | latex:
136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
137 | @echo
138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
140 | "(use \`make latexpdf' here to do that automatically)."
141 |
142 | .PHONY: latexpdf
143 | latexpdf:
144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
145 | @echo "Running LaTeX files through pdflatex..."
146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
148 |
149 | .PHONY: latexpdfja
150 | latexpdfja:
151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
152 | @echo "Running LaTeX files through platex and dvipdfmx..."
153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
155 |
156 | .PHONY: text
157 | text:
158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
159 | @echo
160 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
161 |
162 | .PHONY: man
163 | man:
164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
165 | @echo
166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
167 |
168 | .PHONY: texinfo
169 | texinfo:
170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
171 | @echo
172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
173 | @echo "Run \`make' in that directory to run these through makeinfo" \
174 | "(use \`make info' here to do that automatically)."
175 |
176 | .PHONY: info
177 | info:
178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
179 | @echo "Running Texinfo files through makeinfo..."
180 | make -C $(BUILDDIR)/texinfo info
181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
182 |
183 | .PHONY: gettext
184 | gettext:
185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
186 | @echo
187 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
188 |
189 | .PHONY: changes
190 | changes:
191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
192 | @echo
193 | @echo "The overview file is in $(BUILDDIR)/changes."
194 |
195 | .PHONY: linkcheck
196 | linkcheck:
197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
198 | @echo
199 | @echo "Link check complete; look for any errors in the above output " \
200 | "or in $(BUILDDIR)/linkcheck/output.txt."
201 |
202 | .PHONY: doctest
203 | doctest:
204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
205 | @echo "Testing of doctests in the sources finished, look at the " \
206 | "results in $(BUILDDIR)/doctest/output.txt."
207 |
208 | .PHONY: coverage
209 | coverage:
210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
211 | @echo "Testing of coverage in the sources finished, look at the " \
212 | "results in $(BUILDDIR)/coverage/python.txt."
213 |
214 | .PHONY: xml
215 | xml:
216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
217 | @echo
218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
219 |
220 | .PHONY: pseudoxml
221 | pseudoxml:
222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
223 | @echo
224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
225 |
226 | .PHONY: dummy
227 | dummy:
228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
229 | @echo
230 | @echo "Build finished. Dummy builder generates no files."
231 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
10 | set I18NSPHINXOPTS=%SPHINXOPTS% source
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 |
16 | if "%1" == "" goto help
17 |
18 | if "%1" == "help" (
19 | :help
20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. epub3 to make an epub3
31 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
32 | echo. text to make text files
33 | echo. man to make manual pages
34 | echo. texinfo to make Texinfo files
35 | echo. gettext to make PO message catalogs
36 | echo. changes to make an overview over all changed/added/deprecated items
37 | echo. xml to make Docutils-native XML files
38 | echo. pseudoxml to make pseudoxml-XML files for display purposes
39 | echo. linkcheck to check all external links for integrity
40 | echo. doctest to run all doctests embedded in the documentation if enabled
41 | echo. coverage to run coverage check of the documentation if enabled
42 | echo. dummy to check syntax errors of document sources
43 | goto end
44 | )
45 |
46 | if "%1" == "clean" (
47 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
48 | del /q /s %BUILDDIR%\*
49 | goto end
50 | )
51 |
52 |
53 | REM Check if sphinx-build is available and fallback to Python version if any
54 | %SPHINXBUILD% 1>NUL 2>NUL
55 | if errorlevel 9009 goto sphinx_python
56 | goto sphinx_ok
57 |
58 | :sphinx_python
59 |
60 | set SPHINXBUILD=python -m sphinx.__init__
61 | %SPHINXBUILD% 2> nul
62 | if errorlevel 9009 (
63 | echo.
64 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
65 | echo.installed, then set the SPHINXBUILD environment variable to point
66 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
67 | echo.may add the Sphinx directory to PATH.
68 | echo.
69 | echo.If you don't have Sphinx installed, grab it from
70 | echo.http://sphinx-doc.org/
71 | exit /b 1
72 | )
73 |
74 | :sphinx_ok
75 |
76 |
77 | if "%1" == "html" (
78 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
79 | if errorlevel 1 exit /b 1
80 | echo.
81 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
82 | goto end
83 | )
84 |
85 | if "%1" == "dirhtml" (
86 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
87 | if errorlevel 1 exit /b 1
88 | echo.
89 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
90 | goto end
91 | )
92 |
93 | if "%1" == "singlehtml" (
94 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
95 | if errorlevel 1 exit /b 1
96 | echo.
97 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
98 | goto end
99 | )
100 |
101 | if "%1" == "pickle" (
102 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
103 | if errorlevel 1 exit /b 1
104 | echo.
105 | echo.Build finished; now you can process the pickle files.
106 | goto end
107 | )
108 |
109 | if "%1" == "json" (
110 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
111 | if errorlevel 1 exit /b 1
112 | echo.
113 | echo.Build finished; now you can process the JSON files.
114 | goto end
115 | )
116 |
117 | if "%1" == "htmlhelp" (
118 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
119 | if errorlevel 1 exit /b 1
120 | echo.
121 | echo.Build finished; now you can run HTML Help Workshop with the ^
122 | .hhp project file in %BUILDDIR%/htmlhelp.
123 | goto end
124 | )
125 |
126 | if "%1" == "qthelp" (
127 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
128 | if errorlevel 1 exit /b 1
129 | echo.
130 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
131 | .qhcp project file in %BUILDDIR%/qthelp, like this:
132 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\luqum.qhcp
133 | echo.To view the help file:
134 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\luqum.qhc
135 | goto end
136 | )
137 |
138 | if "%1" == "devhelp" (
139 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
140 | if errorlevel 1 exit /b 1
141 | echo.
142 | echo.Build finished.
143 | goto end
144 | )
145 |
146 | if "%1" == "epub" (
147 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
148 | if errorlevel 1 exit /b 1
149 | echo.
150 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
151 | goto end
152 | )
153 |
154 | if "%1" == "epub3" (
155 | %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3
156 | if errorlevel 1 exit /b 1
157 | echo.
158 | echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
159 | goto end
160 | )
161 |
162 | if "%1" == "latex" (
163 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
164 | if errorlevel 1 exit /b 1
165 | echo.
166 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
167 | goto end
168 | )
169 |
170 | if "%1" == "latexpdf" (
171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
172 | cd %BUILDDIR%/latex
173 | make all-pdf
174 | cd %~dp0
175 | echo.
176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
177 | goto end
178 | )
179 |
180 | if "%1" == "latexpdfja" (
181 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
182 | cd %BUILDDIR%/latex
183 | make all-pdf-ja
184 | cd %~dp0
185 | echo.
186 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
187 | goto end
188 | )
189 |
190 | if "%1" == "text" (
191 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
192 | if errorlevel 1 exit /b 1
193 | echo.
194 | echo.Build finished. The text files are in %BUILDDIR%/text.
195 | goto end
196 | )
197 |
198 | if "%1" == "man" (
199 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
200 | if errorlevel 1 exit /b 1
201 | echo.
202 | echo.Build finished. The manual pages are in %BUILDDIR%/man.
203 | goto end
204 | )
205 |
206 | if "%1" == "texinfo" (
207 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
208 | if errorlevel 1 exit /b 1
209 | echo.
210 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
211 | goto end
212 | )
213 |
214 | if "%1" == "gettext" (
215 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
216 | if errorlevel 1 exit /b 1
217 | echo.
218 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
219 | goto end
220 | )
221 |
222 | if "%1" == "changes" (
223 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
224 | if errorlevel 1 exit /b 1
225 | echo.
226 | echo.The overview file is in %BUILDDIR%/changes.
227 | goto end
228 | )
229 |
230 | if "%1" == "linkcheck" (
231 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
232 | if errorlevel 1 exit /b 1
233 | echo.
234 | echo.Link check complete; look for any errors in the above output ^
235 | or in %BUILDDIR%/linkcheck/output.txt.
236 | goto end
237 | )
238 |
239 | if "%1" == "doctest" (
240 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
241 | if errorlevel 1 exit /b 1
242 | echo.
243 | echo.Testing of doctests in the sources finished, look at the ^
244 | results in %BUILDDIR%/doctest/output.txt.
245 | goto end
246 | )
247 |
248 | if "%1" == "coverage" (
249 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
250 | if errorlevel 1 exit /b 1
251 | echo.
252 | echo.Testing of coverage in the sources finished, look at the ^
253 | results in %BUILDDIR%/coverage/python.txt.
254 | goto end
255 | )
256 |
257 | if "%1" == "xml" (
258 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
259 | if errorlevel 1 exit /b 1
260 | echo.
261 | echo.Build finished. The XML files are in %BUILDDIR%/xml.
262 | goto end
263 | )
264 |
265 | if "%1" == "pseudoxml" (
266 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
267 | if errorlevel 1 exit /b 1
268 | echo.
269 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
270 | goto end
271 | )
272 |
273 | if "%1" == "dummy" (
274 | %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy
275 | if errorlevel 1 exit /b 1
276 | echo.
277 | echo.Build finished. Dummy builder generates no files.
278 | goto end
279 | )
280 |
281 | :end
282 |
--------------------------------------------------------------------------------
/docs/source/_static/luqum-logo.png:
--------------------------------------------------------------------------------
1 | ../../../luqum-logo.png
--------------------------------------------------------------------------------
/docs/source/about.rst:
--------------------------------------------------------------------------------
1 | What is Luqum
2 | ###############
3 |
4 | Luqum stands for LUcene QUery Manipulator.
5 |
6 | It features a Python library with a parser for the `Lucene Query DSL`_ as found in
7 | the `Solr`_ `query syntax`_ or the
8 | `ElasticSearch`_ `query string`_.
9 |
10 | The parser builds a tree from the query (see :ref:`tutorial-parsing`).
11 |
12 | This tree can eventually be manipulated
13 | and then transformed back into a query string,
14 | or used to generate other forms of query.
15 |
16 | In particular, luqum ships with
17 | a utility to transform a Lucene query
18 | into a query using the Elasticsearch query DSL, in JSON form
19 | (see :ref:`tutorial-elastic`).
20 |
21 | You may use this to:
22 |
23 | * run sanity checks on a query
24 | * run your own checks on a query (e.g. forbid certain fields)
25 | * replace some expressions in a query
26 | * pretty print a query
27 | * inject queries into queries
28 | * extend the Lucene query language semantics
29 |
30 | The parser is built using `PLY`_.
31 |
32 | Luqum is dual licensed under Apache2.0 and LGPLv3.
33 |
34 | .. warning::
35 |
36 | While it has been used in production by our team for some time,
37 | this library is still a work in progress and lacks some features.
38 |
39 | Contributions are welcome.
40 |
41 | .. _`Lucene Query DSL`: https://lucene.apache.org/core/3_6_0/queryparsersyntax.html
42 | .. _`Solr`: http://lucene.apache.org/solr/
43 | .. _`query syntax`: https://wiki.apache.org/solr/SolrQuerySyntax
44 | .. _`ElasticSearch`: https://www.elastic.co/products/elasticsearch
45 | .. _`query string`: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
46 | .. _`PLY`: http://www.dabeaz.com/ply/ply.html
47 |
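48 | As a first taste, a minimal sketch of parsing a query and printing it back
49 | in a readable layout (using the ``prettify`` helper from ``luqum.pretty``)::
50 |
51 |     from luqum.parser import parser
52 |     from luqum.pretty import prettify
53 |
54 |     # build the syntax tree, then pretty print it
55 |     tree = parser.parse('foo:bar AND baz:(spam OR ham)')
56 |     print(prettify(tree))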
--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
1 | API
2 | #####
3 |
4 | Parsing and constructing queries
5 | ==================================
6 |
7 | This is the core of the library: a parser and the syntax tree definition.
8 |
9 | luqum.parser
10 | ---------------
11 |
12 | .. automodule:: luqum.parser
13 | :members: parser, parse
14 |
15 | luqum.thread
16 | ---------------
17 |
18 | .. automodule:: luqum.thread
19 | :members: parse
20 |
21 | luqum.tree
22 | ---------------
23 |
24 | .. automodule:: luqum.tree
25 | :members:
26 | :member-order: bysource
27 |
28 | .. _elasticsearch-api:
29 |
30 | Transforming to Elasticsearch queries
31 | ======================================
32 |
33 | luqum.elasticsearch.schema
34 | --------------------------
35 |
36 | .. autoclass:: luqum.elasticsearch.schema.SchemaAnalyzer
37 | :members:
38 | :member-order: bysource
39 |
40 |
41 | luqum.elasticsearch
42 | --------------------
43 |
44 | .. autoclass:: luqum.elasticsearch.visitor.ElasticsearchQueryBuilder
45 | :members: __init__, __call__
46 | :member-order: bysource
47 |
48 |
49 | Naming and explaining matches
50 | ==============================
51 |
52 |
53 | luqum.naming
54 | ------------
55 |
56 | .. automodule:: luqum.naming
57 | :members:
58 | :member-order: bysource
59 |
60 |
61 | Utilities
62 | ==========
63 |
64 |
65 | luqum.visitor: Manipulating trees
66 | ----------------------------------
67 |
68 | .. automodule:: luqum.visitor
69 | :members:
70 | :member-order: bysource
71 |
72 |
73 | luqum.auto_head_tail: Automatic addition of spaces
74 | --------------------------------------------------
75 |
76 | .. automodule:: luqum.auto_head_tail
77 | :members:
78 |
79 | luqum.pretty: Pretty printing
80 | ------------------------------
81 |
82 | .. automodule:: luqum.pretty
83 | :members:
84 |
85 | luqum.check: Checking for validity
86 | -----------------------------------
87 |
88 | .. automodule:: luqum.check
89 | :members:
90 |
91 | luqum.utils: Misc
92 | -----------------
93 |
94 | .. automodule:: luqum.utils
95 | :members:
96 | :member-order: bysource
97 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # luqum documentation build configuration file, created by
5 | # sphinx-quickstart on Wed Apr 13 10:25:52 2016.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | import sys
17 | import os
18 |
19 | # If extensions (or modules to document with autodoc) are in another directory,
20 | # add these directories to sys.path here. If the directory is relative to the
21 | # documentation root, use os.path.abspath to make it absolute, like shown here.
22 | #sys.path.insert(0, os.path.abspath('.'))
23 | sys.path.insert(0, os.path.abspath('../..'))
24 |
25 | # -- General configuration ------------------------------------------------
26 |
27 | # If your documentation needs a minimal Sphinx version, state it here.
28 | #needs_sphinx = '1.0'
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = [
34 | 'sphinx.ext.intersphinx',
35 | 'sphinx.ext.autodoc',
36 | 'sphinx.ext.graphviz',
37 | 'alabaster',
38 | ]
39 |
40 | graphviz_dot_args = ["-Nshape=box"]
41 | graphviz_output_format = "svg"
42 |
43 | # Add any paths that contain templates here, relative to this directory.
44 | templates_path = ['_templates']
45 |
46 | # The suffix(es) of source filenames.
47 | # You can specify multiple suffix as a list of string:
48 | # source_suffix = ['.rst', '.md']
49 | source_suffix = '.rst'
50 |
51 | # The encoding of source files.
52 | #source_encoding = 'utf-8-sig'
53 |
54 | # The master toctree document.
55 | master_doc = 'index'
56 |
57 | # General information about the project.
58 | project = 'luqum'
59 | copyright = '2016, jurismarches'
60 | author = 'jurismarches'
61 |
62 | # The version info for the project you're documenting, acts as replacement for
63 | # |version| and |release|, also used in various other places throughout the
64 | # built documents.
65 | #
66 | # The short X.Y version.
67 | version = '0.7'
68 | # The full version, including alpha/beta/rc tags.
69 | release = '0.7.1'
70 |
71 | # The language for content autogenerated by Sphinx. Refer to documentation
72 | # for a list of supported languages.
73 | #
74 | # This is also used if you do content translation via gettext catalogs.
75 | # Usually you set "language" from the command line for these cases.
76 | language = "en"
77 |
78 | # There are two options for replacing |today|: either, you set today to some
79 | # non-false value, then it is used:
80 | #today = ''
81 | # Else, today_fmt is used as the format for a strftime call.
82 | #today_fmt = '%B %d, %Y'
83 |
84 | # List of patterns, relative to source directory, that match files and
85 | # directories to ignore when looking for source files.
86 | # This patterns also effect to html_static_path and html_extra_path
87 | exclude_patterns = []
88 |
89 | # The reST default role (used for this markup: `text`) to use for all
90 | # documents.
91 | #default_role = None
92 |
93 | # If true, '()' will be appended to :func: etc. cross-reference text.
94 | #add_function_parentheses = True
95 |
96 | # If true, the current module name will be prepended to all description
97 | # unit titles (such as .. function::).
98 | #add_module_names = True
99 |
100 | # If true, sectionauthor and moduleauthor directives will be shown in the
101 | # output. They are ignored by default.
102 | #show_authors = False
103 |
104 | # The name of the Pygments (syntax highlighting) style to use.
105 | pygments_style = 'sphinx'
106 |
107 | # A list of ignored prefixes for module index sorting.
108 | #modindex_common_prefix = []
109 |
110 | # If true, keep warnings as "system message" paragraphs in the built documents.
111 | #keep_warnings = False
112 |
113 | # If true, `todo` and `todoList` produce output, else they produce nothing.
114 | todo_include_todos = False
115 |
116 |
117 | # -- Options for HTML output ----------------------------------------------
118 |
119 | # The theme to use for HTML and HTML Help pages. See the documentation for
120 | # a list of builtin themes.
121 | html_theme = 'alabaster'
122 |
123 | # Theme options are theme-specific and customize the look and feel of a theme
124 | # further. For a list of options available for each theme, see the
125 | # documentation.
126 | #html_theme_options = {}
127 | html_theme_options = {
128 | 'logo': 'luqum-logo.png',
129 | 'description': 'LUcene QUery Manipulator in python',
130 | 'github_user': 'jurismarches',
131 | 'github_repo': 'luqum',
132 | 'github_banner': True}
133 |
134 | # Add any paths that contain custom themes here, relative to this directory.
135 | #html_theme_path = []
136 |
137 | # The name for this set of Sphinx documents.
138 | # " v documentation" by default.
139 | #html_title = 'luqum v1.0'
140 |
141 | # A shorter title for the navigation bar. Default is the same as html_title.
142 | #html_short_title = None
143 |
144 | # The name of an image file (relative to this directory) to place at the top
145 | # of the sidebar.
146 | #html_logo = None
147 |
148 | # The name of an image file (relative to this directory) to use as a favicon of
149 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
150 | # pixels large.
151 | #html_favicon = None
152 |
153 | # Add any paths that contain custom static files (such as style sheets) here,
154 | # relative to this directory. They are copied after the builtin static files,
155 | # so a file named "default.css" will overwrite the builtin "default.css".
156 | html_static_path = ['_static']
157 |
158 | # Add any extra paths that contain custom files (such as robots.txt or
159 | # .htaccess) here, relative to this directory. These files are copied
160 | # directly to the root of the documentation.
161 | #html_extra_path = []
162 |
163 | # If not None, a 'Last updated on:' timestamp is inserted at every page
164 | # bottom, using the given strftime format.
165 | # The empty string is equivalent to '%b %d, %Y'.
166 | #html_last_updated_fmt = None
167 |
168 | # If true, SmartyPants will be used to convert quotes and dashes to
169 | # typographically correct entities.
170 | #html_use_smartypants = True
171 |
172 | # Custom sidebar templates, maps document names to template names.
173 | #html_sidebars = {}
174 | html_sidebars = {'**': [
175 | 'about.html',
176 | 'navigation.html',
177 | 'relations.html',
178 | 'searchbox.html',
179 | 'donate.html']}
180 |
181 |
182 | # Additional templates that should be rendered to pages, maps page names to
183 | # template names.
184 | #html_additional_pages = {}
185 |
186 | # If false, no module index is generated.
187 | #html_domain_indices = True
188 |
189 | # If false, no index is generated.
190 | #html_use_index = True
191 |
192 | # If true, the index is split into individual pages for each letter.
193 | #html_split_index = False
194 |
195 | # If true, links to the reST sources are added to the pages.
196 | #html_show_sourcelink = True
197 |
198 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
199 | #html_show_sphinx = True
200 |
201 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
202 | #html_show_copyright = True
203 |
204 | # If true, an OpenSearch description file will be output, and all pages will
205 | # contain a tag referring to it. The value of this option must be the
206 | # base URL from which the finished HTML is served.
207 | #html_use_opensearch = ''
208 |
209 | # This is the file name suffix for HTML files (e.g. ".xhtml").
210 | #html_file_suffix = None
211 |
212 | # Language to be used for generating the HTML full-text search index.
213 | # Sphinx supports the following languages:
214 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
215 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
216 | #html_search_language = 'en'
217 |
218 | # A dictionary with options for the search language support, empty by default.
219 | # 'ja' uses this config value.
220 | # 'zh' user can custom change `jieba` dictionary path.
221 | #html_search_options = {'type': 'default'}
222 |
223 | # The name of a javascript file (relative to the configuration directory) that
224 | # implements a search results scorer. If empty, the default will be used.
225 | #html_search_scorer = 'scorer.js'
226 |
227 | # Output file base name for HTML help builder.
228 | htmlhelp_basename = 'luqumdoc'
229 |
230 | # -- Options for LaTeX output ---------------------------------------------
231 |
232 | latex_elements = {
233 | # The paper size ('letterpaper' or 'a4paper').
234 | #'papersize': 'letterpaper',
235 |
236 | # The font size ('10pt', '11pt' or '12pt').
237 | #'pointsize': '10pt',
238 |
239 | # Additional stuff for the LaTeX preamble.
240 | #'preamble': '',
241 |
242 | # Latex figure (float) alignment
243 | #'figure_align': 'htbp',
244 | }
245 |
246 | # Grouping the document tree into LaTeX files. List of tuples
247 | # (source start file, target name, title,
248 | # author, documentclass [howto, manual, or own class]).
249 | latex_documents = [
250 | (master_doc, 'luqum.tex', 'luqum Documentation',
251 | 'jurismarches', 'manual'),
252 | ]
253 |
254 | # The name of an image file (relative to this directory) to place at the top of
255 | # the title page.
256 | #latex_logo = None
257 |
258 | # For "manual" documents, if this is true, then toplevel headings are parts,
259 | # not chapters.
260 | #latex_use_parts = False
261 |
262 | # If true, show page references after internal links.
263 | #latex_show_pagerefs = False
264 |
265 | # If true, show URL addresses after external links.
266 | #latex_show_urls = False
267 |
268 | # Documents to append as an appendix to all manuals.
269 | #latex_appendices = []
270 |
271 | # If false, no module index is generated.
272 | #latex_domain_indices = True
273 |
274 |
275 | # -- Options for manual page output ---------------------------------------
276 |
277 | # One entry per manual page. List of tuples
278 | # (source start file, name, description, authors, manual section).
279 | man_pages = [
280 | (master_doc, 'luqum', 'luqum Documentation',
281 | [author], 1)
282 | ]
283 |
284 | # If true, show URL addresses after external links.
285 | #man_show_urls = False
286 |
287 |
288 | # -- Options for Texinfo output -------------------------------------------
289 |
290 | # Grouping the document tree into Texinfo files. List of tuples
291 | # (source start file, target name, title, author,
292 | # dir menu entry, description, category)
293 | texinfo_documents = [
294 | (master_doc, 'luqum', 'luqum Documentation',
295 | author, 'luqum', 'One line description of project.',
296 | 'Miscellaneous'),
297 | ]
298 |
299 | # Documents to append as an appendix to all manuals.
300 | #texinfo_appendices = []
301 |
302 | # If false, no module index is generated.
303 | #texinfo_domain_indices = True
304 |
305 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
306 | #texinfo_show_urls = 'footnote'
307 |
308 | # If true, do not generate a @detailmenu in the "Top" node's menu.
309 | #texinfo_no_detailmenu = False
310 |
311 |
312 | # Example configuration for intersphinx: refer to the Python standard library.
313 | intersphinx_mapping = {'https://docs.python.org/': None}
314 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. luqum documentation master file, created by
2 | sphinx-quickstart on Wed Apr 13 10:25:52 2016.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to luqum's documentation!
7 | =================================
8 |
9 | Contents:
10 |
11 | .. toctree::
12 | :maxdepth: 2
13 |
14 | about
15 | install
16 | quick_start
17 | api
18 |
19 |
20 |
21 |
22 | Indices and tables
23 | ==================
24 |
25 | * :ref:`genindex`
26 | * :ref:`modindex`
27 | * :ref:`search`
28 |
29 |
--------------------------------------------------------------------------------
/docs/source/install.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Installation
3 | ============
4 |
5 | At the command line:
6 |
7 | .. code-block:: bash
8 |
9 | $ [sudo] pip install luqum
10 |
11 |
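12 | To check the installation, you can parse a first query from a Python
13 | shell. Luqum preserves the original text when printing a tree back, so a
14 | minimal smoke test looks like:
15 | 
16 | .. code-block:: python
17 | 
18 |     >>> from luqum.parser import parser
19 |     >>> print(parser.parse('foo AND bar'))
20 |     foo AND bar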
--------------------------------------------------------------------------------
/luqum-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jurismarches/luqum/64d3d4a1bcdc56f156185df1f4d146582f294998/luqum-logo.png
--------------------------------------------------------------------------------
/luqum/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | __version__ = '1.0.0'
4 | __version_info__ = tuple(__version__.split('.'))
5 |
--------------------------------------------------------------------------------
/luqum/auto_head_tail.py:
--------------------------------------------------------------------------------
1 | """It can be teadious to add spaces in a tree you generate programatically.
2 |
3 | This module provide a utility to transform a tree so that it contains necessary head/tail
4 | for expression to be printable.
5 | """
6 |
7 | from . import visitor
8 |
9 |
10 | class AutoHeadTail(visitor.TreeTransformer):
11 |     """This class implements a transformer so that a hand-built tree
12 |     can have reasonable values for `head` and `tail` on its items,
13 | in order for the expression to be printable.
14 | """
15 |
16 | SPACER = " "
17 |
18 | def add_head(self, node):
19 | if not node.head:
20 | node.head = self.SPACER
21 |
22 | def add_tail(self, node):
23 | if not node.tail:
24 | node.tail = self.SPACER
25 |
26 | def visit_base_operation(self, node, context):
27 | new_node = node.clone_item()
28 | children = list(self.clone_children(node, new_node, context))
29 | # add tail to first node
30 | self.add_tail(children[0])
31 | # add head and tail to inner nodes
32 | for child in children[1:-1]:
33 | self.add_head(child)
34 | self.add_tail(child)
35 | # add head to last
36 | self.add_head(children[-1])
37 | new_node.children = children
38 | yield new_node
39 |
40 | def visit_unknown_operation(self, node, context):
41 | new_node = node.clone_item()
42 | children = list(self.clone_children(node, new_node, context))
43 | # add tail to each node, but last
44 | for child in children[:-1]:
45 | self.add_tail(child)
46 | new_node.children = children
47 | yield new_node
48 |
49 | def visit_not(self, node, context):
50 | new_node = node.clone_item()
51 | children = list(self.clone_children(node, new_node, context))
52 | # add head to children, to have space between NOT and sub expression
53 | self.add_head(children[0])
54 | new_node.children = children
55 | yield new_node
56 |
57 | def visit_range(self, node, context):
58 | new_node = node.clone_item()
59 | children = list(self.clone_children(node, new_node, context))
60 | # add tail to lower_bound, and head to upper bound
61 | self.add_tail(children[0])
62 | self.add_head(children[-1])
63 | new_node.children = children
64 | yield new_node
65 |
66 | def __call__(self, tree):
67 | new_tree = self.visit(tree)
68 | return new_tree
69 |
70 |
71 | auto_head_tail = AutoHeadTail()
72 | """Instance of AutoHeadTail to automatically add head and tail to items of a lucene tree so that it is printable
73 | """
74 |
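75 | # Usage sketch (illustrative, not part of the module): a hand-built tree has
76 | # empty head/tail, so printing it runs the tokens together; applying
77 | # auto_head_tail inserts the missing spacers.
78 | #
79 | #     from luqum.tree import AndOperation, Word
80 | #
81 | #     tree = AndOperation(Word("foo"), Word("bar"))
82 | #     tree = auto_head_tail(tree)
83 | #     str(tree)  # 'foo AND bar'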
--------------------------------------------------------------------------------
/luqum/check.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import functools
3 | import math
4 | import re
5 |
6 | from . import tree
7 | from . import visitor
8 | from .exceptions import NestedSearchFieldException, ObjectSearchFieldException
9 | from .utils import flatten_nested_fields_specs, normalize_object_fields_specs
10 |
11 |
12 | def camel_to_lower(name):
13 | return "".join(
14 | "_" + w.lower() if w.isupper() else w.lower()
15 | for w in name).lstrip("_")
16 |
17 |
18 | sign = functools.partial(math.copysign, 1)
19 |
20 |
21 | def _check_children(f):
22 | """A decorator to call check on item children
23 | """
24 | @functools.wraps(f)
25 | def wrapper(self, item, parents):
26 | yield from f(self, item, parents)
27 | for child in item.children:
28 | yield from self.check(child, parents + [item])
29 | return wrapper
30 |
31 |
32 | class LuceneCheck:
33 | """Check if a query is consistent
34 |
35 | This is intended to use with query constructed as tree,
36 | as well as those parsed by the parser, which is more tolerant.
37 |
38 | :param int zeal: if zeal > 0 do extra check of some pitfalls, depending on zeal level
39 | """
40 | field_name_re = re.compile(r"^\w+$")
41 | space_re = re.compile(r"\s")
42 | invalid_term_chars_re = re.compile(r"[+/-]")
43 |
44 | SIMPLE_EXPR_FIELDS = (
45 | tree.Boost, tree.Proximity, tree.Fuzzy, tree.Word, tree.Phrase)
46 |
47 | FIELD_EXPR_FIELDS = tuple(list(SIMPLE_EXPR_FIELDS) + [tree.FieldGroup])
48 |
49 | def __init__(self, zeal=0):
50 | self.zeal = zeal
51 |
52 | def _check_field_name(self, fname):
53 | return self.field_name_re.match(fname) is not None
54 |
55 | @_check_children
56 | def check_search_field(self, item, parents):
57 | if not self._check_field_name(item.name):
58 | yield "%s is not a valid field name" % item.name
59 | if not isinstance(item.expr, self.FIELD_EXPR_FIELDS):
60 | yield "field expression is not valid : %s" % item
61 |
62 | @_check_children
63 | def check_group(self, item, parents):
64 | if parents and isinstance(parents[-1], tree.SearchField):
65 | yield "Group misuse, after SearchField you should use Group : %s" % parents[-1]
66 |
67 | @_check_children
68 | def check_field_group(self, item, parents):
69 | if not parents or not isinstance(parents[-1], tree.SearchField):
70 | yield ("FieldGroup misuse, it must be used after SearchField : %s" %
71 | (parents[-1] if parents else item))
72 |
73 | def check_range(self, item, parents):
74 | # TODO check lower bound <= higher bound taking into account wildcard and numbers
75 | return iter([])
76 |
77 | def check_word(self, item, parents):
78 | if self.space_re.search(item.value):
79 | yield "A single term value can't hold a space %s" % item
80 | if self.zeal and self.invalid_term_chars_re.search(item.value):
81 | yield "Invalid characters in term value: %s" % item.value
82 |
83 | def check_fuzzy(self, item, parents):
84 | if sign(item.degree) < 0:
85 | yield "invalid degree %d, it must be positive" % item.degree
86 | if not isinstance(item.term, tree.Word):
87 | yield "Fuzzy should be on a single term in %s" % str(item)
88 |
89 | def check_proximity(self, item, parents):
90 | if not isinstance(item.term, tree.Phrase):
91 | yield "Proximity can be only on a phrase in %s" % str(item)
92 |
93 | @_check_children
94 | def check_boost(self, item, parents):
95 | return iter([])
96 |
97 | @_check_children
98 | def check_base_operation(self, item, parents):
99 | return iter([])
100 |
101 | @_check_children
102 | def check_plus(self, item, parents):
103 | return iter([])
104 |
105 | def _check_not_operator(self, item, parents):
106 | """Common checker for NOT and - operators"""
107 | if self.zeal:
108 | if isinstance(parents[-1], tree.OrOperation):
109 | yield ("Prohibit or Not really means 'AND NOT' " +
110 |                        "which is inconsistent with OR operation in %s" % parents[-1])
111 |
112 | @_check_children
113 | def check_not(self, item, parents):
114 | return self._check_not_operator(item, parents)
115 |
116 | @_check_children
117 | def check_prohibit(self, item, parents):
118 | return self._check_not_operator(item, parents)
119 |
120 | def check(self, item, parents=[]):
121 |         # dispatch the check to another method
122 | for cls in item.__class__.mro():
123 | meth = getattr(self, "check_" + camel_to_lower(cls.__name__), None)
124 | if meth is not None:
125 | yield from meth(item, parents)
126 | break
127 | else:
128 | yield "Unknown item type %s : %s" % (item.__class__.__name__, str(item))
129 |
130 | def __call__(self, tree):
131 |         """return True only if there are no errors
132 | """
133 | for error in self.check(tree):
134 | return False
135 | return True
136 |
137 | def errors(self, tree):
138 | """List all errors"""
139 | return list(self.check(tree))
140 |
141 |
142 | class CheckNestedFields(visitor.TreeVisitor):
143 | """
144 | Visit the lucene tree to make some checks
145 |
146 | In particular to check nested fields.
147 |
148 |     :param nested_fields: a dict where keys are names of nested fields,
149 |       values are dicts of sub-nested fields, or an empty dict for a leaf
150 | :param object_fields:
151 | this is either None, in which case unknown object fields will be accepted,
152 | or a dict of sub-nested fields (like nested_fields)
153 | """
154 |
155 | def __init__(self, nested_fields, object_fields=None, sub_fields=None):
156 | assert isinstance(nested_fields, dict)
157 | self.object_fields = normalize_object_fields_specs(object_fields)
158 | self.object_prefixes = set(k.rsplit(".", 1)[0] for k in self.object_fields or [])
159 | self.nested_fields = flatten_nested_fields_specs(nested_fields)
160 | self.nested_prefixes = set(k.rsplit(".", 1)[0] for k in self.nested_fields)
161 | self.sub_fields = normalize_object_fields_specs(sub_fields)
162 | super().__init__(track_parents=True)
163 |
164 | def visit_search_field(self, node, context):
165 | """
166 | On search field node, check nested fields logic
167 | """
168 | child_context = dict(context) # copy
169 | child_context["prefix"] = context["prefix"] + node.name.split(".")
170 | yield from self.generic_visit(node, child_context)
171 |
172 | def _check_final_operation(self, node, context):
173 | prefix = context["prefix"]
174 | if prefix:
175 | fullname = ".".join(prefix)
176 | if fullname in self.nested_prefixes:
177 | raise NestedSearchFieldException(
178 | ('''"{expr}" can't be directly attributed to "{field}"''' +
179 | ''' as it is a nested field''')
180 | .format(expr=str(node), field=fullname))
181 | elif fullname in self.object_prefixes:
182 | raise NestedSearchFieldException(
183 | ('''"{expr}" can't be directly attributed to "{field}"''' +
184 | ''' as it is an object field''')
185 | .format(expr=str(node), field=fullname))
186 |             # note: the above checks do not apply to subfields,
187 |             # as their field can hold an expression
188 | elif len(prefix) > 1:
189 | unknown_field = (
190 | self.sub_fields is not None and
191 | self.object_fields is not None and
192 | fullname not in self.sub_fields and
193 | fullname not in self.object_fields and
194 | fullname not in self.nested_fields)
195 | if unknown_field:
196 | raise ObjectSearchFieldException(
197 | '''"{expr}" attributed to unknown nested or object field "{field}"'''
198 | .format(expr=str(node), field=fullname))
199 |
200 | def visit_phrase(self, node, context):
201 | """
202 | On phrase field, verify term is in a final search field
203 | """
204 | yield self._check_final_operation(node, context)
205 |
206 | def visit_term(self, node, context):
207 | """
208 | On term field, verify term is in a final search field
209 | """
210 | yield self._check_final_operation(node, context)
211 |
212 | def __call__(self, tree):
213 | return list(self.visit_iter(tree, context={"prefix": []}))
214 |
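215 | # Usage sketch (illustrative): checking a parsed query.
216 | #
217 | #     from luqum.parser import parser
218 | #
219 | #     checker = LuceneCheck(zeal=1)
220 | #     tree = parser.parse("title:(spam AND eggs)")
221 | #     checker(tree)          # True when no error is found
222 | #     checker.errors(tree)   # list of error messages, empty when the query is valid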
--------------------------------------------------------------------------------
/luqum/deprecated_utils.py:
--------------------------------------------------------------------------------
1 | """Deprecated visitor helper classes.
2 |
3 | see :py:mod:`luqum.visitor` for newer implementations
4 | """
5 |
6 | import warnings
7 |
8 | from .visitor import camel_to_lower
9 |
10 |
11 | class LuceneTreeVisitor:
12 | """
13 | Tree Visitor base class, inspired by python's :class:`ast.NodeVisitor`.
14 |
15 | This class is meant to be subclassed, with the subclass implementing
16 | visitor methods for each Node type it is interested in.
17 |
18 |     By default, those visitor methods should be named ``'visit_'`` + the class
19 |     name of the node, converted to lower_case (e.g. visit_search_node for a
20 | SearchNode class).
21 |
22 | You can tweak this behaviour by overriding the `visitor_method_prefix` &
23 | `generic_visitor_method_name` class attributes.
24 |
25 | If the goal is to modify the initial tree,
26 |     use :py:class:`LuceneTreeTransformer` instead.
27 | """
28 | visitor_method_prefix = 'visit_'
29 | generic_visitor_method_name = 'generic_visit'
30 |
31 | _get_method_cache = None
32 |
33 | def _get_method(self, node):
34 | if self._get_method_cache is None:
35 | self._get_method_cache = {}
36 | try:
37 | meth = self._get_method_cache[type(node)]
38 | except KeyError:
39 | for cls in node.__class__.mro():
40 | try:
41 | method_name = "{}{}".format(
42 | self.visitor_method_prefix,
43 | camel_to_lower(cls.__name__)
44 | )
45 | meth = getattr(self, method_name)
46 | break
47 | except AttributeError:
48 | continue
49 | else:
50 | meth = getattr(self, self.generic_visitor_method_name)
51 | self._get_method_cache[type(node)] = meth
52 | return meth
53 |
54 | def visit(self, node, parents=None):
55 | """ Basic, recursive traversal of the tree. """
56 | warnings.warn(
57 | "LuceneTreeVisitor is deprecated in favor of visitor.TreeVisitor",
58 | DeprecationWarning,
59 | )
60 | parents = parents or []
61 | method = self._get_method(node)
62 | yield from method(node, parents)
63 | for child in node.children:
64 | yield from self.visit(child, parents + [node])
65 |
66 | def generic_visit(self, node, parents=None):
67 | """
68 | Default visitor function, called if nothing matches the current node.
69 | """
70 | return iter([]) # No-op
71 |
72 |
73 | class LuceneTreeTransformer(LuceneTreeVisitor):
74 | """
75 | A :class:`LuceneTreeVisitor` subclass that walks the abstract syntax tree
76 | and allows modifications of traversed nodes.
77 |
78 |     The `LuceneTreeTransformer` will walk the AST and use the return value of the
79 | visitor methods to replace or remove the old node. If the return value of
80 | the visitor method is ``None``, the node will be removed from its location,
81 | otherwise it is replaced with the return value. The return value may be the
82 | original node, in which case no replacement takes place.
83 | """
84 |
85 | def replace_node(self, old_node, new_node, parent):
86 | for k, v in parent.__dict__.items(): # pragma: no branch
87 | if v == old_node:
88 | parent.__dict__[k] = new_node
89 | break
90 | elif isinstance(v, list):
91 | try:
92 | i = v.index(old_node)
93 | if new_node is None:
94 | del v[i]
95 | else:
96 | v[i] = new_node
97 | break
98 | except ValueError:
99 | pass # this was not the attribute containing old_node
100 | elif isinstance(v, tuple):
101 | try:
102 | i = v.index(old_node)
103 | v = list(v)
104 | if new_node is None:
105 | del v[i]
106 | else:
107 | v[i] = new_node
108 | parent.__dict__[k] = tuple(v)
109 | break
110 | except ValueError:
111 | pass # this was not the attribute containing old_node
112 |
113 | def generic_visit(self, node, parent=None):
114 | return node
115 |
116 | def visit(self, node, parents=None):
117 | """
118 | Recursively traverses the tree and replace nodes with the appropriate
119 | visitor method's return values.
120 | """
121 | warnings.warn(
122 | "LuceneTreeTransformer is deprecated in favor of visitor.TreeTransformer",
123 | DeprecationWarning,
124 | )
125 | parents = parents or []
126 | method = self._get_method(node)
127 | new_node = method(node, parents)
128 | if parents:
129 | self.replace_node(node, new_node, parents[-1])
130 | node = new_node
131 | if node is not None:
132 | for child in node.children:
133 | self.visit(child, parents + [node])
134 | return node
135 |
136 |
137 | class LuceneTreeVisitorV2(LuceneTreeVisitor):
138 | """
139 | V2 of the LuceneTreeVisitor allowing to evaluate the AST
140 |
141 |     It differs from :py:class:`LuceneTreeVisitor`
142 | because it's up to the visit method to recursively call children (or not)
143 |
144 | This class is meant to be subclassed, with the subclass implementing
145 | visitor methods for each Node type it is interested in.
146 |
147 |     By default, those visitor methods should be named ``'visit_'`` + the class
148 |     name of the node, converted to lower_case (e.g. visit_search_node for a
149 | SearchNode class).
150 |
151 | You can tweak this behaviour by overriding the `visitor_method_prefix` &
152 | `generic_visitor_method_name` class attributes.
153 |
154 | If the goal is to modify the initial tree,
155 |     use :py:class:`LuceneTreeTransformer` instead.
156 | """
157 |
158 | def visit(self, node, parents=None, context=None):
159 | """ Basic, recursive traversal of the tree.
160 |
161 | :param list parents: the list of parents
162 |         :param dict context: a dict of contextual variables for free use
163 | to track states while traversing the tree
164 | """
165 | warnings.warn(
166 | "LuceneTreeVisitorV2 is deprecated in favor of visitor.TreeVisitor",
167 | DeprecationWarning,
168 | )
169 | if parents is None:
170 | parents = []
171 |
172 | method = self._get_method(node)
173 | return method(node, parents, context)
174 |
175 | def generic_visit(self, node, parents=None, context=None):
176 | """
177 | Default visitor function, called if nothing matches the current node.
178 | """
179 | raise AttributeError(
180 | "No visitor found for this type of node: {}".format(
181 | node.__class__
182 | )
183 | )
184 |
--------------------------------------------------------------------------------
/luqum/elasticsearch/__init__.py:
--------------------------------------------------------------------------------
1 | from .visitor import ElasticsearchQueryBuilder # noqa:
2 | from .schema import SchemaAnalyzer # noqa:
3 |
--------------------------------------------------------------------------------
/luqum/elasticsearch/nested.py:
--------------------------------------------------------------------------------
1 | """If you have a query with a nested query containing operations,
2 | when using named queries, Elasticsearch won't report inner matching.
3 |
4 | This is a problem if you use them extensively.
5 | """
6 |
7 |
8 | def get_first_name(query):
9 | if isinstance(query, dict):
10 | if "_name" in query:
11 | return query["_name"]
12 | elif "bool" in query:
13 | # do not go down bool
14 | return None
15 | else:
16 | children = query.values()
17 | elif isinstance(query, list):
18 | children = query
19 | else:
20 | return None
21 | iter_candidates = (get_first_name(child) for child in children)
22 | candidates = [candidate for candidate in iter_candidates if candidate is not None]
23 | return candidates[0] if candidates else None
24 |
25 |
26 | def extract_nested_queries(query, query_nester=None):
27 |     """Given a query,
28 |     extract all queries that sit under a nested query combined with boolean operations,
29 |     returning an atomic nested version of them.
30 |     Those nested queries also take care of changing the name to the nearest inner name.
31 | 
32 |     This is useful because Elasticsearch won't go down into nested queries to explain why they match.
33 |
34 | :param dict query: elasticsearch query to analyze
35 |     :param callable query_nester: the function called to nest sub-queries; leave it to its default
36 |     :return list: the queries you should run to get all matches
37 |
38 | .. note:: because we re-nest part of bool queries, results might not be accurate
39 | for::
40 |           {"bool": {"must": [
41 |             {"nested": {"path": "a", "query": {"match": {"x": "y"}}}},
42 |             {"nested": {"path": "a", "query": {"match": {"x": "z"}}}}
43 |           ]}}
44 |         is not the same as::
45 |           {"nested": {"path": "a", "query": {"bool": {"must": [{"match": {"x": "y"}}, {"match": {"x": "z"}}]}}}}
46 |
47 | if x is multivalued.
48 |     The first would match `{"a": [{"x": "y"}, {"x": "z"}]}`,
49 |     while the second would only match if `x` contains `"y z"` or `"z y"`.
50 | """
51 | queries = [] # this contains our result
52 | in_nested = query_nester is not None
53 | sub_query_nester = query_nester
54 | if isinstance(query, dict):
55 | if "nested" in query:
56 | params = {k: v for k, v in query["nested"].items() if k not in ("query", "name")}
57 |
58 | def sub_query_nester_func(req, name):
59 | nested = {"nested": {"query": req, **params}}
60 | if query_nester is not None:
61 | nested = query_nester(nested, name)
62 | if name is not None:
63 | nested["nested"]["_name"] = name
64 | return nested
65 |
66 | sub_query_nester = sub_query_nester_func
67 |
68 | bool_param = {"must", "should", "must_not"} & set(query.keys())
69 | if bool_param and in_nested:
70 | # we are in a list of operations in a bool inside a nested,
71 | # make a query with nested on sub arguments
72 | op, = bool_param # must or should or must_not
73 | # normalize to a list
74 | sub_queries = query[op] if isinstance(query[op], list) else [query[op]]
75 | # add nesting
76 | nested_sub_queries = [
77 | query_nester(sub_query, get_first_name(sub_query)) for sub_query in sub_queries
78 | ]
79 | # those are queries we want to return
80 | queries.extend(nested_sub_queries)
81 | # continue processing in each sub query
82 | # (before nesting, nesting is contained in query_nester)
83 | children = sub_queries
84 | else:
85 | children = query.values()
86 | elif isinstance(query, list):
87 | children = query
88 | else:
89 |         # leaf: end of recursion
90 | children = []
91 |
92 | # recurse
93 | for child_query in children:
94 | queries.extend(
95 | extract_nested_queries(child_query, query_nester=sub_query_nester)
96 | )
97 | return queries
98 |
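99 | # Usage sketch (illustrative): given an Elasticsearch query with boolean
100 | # operations inside a nested query, collect the atomic nested sub-queries to
101 | # run separately, so each match can be reported.
102 | #
103 | #     es_query = {"nested": {"path": "authors", "query": {"bool": {"must": [
104 | #         {"match": {"authors.firstname": "paul"}},
105 | #         {"match": {"authors.lastname": "smith"}},
106 | #     ]}}}}
107 | #     for sub_query in extract_nested_queries(es_query):
108 | #         ...  # run each sub query against ES to know which parts matched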
--------------------------------------------------------------------------------
/luqum/elasticsearch/schema.py:
--------------------------------------------------------------------------------
1 | """Analyze an Elasticsearch schema to provide helpers for query transformation
2 | """
3 |
4 |
5 | class SchemaAnalyzer:
6 |     """A helper that analyzes an Elasticsearch schema, to give you suitable options
7 | to use when transforming queries.
8 |
9 | :param dict schema: the index settings as a dict.
10 | """
11 |
12 | def __init__(self, schema):
13 | self.settings = schema.get("settings", {})
14 | mappings = schema.get("mappings", {})
15 | if mappings.get("properties"):
16 | # ES >= 6 : one document type per index
17 | self.mappings = {"_doc": mappings}
18 | else:
19 | # ES < 6 : multiple document types per index allowed
20 | self.mappings = mappings
21 |
22 | def _dot_name(self, fname, parents):
23 | return ".".join([p[0] for p in parents] + [fname])
24 |
25 | def default_field(self):
26 | try:
27 | return self.settings["query"]["default_field"]
28 | except KeyError:
29 | return "*"
30 |
31 | def _walk_properties(self, properties, parents=None, subfields=False):
32 | if parents is None:
33 | parents = []
34 | for fname, fdef in properties.items():
35 | yield fname, fdef, parents
36 | if subfields and "fields" in fdef:
37 | subfield_parents = parents + [(fname, fdef)]
38 |                 subdef = dict(fdef)  # sub-field definitions override their parent's
39 | subfield_defs = subdef.pop("fields")
40 | for fname, fdef in subfield_defs.items():
41 | fdef = dict(subdef, **fdef)
42 | yield fname, fdef, subfield_parents
43 | inner_properties = fdef.get("properties", {})
44 | if inner_properties:
45 | new_parents = parents + [(fname, fdef)]
46 | yield from self._walk_properties(inner_properties, new_parents, subfields)
47 |
48 | def iter_fields(self, subfields=False):
49 | for mapping in self.mappings.values():
50 | yield from self._walk_properties(mapping.get("properties", {}), subfields=subfields)
51 |
52 | def not_analyzed_fields(self):
53 | for fname, fdef, parents in self.iter_fields(subfields=True):
54 | not_analyzed = (
55 | (fdef.get("type") == "string" and fdef.get("index", "") == "not_analyzed") or
56 | fdef.get("type") not in ("text", "string", "nested", "object")
57 | )
58 | if not_analyzed:
59 | yield self._dot_name(fname, parents)
60 |
61 | def nested_fields(self):
62 | result = {}
63 | for fname, fdef, parents in self.iter_fields():
64 | pdef = parents[-1][1] if parents else {}
65 | if pdef.get("type") == "nested":
66 | target = result
67 | cumulated = []
68 | for n, _ in parents:
69 | cumulated.append(n)
70 | key = ".".join(cumulated)
71 | if key in target:
72 | target = target[key]
73 | cumulated = []
74 | if cumulated:
75 | key = ".".join(cumulated)
76 | target = target.setdefault(key, {})
77 | target[fname] = {}
78 | return result
79 |
80 | def object_fields(self):
81 | for fname, fdef, parents in self.iter_fields():
82 | pdef = parents[-1][1] if parents else {}
83 | if pdef.get("type") == "object" and fdef.get("type") not in ("object", "nested"):
84 | yield self._dot_name(fname, parents)
85 |
86 | def sub_fields(self):
87 | """return all known subfields
88 | """
89 |         # we do not ask for subfields, as they would be lost in the mass
90 | for fname, fdef, parents in self.iter_fields():
91 | subfields = fdef.get("fields")
92 | if subfields:
93 | subfield_parents = parents + [(fname, fdef)]
94 | for subname in subfields:
95 | yield self._dot_name(subname, subfield_parents)
96 |
97 | def query_builder_options(self):
98 | """return options suitable for
99 | :py:class:`luqum.elasticsearch.visitor.ElasticsearchQueryBuilder`
100 | """
101 | return {
102 | "default_field": self.default_field(),
103 | "not_analyzed_fields": list(self.not_analyzed_fields()),
104 | "nested_fields": self.nested_fields(),
105 | "object_fields": list(self.object_fields()),
106 | }
107 |
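108 | # Usage sketch (illustrative, assuming an elasticsearch-py client named
109 | # `es_client` and a parsed luqum tree named `tree`):
110 | #
111 | #     from luqum.elasticsearch import ElasticsearchQueryBuilder, SchemaAnalyzer
112 | #
113 | #     schema = es_client.indices.get(index="my_index")["my_index"]
114 | #     analyzer = SchemaAnalyzer(schema)
115 | #     es_builder = ElasticsearchQueryBuilder(**analyzer.query_builder_options())
116 | #     es_query = es_builder(tree)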
--------------------------------------------------------------------------------
/luqum/exceptions.py:
--------------------------------------------------------------------------------
1 | class InconsistentQueryException(Exception):
2 |     """Raised when a query has a problem in its structure
3 | """
4 |
5 |
6 | class OrAndAndOnSameLevel(InconsistentQueryException):
7 | """
8 |     Raised when an OR and an AND are on the same level, as we don't know how to
9 | handle this case
10 | """
11 |
12 |
13 | class NestedSearchFieldException(InconsistentQueryException):
14 | """
15 |     Raised when a SearchField is nested in another SearchField, as it doesn't
16 |     make sense. For instance: field1:(spam AND field2:eggs)
17 | """
18 |
19 |
20 | class ObjectSearchFieldException(InconsistentQueryException):
21 | """
22 |     Raised when a dotted field name is queried which is not an object field
23 | """
24 |
25 |
26 | class ParseError(ValueError):
27 | """Exception while parsing a lucene statement
28 | """
29 |
30 |
31 | class ParseSyntaxError(ParseError):
32 | """Raised when parser encounters an invalid statement
33 | """
34 |
35 |
36 | class IllegalCharacterError(ParseError):
37 | """
38 | Raised when parser encounters an invalid character
39 | """
40 |
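41 | # Usage sketch (illustrative): all parsing exceptions derive from ParseError,
42 | # which itself derives from ValueError.
43 | #
44 | #     from luqum.parser import parser
45 | #     from luqum.exceptions import ParseError
46 | #
47 | #     try:
48 | #         parser.parse("foo AND (")
49 | #     except ParseError as exc:
50 | #         print("invalid query:", exc)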
--------------------------------------------------------------------------------
/luqum/head_tail.py:
--------------------------------------------------------------------------------
1 | """Utilities to manage head and tail of elements
2 |
3 | The scope is to avoid losing part of the original text in the final tree.
4 | """
5 | from .tree import Item
6 |
7 |
8 | class TokenValue:
9 |
10 | def __init__(self, value):
11 | self.value = value
12 | self.pos = None
13 | self.size = None
14 | self.head = ""
15 | self.tail = ""
16 |
17 | def __repr__(self):
18 | return "TokenValue(%s)" % self.value
19 |
20 | def __str__(self):
21 | return str(self.value) if self.value else ""
22 |
23 |
24 | class HeadTailLexer:
25 | """Utility to handle head and tail at lexer time.
26 | """
27 |
28 | LEXER_ATTR = "_luqum_headtail"
29 |
30 | @classmethod
31 | def handle(cls, token, orig_value):
32 | """Handling a token.
33 |
34 | .. note::
35 |             PLY does not give access to previous tokens,
36 |             nor does it provide any infrastructure for handling specific state.
37 | 
38 |             So we use the strategy
39 |             of putting a :py:class:`HeadTailLexer` instance as an attribute of the lexer
40 | each time we start a new tokenization.
41 | """
42 | # get instance
43 | if token.lexpos == 0:
44 | # first token make instance
45 | instance = cls()
46 | setattr(token.lexer, cls.LEXER_ATTR, instance)
47 | else:
48 | instance = getattr(token.lexer, cls.LEXER_ATTR)
49 | # handle
50 | instance.handle_token(token, orig_value)
51 |
52 | def __init__(self):
53 | self.head = None
54 |         """This will track the head of the next element, useful only for the first element
55 | """
56 | self.last_elt = None
57 | """This will track the last token, so we can use it to add the tail to it.
58 | """
59 |
60 | def handle_token(self, token, orig_value):
61 | """Handle head and tail for tokens
62 |
63 |         The scope is to avoid losing part of the original text and keep it in elements.
64 | """
65 | # handle headtail
66 | if token.type == "SEPARATOR":
67 | if token.lexpos == 0:
68 | # spaces at expression start, head for next token
69 | self.head = token.value
70 | else:
71 | # tail of last processed token
72 | if self.last_elt is not None:
73 | self.last_elt.value.tail += token.value
74 | else:
75 | # if there is a head, apply
76 | head = self.head
77 | if head is not None:
78 | token.value.head = head
79 | self.head = None
80 |                 # keep track of the token, to apply the tail later
81 | self.last_elt = token
82 | # also set pos and size
83 | if isinstance(token.value, (Item, TokenValue)):
84 | token.value.pos = token.lexpos
85 | token.value.size = len(orig_value)
86 |
87 |
88 | token_headtail = HeadTailLexer.handle
89 |
90 |
91 | class HeadTailManager:
92 |     """Utility to handle head and tail at expression parse time
93 | """
94 |
95 | def pos(self, p, head_transfer=False, tail_transfer=False):
96 |         """Compute pos and size of element 0 based on its parts (p[1:])
97 |
98 | :param list p: the parser expression as in PLY
99 |         :param bool head_transfer: True if head of first child will be transferred to p[0]
100 |         :param bool tail_transfer: True if tail of last child will be transferred to p[0]
101 | """
102 | # pos
103 | if p[1].pos is not None:
104 | p[0].pos = p[1].pos
105 | if not head_transfer:
106 |                 # head isn't transferred, so we are before it
107 | p[0].pos -= len(p[1].head)
108 | # size
109 | p[0].size = sum(
110 | (elt.size or 0) + len(elt.head or "") + len(elt.tail or "") for elt in p[1:])
111 | if head_transfer and p[1].head:
112 | # we account head in size, remove it
113 | p[0].size -= len(p[1].head)
114 | last_p = p[len(p) - 1] # negative indexing not supported by PLY
115 | if tail_transfer and last_p.tail:
116 |             # we account tail in size, remove it
117 | p[0].size -= len(last_p.tail)
118 |
119 | def binary_operation(self, p, op_tail):
120 | self.pos(p, head_transfer=False, tail_transfer=False)
121 | # correct size
122 | p[0].size -= len(op_tail)
123 |
124 | def simple_term(self, p):
125 | self.pos(p, head_transfer=True, tail_transfer=True)
126 | p[0].head = p[1].head
127 | p[0].tail = p[1].tail
128 |
129 | def unary(self, p):
130 | """OP expr"""
131 | self.pos(p, head_transfer=True, tail_transfer=False)
132 | p[0].head = p[1].head
133 | p[2].head = p[1].tail + p[2].head
134 |
135 | def post_unary(self, p):
136 | """expr OP"""
137 | self.pos(p, head_transfer=False, tail_transfer=True)
138 | p[1].tail += p[2].head
139 | p[0].tail = p[2].tail
140 |
141 | def paren(self, p):
142 | """( expr )"""
143 | self.pos(p, head_transfer=True, tail_transfer=True)
144 | # p[0] is global element (Group or FieldGroup)
145 | # p[2] is content
146 | # p[1] is left parenthesis
147 | p[0].head = p[1].head
148 | p[2].head = p[1].tail + p[2].head
149 | # p[3] is right parenthesis
150 | p[2].tail += p[3].head
151 | p[0].tail = p[3].tail
152 |
153 | def range(self, p):
154 | """[ expr TO expr ]"""
155 | self.pos(p, head_transfer=True, tail_transfer=True)
156 | # p[0] is global element (Range)
157 | # p[2] is lower bound
158 | p[0].head = p[1].head
159 | p[2].head = p[1].tail + p[2].head
160 | # p[3] is TO
161 | # p[4] is upper bound
162 | p[2].tail += p[3].head
163 | p[4].head = p[3].tail + p[4].head
164 |         # p[5] is the closing bracket
165 | p[4].tail += p[5].head
166 | p[0].tail = p[5].tail
167 |
168 | def search_field(self, p):
169 | """name: expr"""
170 | self.pos(p, head_transfer=True, tail_transfer=False)
171 | # p[0] is global element (SearchField)
172 | # p[1] is search field name
173 | # p[2] is COLUMN
174 | p[0].head = p[1].head
175 | if p[1].tail or p[2].head:
176 |             pass  # FIXME: add warning, or handle space between colon and name in SearchField?
177 | # p[3] is the expression
178 | p[3].head = p[2].tail + p[3].head
179 |
180 |
181 | head_tail = HeadTailManager()
182 | """singleton of HeadTailManager
183 | """
184 |
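185 | # Illustration (not part of the module): thanks to the head/tail bookkeeping
186 | # above, the original text, including its exact whitespace, can be rebuilt
187 | # from the parsed tree.
188 | #
189 | #     from luqum.parser import parser
190 | #
191 | #     expr = 'foo   AND ( bar OR  baz )'
192 | #     str(parser.parse(expr)) == expr  # True: whitespace is preserved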
--------------------------------------------------------------------------------
/luqum/naming.py:
--------------------------------------------------------------------------------
1 | """Support for naming expressions
2 |
3 | In order to use Elasticsearch named queries, we need to be able to assign names to expressions
4 | and retrieve their positions in the query text.
5 |
6 | This module adds support for that.
7 | """
8 | from . import tree
9 | from .visitor import PathTrackingVisitor, PathTrackingTransformer
10 |
11 |
12 | #: Names are added to tree items via an attribute named `_luqum_name`
13 | NAME_ATTR = "_luqum_name"
14 |
15 |
16 | def set_name(node, value):
17 | setattr(node, NAME_ATTR, value)
18 |
19 |
20 | def get_name(node):
21 | return getattr(node, NAME_ATTR, None)
22 |
23 |
24 | class TreeAutoNamer(PathTrackingVisitor):
25 | """Helper for :py:func:`auto_name`
26 | """
27 |
28 |     LETTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
29 | _pos_letter = {l: i for i, l in enumerate(LETTERS)}
30 |
31 | def next_name(self, name):
32 | """Given name, return next name
33 |
34 | ::
35 | >>> tan = TreeAutoNamer()
36 | >>> tan.next_name(None)
37 | 'a'
38 | >>> tan.next_name('aZ')
39 | 'aZa'
40 | >>> tan.next_name('azb')
41 | 'azc'
42 | """
43 | if name is None:
44 | # bootstrap
45 | return self.LETTERS[0]
46 | else:
47 | actual_pos = self._pos_letter[name[-1]]
48 | try:
49 | # we want to increment last letter
50 | return name[:-1] + self.LETTERS[actual_pos + 1]
51 | except IndexError:
52 |                 # we exhausted the letters, add a new one instead
53 | return name + self.LETTERS[0]
54 |
55 | def visit_base_operation(self, node, context):
56 | """name is to be set on children of operations
57 | """
58 |         # put a _name on each child
59 | name = context["global"]["name"]
60 | for i, child in enumerate(node.children):
61 | name = self.next_name(name)
62 | set_name(child, name)
63 | # remember name to path
64 | context["global"]["name_to_path"][name] = context["path"] + (i,)
65 | # put name back in global context
66 | context["global"]["name"] = name
67 | yield from self.generic_visit(node, context)
68 |
69 | def visit(self, node):
70 | """visit the tree and add names to nodes while tracking their path
71 | """
72 | # trick: we use a "global" dict inside context dict so that when we copy context,
73 | # we still track the same objects
74 | context = {"global": {"name": None, "name_to_path": {}}}
75 | super().visit(node, context)
76 | name_to_path = context["global"]["name_to_path"]
77 | # handle special case, if we have no name so far, put one on the root
78 | if not name_to_path:
79 | node_name = self.next_name(context["global"]["name"])
80 | set_name(node, node_name)
81 | name_to_path[node_name] = ()
82 | return name_to_path
83 |
84 |
85 | def auto_name(tree, targets=None, all_names=False):
86 | """Automatically add names to nodes of a parse tree, in order to be able to track matching.
87 |
88 |     We add them to the top nodes under operations, as this is where it is useful for ES named queries.
89 | 
90 |     :return dict: association of each name with the path (as a tuple) to the corresponding child
91 | """
92 | return TreeAutoNamer().visit(tree)
93 |
94 |
95 | def matching_from_names(names, name_to_path):
96 | """Utility to convert a list of name and the result of auto_name
97 | to the matching parameter for :py:class:`MatchingPropagator`
98 |
99 | :param list names: list of names
100 | :param dict name_to_path: association of names with path to children
101 | :return tuple: (set of matching paths, set of other known paths)
102 | """
103 | matching = {name_to_path[name] for name in names}
104 | return (matching, set(name_to_path.values()) - matching)
105 |
106 |
107 | def element_from_path(tree, path):
108 | """Given a tree, retrieve element corresponding to path
109 |
110 | :param luqum.tree.Item tree: luqum expression tree
111 | :param tuple path: tuple representing top down access to a child
112 | :return luqum.tree.Item: target item
113 | """
114 |     # python prefers iteration over recursion
115 | node = tree
116 | path = list(path)
117 | while path:
118 | node = node.children[path.pop(0)]
119 | return node
120 |
121 |
122 | def element_from_name(tree, name, name_to_path):
123 | return element_from_path(tree, name_to_path[name])
124 |
125 |
126 | class MatchingPropagator:
127 | """Class propagating matching to upper elements based on known base element matching
128 |
129 | :param luqum.tree.Item default_operation: tells how to treat UnknownOperation.
130 | Choose between :py:class:`luqum.tree.OrOperation` and :py:class:`luqum.tree.AndOperation`
131 | """
132 |
133 | OR_NODES = (tree.OrOperation,)
134 |     """A tuple of node types considered as OR operations
135 | """
136 | NEGATION_NODES = (tree.Not, tree.Prohibit)
137 |     """A tuple of node types considered as NOT operations
138 | """
139 | NO_CHILDREN_PROPAGATE = (tree.Range, tree.BaseApprox)
140 | """A tuple of nodes for which propagation is of no use
141 | """
142 |
143 | def __init__(self, default_operation=tree.OrOperation):
144 | if default_operation is tree.OrOperation:
145 | self.OR_NODES = self.OR_NODES + (tree.UnknownOperation,)
146 |
147 | def _status_from_parent(self, path, matching, other):
148 |         """Get status from the nearest parent in the hierarchy which had a name
149 | """
150 | if path in matching:
151 | return True
152 | elif path in other:
153 | return False
154 | elif not path:
155 | return False
156 | else:
157 | return self._status_from_parent(path[:-1], matching, other)
158 |
159 | def _propagate(self, node, matching, other, path):
160 | """recursively propagate matching
161 |
162 | return tuple: (
163 | node is matching,
164 |             set of paths of matching sub-nodes,
165 |             set of paths of non-matching sub-nodes)
166 | """
167 |         paths_ok = set()  # paths of nodes that are matching
168 |         paths_ko = set()  # paths of nodes that are not matching
169 |         children_status = []  # bool for each child, indicating whether it matches
170 | # recurse children
171 | if node.children and not isinstance(node, self.NO_CHILDREN_PROPAGATE):
172 | for i, child in enumerate(node.children):
173 | child_ok, sub_ok, sub_ko = self._propagate(
174 | child, matching, other, path + (i,),
175 | )
176 | paths_ok.update(sub_ok)
177 | paths_ko.update(sub_ko)
178 | children_status.append(child_ok)
179 | # resolve node status
180 | if path in matching:
181 | node_ok = True
182 | elif children_status: # compute from children
183 | # compute parent success from children
184 | operator = any if isinstance(node, self.OR_NODES) else all
185 | node_ok = operator(children_status)
186 | else:
187 | node_ok = self._status_from_parent(path, matching, other)
188 | if isinstance(node, self.NEGATION_NODES):
189 | # negate result
190 | node_ok = not node_ok
191 | # add node to the right set
192 | target_set = paths_ok if node_ok else paths_ko
193 | target_set.add(path)
194 | # return result
195 | return node_ok, paths_ok, paths_ko
196 |
197 | def __call__(self, tree, matching, other=frozenset()):
198 | """
199 | Given a list of paths that are known to match,
200 |         return all paths in the tree that match.
201 |
202 | .. note:: we do not descend into nodes that are positive.
203 | Normally matching just provides nodes at the right levels
204 | for propagation to be effective.
205 |             Descending would mean risking giving inconsistent information.
206 |
207 |         :param list matching: list of paths of matching nodes (each path is a tuple)
208 |         :param list other: list of other paths that had a name, but were not reported as matching
209 |
210 | :return tuple: (
211 |             set of matching paths after propagation,
212 |             set of non-matching paths after propagation)
213 | """
214 | tree_ok, paths_ok, paths_ko = self._propagate(tree, matching, other, ())
215 | return paths_ok, paths_ko
216 |
217 |
218 | class ExpressionMarker(PathTrackingTransformer):
219 | """A visitor to mark a tree based on elements belonging to a path or not
220 |
221 |     One intended usage is to add markers around nodes matching a request,
222 |     by altering the tail and head of elements.
223 | """
224 |
225 | def mark_node(self, node, path, *info):
226 |         """Implement this in your own code, maybe altering the node's head / tail attributes
227 | """
228 | return node
229 |
230 | def generic_visit(self, node, context):
231 | # we simply generate new_node and mark it
232 | new_node, = super().generic_visit(node, context)
233 | yield self.mark_node(new_node, context["path"], *context["info"])
234 |
235 | def __call__(self, tree, *info):
236 | return self.visit(tree, context={"info": info})
237 |
238 |
239 | class HTMLMarker(ExpressionMarker):
240 |     """From paths that are ok or ko, add html elements with the right class around elements
241 |
242 | :param str ok_class: class for elements in paths_ok
243 | :param str ko_class: class for elements in paths_ko
244 | :param str element: html element used to surround sub expressions
245 | """
246 |
247 | def __init__(self, ok_class="ok", ko_class="ko", element="span"):
248 | super().__init__()
249 | self.ok_class = ok_class
250 | self.ko_class = ko_class
251 | self.element = element
252 |
253 | def css_class(self, path, paths_ok, paths_ko):
254 | return self.ok_class if path in paths_ok else self.ko_class if path in paths_ko else None
255 |
256 | def mark_node(self, node, path, paths_ok, paths_ko, parcimonious):
257 | node_class = self.css_class(path, paths_ok, paths_ko)
258 | add_class = node_class is not None
259 | if add_class and parcimonious:
260 | # find nearest parent with a class
261 | parent_class = None
262 | parent_path = path
263 | while parent_class is None and parent_path:
264 | parent_path = parent_path[:-1]
265 | parent_class = self.css_class(parent_path, paths_ok, paths_ko)
266 | # only add class if different from parent
267 | add_class = node_class != parent_class
268 | if add_class:
269 | node.head = f'<{self.element} class="{node_class}">{node.head}'
270 |             node.tail = f'{node.tail}</{self.element}>'
271 | return node
272 |
273 | def __call__(self, tree, paths_ok, paths_ko, parcimonious=True):
274 | """representation of tree, adding html elements with right class around subexpressions
275 | according to their presence in paths_ok or paths_ko
276 |
277 | :param tree: a luqum tree
278 |         :param paths_ok: set of paths to nodes (expressed as tuples of ints) that should get ok_class
279 | :param paths_ko: set of path to nodes that should get ko_class
280 |         :param parcimonious: only add a class when the parent node does not have the same class
281 |
282 | :return str: expression with html elements surrounding part of expression
283 | with right class attribute according to paths_ok and paths_ko
284 | """
285 | new_tree = super().__call__(tree, paths_ok, paths_ko, parcimonious)
286 | return new_tree.__str__(head_tail=True)
287 |
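288 | # Usage sketch (illustrative): naming a tree, propagating matches reported by
289 | # Elasticsearch named queries, and rendering them as HTML. The name "a" below
290 | # assumes it is among the generated names.
291 | #
292 | #     from luqum.parser import parser
293 | #
294 | #     tree = parser.parse("foo AND (bar OR baz)")
295 | #     name_to_path = auto_name(tree)
296 | #     # suppose ES matched_queries reported the name of the first operand
297 | #     matching, other = matching_from_names(["a"], name_to_path)
298 | #     paths_ok, paths_ko = MatchingPropagator()(tree, matching, other)
299 | #     html = HTMLMarker()(tree, paths_ok, paths_ko)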
--------------------------------------------------------------------------------
/luqum/parsetab.py:
--------------------------------------------------------------------------------
1 |
2 | # parsetab.py
3 | # This file is automatically generated. Do not edit.
4 | # pylint: disable=W,C,R
5 | _tabversion = '3.10'
6 |
7 | _lr_method = 'LALR'
8 |
9 | _lr_signature = 'leftIMPLICIT_OPleftOR_OPleftAND_OPnonassocPLUSMINUSnonassocBOOSTnonassocTOrightUMINUSAND_OP APPROX BOOST COLUMN GREATERTHAN LBRACKET LESSTHAN LPAREN MINUS NOT OR_OP PHRASE PLUS RBRACKET REGEX RPAREN TERM TOexpression : expression OR_OP expressionexpression : expression AND_OP expressionexpression : expression expression %prec IMPLICIT_OPunary_expression : PLUS unary_expressionunary_expression : MINUS unary_expressionunary_expression : NOT unary_expressionexpression : unary_expressionunary_expression : LPAREN expression RPARENunary_expression : LBRACKET phrase_or_possibly_negative_term TO phrase_or_possibly_negative_term RBRACKETpossibly_negative_term : MINUS phrase_or_term %prec UMINUS\n | phrase_or_termphrase_or_possibly_negative_term : possibly_negative_term\n | PHRASEunary_expression : LESSTHAN phrase_or_termunary_expression : GREATERTHAN phrase_or_termunary_expression : TERM COLUMN unary_expressionunary_expression : PHRASEunary_expression : PHRASE APPROXunary_expression : unary_expression BOOSTunary_expression : TERMunary_expression : TERM APPROXunary_expression : REGEXunary_expression : TOphrase_or_term : TERM\n | PHRASE'
10 |
11 | _lr_action_items = {'PLUS':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[3,3,-7,3,3,3,3,-23,-20,-17,-22,3,3,3,-19,-4,-5,-6,3,-24,-14,-25,-15,3,-21,-18,3,3,-8,-16,-9,]),'MINUS':([0,1,2,3,4,5,6,7,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,37,39,41,],[4,4,-7,4,4,4,4,25,-23,-20,-17,-22,4,4,4,-19,-4,-5,-6,4,-24,-14,-25,-15,4,-21,-18,4,4,-8,25,-16,-9,]),'NOT':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[5,5,-7,5,5,5,5,-23,-20,-17,-22,-3,5,5,-19,-4,-5,-6,5,-24,-14,-25,-15,5,-21,-18,-1,-2,-8,-16,-9,]),'LPAREN':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[6,6,-7,6,6,6,6,-23,-20,-17,-22,-3,6,6,-19,-4,-5,-6,6,-24,-14,-25,-15,6,-21,-18,-1,-2,-8,-16,-9,]),'LBRACKET':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[7,7,-7,7,7,7,7,-23,-20,-17,-22,-3,7,7,-19,-4,-5,-6,7,-24,-14,-25,-15,7,-21,-18,-1,-2,-8,-16,-9,]),'LESSTHAN':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[9,9,-7,9,9,9,9,-23,-20,-17,-22,-3,9,9,-19,-4,-5,-6,9,-24,-14,-25,-15,9,-21,-18,-1,-2,-8,-16,-9,]),'GREATERTHAN':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[10,10,-7,10,10,10,10,-23,-20,-17,-22,-3,10,10,-19,-4,-5,-6,10,-24,-14,-25,-15,10,-21,-18,-1,-2,-8,-16,-9,]),'TERM':([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,25,27,28,29,30,31,32,33,34,35,36,37,39,41,],[11,11,-7,11,11,11,11,27,-23,27,27,-20,-17,-22,-3,11,11,-19,-4,-5,-6,11,27,-24,-14,-25,-15,11,-21,-18,-1,-2,-8,27,-16,-9,]),'PHRASE':([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,25,27,28,29,30,31,32,33,34,35,36,37,39,41,],[12,12,-7,12,12,12,12,24,-23,29,29,-20,-17,-22,-3,12,12,-19,-4,-5,-6,12,29,-24,-14,-25,-15,12,-21,-18,-1,-2,-8,24,-16,-9,]),'REGEX':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[13,13,-7,13,13,13,13,-23,-20,-17,-22,-3,13,13,-19,-4,-5,-6,13,-24,-14,-25,-15,13,-21,-18,-1,-2,-8,-16,-9,]),'TO':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,38,39,41,],[8,8,-7,8,8,8,8,-23,-20,-17,-22,8,8,8,-19,-4,-5,-6,8,37,-12,-13,-11,-24,-14,-25,-15,8,-21,-18,8,8,-8,-10,-16,-9,]),'$end':([1,2,8,11,12,13,14,17,18,19,20,27,28,29,30,32,33,34,35,36,39,41,],[0,-7,-23,-20,-17,-22,-3,-19,-4,-5,-6,-24,-14,-25,-15,-21,-18,-1,-2,-8,-16,-9,]),'OR_OP':([1,2,8,11,12,13,14,17,18,19,20,21,27,28,29,30,32,33,34,35,36,39,41,],[15,-7,-23,-20,-17,-22,15,-19,-4,-5,-6,15,-24,-14,-25,-15,-21,-18,-1,-2,-8,-16,-9,]),'AND_OP':([1,2,8,11,12,13,14,17,18,19,20,21,27,28,29,30,32,33,34,35,36,39,41,],[16,-7,-23,-20,-17,-22,16,-19,-4,-5,-6,16,-24,-14,-25,-15,-21,-18,16,-2,-8,-16,-9,]),'RPAREN':([2,8,11,12,13,14,17,18,19,20,21,27,28,29,30,32,33,34,35,36,39,41,],[-7,-23,-20,-17,-22,-3,-19,-4,-5,-6,36,-24,-14,-25,-15,-21,-18,-1,-2,-8,-16,-9,]),'BOOST':([2,8,11,12,13,17,18,19,20,27,28,29,30,32,33,36,39,41,],[17,-23,-20,-17,-22,-19,17,17,17,-24,-14,-25,-15,-21,-18,-8,17,-9,]),'COLUMN':([11,],[31,]),'APPROX':([11,12,],[32,33,]),'RBRACKET':([23,24,26,27,29,38,40,],[-12,-13,-11,-24,-25,-10,41,]),}
12 |
13 | _lr_action = {}
14 | for _k, _v in _lr_action_items.items():
15 | for _x,_y in zip(_v[0],_v[1]):
16 | if not _x in _lr_action: _lr_action[_x] = {}
17 | _lr_action[_x][_k] = _y
18 | del _lr_action_items
19 |
20 | _lr_goto_items = {'expression':([0,1,6,14,15,16,21,34,35,],[1,14,21,14,34,35,14,14,14,]),'unary_expression':([0,1,3,4,5,6,14,15,16,21,31,34,35,],[2,2,18,19,20,2,2,2,2,2,39,2,2,]),'phrase_or_possibly_negative_term':([7,37,],[22,40,]),'possibly_negative_term':([7,37,],[23,23,]),'phrase_or_term':([7,9,10,25,37,],[26,28,30,38,26,]),}
21 |
22 | _lr_goto = {}
23 | for _k, _v in _lr_goto_items.items():
24 | for _x, _y in zip(_v[0], _v[1]):
25 | if not _x in _lr_goto: _lr_goto[_x] = {}
26 | _lr_goto[_x][_k] = _y
27 | del _lr_goto_items
28 | _lr_productions = [
29 | ("S' -> expression","S'",1,None,None,None),
30 | ('expression -> expression OR_OP expression','expression',3,'p_expression_or','parser.py',254),
31 | ('expression -> expression AND_OP expression','expression',3,'p_expression_and','parser.py',260),
32 | ('expression -> expression expression','expression',2,'p_expression_implicit','parser.py',266),
33 | ('unary_expression -> PLUS unary_expression','unary_expression',2,'p_expression_plus','parser.py',272),
34 | ('unary_expression -> MINUS unary_expression','unary_expression',2,'p_expression_minus','parser.py',278),
35 | ('unary_expression -> NOT unary_expression','unary_expression',2,'p_expression_not','parser.py',284),
36 | ('expression -> unary_expression','expression',1,'p_expression_unary','parser.py',290),
37 | ('unary_expression -> LPAREN expression RPAREN','unary_expression',3,'p_grouping','parser.py',295),
38 | ('unary_expression -> LBRACKET phrase_or_possibly_negative_term TO phrase_or_possibly_negative_term RBRACKET','unary_expression',5,'p_range','parser.py',301),
39 | ('possibly_negative_term -> MINUS phrase_or_term','possibly_negative_term',2,'p_possibly_negative_term','parser.py',313),
40 | ('possibly_negative_term -> phrase_or_term','possibly_negative_term',1,'p_possibly_negative_term','parser.py',314),
41 | ('phrase_or_possibly_negative_term -> possibly_negative_term','phrase_or_possibly_negative_term',1,'p_phrase_or_possibly_negative_term','parser.py',323),
42 | ('phrase_or_possibly_negative_term -> PHRASE','phrase_or_possibly_negative_term',1,'p_phrase_or_possibly_negative_term','parser.py',324),
43 | ('unary_expression -> LESSTHAN phrase_or_term','unary_expression',2,'p_lessthan','parser.py',329),
44 | ('unary_expression -> GREATERTHAN phrase_or_term','unary_expression',2,'p_greaterthan','parser.py',336),
45 | ('unary_expression -> TERM COLUMN unary_expression','unary_expression',3,'p_field_search','parser.py',343),
46 | ('unary_expression -> PHRASE','unary_expression',1,'p_quoting','parser.py',352),
47 | ('unary_expression -> PHRASE APPROX','unary_expression',2,'p_proximity','parser.py',357),
48 | ('unary_expression -> unary_expression BOOST','unary_expression',2,'p_boosting','parser.py',363),
49 | ('unary_expression -> TERM','unary_expression',1,'p_terms','parser.py',369),
50 | ('unary_expression -> TERM APPROX','unary_expression',2,'p_fuzzy','parser.py',374),
51 | ('unary_expression -> REGEX','unary_expression',1,'p_regex','parser.py',380),
52 | ('unary_expression -> TO','unary_expression',1,'p_to_as_term','parser.py',386),
53 | ('phrase_or_term -> TERM','phrase_or_term',1,'p_phrase_or_term','parser.py',392),
54 | ('phrase_or_term -> PHRASE','phrase_or_term',1,'p_phrase_or_term','parser.py',393),
55 | ]
56 |
--------------------------------------------------------------------------------
/luqum/pretty.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """This module provides a pretty printer for lucene query tree.
3 | """
4 | from .tree import BaseOperation, BaseGroup, SearchField
5 |
6 |
7 | class _StickMarker:
8 |     """Used in a list between two elements that must stick together
9 | """
10 |
11 | def __len__(self):
12 | return 0
13 |
14 |
15 | # a marker to avoid a new line between two elements
16 | _STICK_MARKER = _StickMarker()
17 |
18 |
19 | class Prettifier(object):
20 | """Class to generate a pretty printer.
21 | """
22 |
23 | def __init__(self, indent=4, max_len=80, inline_ops=False):
24 | """
25 | The pretty printer factory.
26 |
27 |         :param int indent: number of spaces used for indentation
28 |         :param int max_len: maximum line length in number of characters.
29 |             The Prettifier will do its best to stay within this margin,
30 |             but as it can only split on operators, it may not always be possible.
31 |         :param bool inline_ops: if False (default), operators are printed on a new line;
32 |             if True, operators are printed at the end of the line.
33 | """
34 | self.indent = indent
35 | self.prefix = " " * self.indent
36 | self.max_len = max_len
37 | self.inline_ops = inline_ops
38 |
39 | def _get_chains(self, element, parent=None):
40 |         """Yield a mix of strings and lists, recursively
41 | 
42 |         An inner list represents a deeper level of indentation;
43 |         a string is an element of the current level
44 | """
45 | if isinstance(element, BaseOperation):
46 | if not isinstance(parent, BaseOperation) or element.op == parent.op:
47 | # same level, this is just associativity
48 | num_children = len(element.children)
49 | for n, child in enumerate(element.children):
50 | yield from self._get_chains(child, element)
51 | if n < num_children - 1:
52 | if self.inline_ops:
53 | yield _STICK_MARKER
54 | if element.op:
55 | yield element.op
56 | else:
57 | # another operation, raise level
58 | new_level = []
59 | num_children = len(element.children)
60 | for n, child in enumerate(element.children):
61 | new_level.extend(self._get_chains(child, element))
62 | if n < num_children - 1:
63 | if self.inline_ops:
64 | new_level.append(_STICK_MARKER)
65 | if element.op:
66 | new_level.append(element.op)
67 | yield new_level
68 | elif isinstance(element, BaseGroup):
69 | # raise level
70 | yield "("
71 | yield list(self._get_chains(element.expr, element))
72 | if self.inline_ops:
73 | yield _STICK_MARKER
74 | yield ")"
75 | elif isinstance(element, SearchField):
76 | # use recursion on sub expression
77 | yield element.name + ":"
78 | yield _STICK_MARKER
79 | yield from self._get_chains(element.expr, element)
80 | else:
81 | # simple element
82 | yield str(element)
83 |
84 | def _count_chars(self, element):
85 |         """Replace each element by a pair of the element and its character count (recursively)
86 | 
87 |         This helps compute whether elements can fit on a single line or not
88 | """
89 | if isinstance(element, list):
90 |             with_counts = [self._count_chars(c) for c in element]
91 | # when counting we add a space for joining
92 | return with_counts, sum(n + 1 for c, n in with_counts) - 1
93 | else:
94 | return element, len(element)
95 |
96 | def _apply_stick(self, elements):
97 | last = None
98 | sticking = False
99 | for current in elements:
100 | if current == _STICK_MARKER:
101 |                 assert last is not None, "_STICK_MARKER should never be first!"
102 | sticking = True
103 | elif sticking:
104 | last += " " + current
105 | sticking = False
106 | else:
107 | if last is not None:
108 | yield last
109 | last = current
110 | yield last
111 |
112 | def _concatenates(self, chain_with_counts, char_counts, level=0, in_one_liner=False):
113 |         """Take the result of _get_chains after it has passed through _count_chars,
114 |         and arrange it, using newlines and indentation where necessary
115 |
116 | :return string: prettified expression
117 | """
118 |         # evaluate whether it fits on one line
119 | one_liner = in_one_liner or char_counts < self.max_len - (self.indent * level)
120 | new_level = level if one_liner else level + 1
121 | elements = [
122 | self._concatenates(c, n, level=new_level, in_one_liner=one_liner)
123 | if isinstance(c, list)
124 | else c
125 | for c, n in chain_with_counts]
126 | elements = self._apply_stick(elements)
127 | prefix = self.prefix if level and not in_one_liner else ""
128 | join_char = " " if one_liner else ("\n" + prefix)
129 | return prefix + join_char.join(line for c in elements for line in c.split("\n"))
130 |
131 | def __call__(self, tree):
132 | """Pretty print the query represented by tree
133 |
134 | :param tree: a query tree using elements from :py:mod:`luqum.tree`
135 | """
136 | chains = list(self._get_chains(tree))
137 | chain_with_counts, total = self._count_chars(chains)
138 | return self._concatenates(chain_with_counts, total)
139 |
140 |
141 | prettify = Prettifier()
142 | """prettify function with default parameters
143 | """
144 |
--------------------------------------------------------------------------------
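A minimal usage sketch for the module above (the query string is illustrative):

    from luqum.parser import parser
    from luqum.pretty import Prettifier, prettify

    tree = parser.parse('(title:"Harry Potter" OR author:Rowling) AND n_pages:[100 TO *]')

    # default printer: 4-space indent, 80-character target width
    print(prettify(tree))

    # a narrower printer keeping operators at the end of lines
    narrow = Prettifier(indent=2, max_len=40, inline_ops=True)
    print(narrow(tree))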
/luqum/thread.py:
--------------------------------------------------------------------------------
1 | import threading
2 |
3 | from . import parser
4 |
5 | thread_local = threading.local()
6 |
7 |
8 | def parse(input=None, lexer=None, debug=False, tracking=False):
9 | """A (hopefully) thread safe version of :py:meth:`luqum.parser.parse`
10 |
11 |     PLY is not thread safe because of its lexer state, but by cloning the lexer
12 |     we can be thread safe. See: https://github.com/jurismarches/luqum/issues/72
13 | 
14 |     Warning: the parameters ``lexer``, ``debug`` and ``tracking`` are not used.
15 |     They are only present for signature compatibility.
16 | """
17 | if not hasattr(thread_local, "lexer"):
18 | thread_local.lexer = parser.lexer.clone()
19 | return parser.parser.parse(input, lexer=thread_local.lexer)
20 |
--------------------------------------------------------------------------------
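A minimal sketch of the intended use of the module above: several threads parsing
concurrently, each one transparently getting its own cloned lexer (queries are illustrative):

    import threading

    from luqum.thread import parse

    results = {}

    def worker(n):
        # the first call in each thread clones the module-level lexer
        results[n] = parse("field:value%d" % n)

    threads = [threading.Thread(target=worker, args=(n,)) for n in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()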
/luqum/visitor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Base classes to implement a visitor pattern.
3 | """
4 |
5 |
6 | def camel_to_lower(name):
7 | return "".join(
8 | "_" + w.lower() if w.isupper() else w.lower()
9 | for w in name).lstrip("_")
10 |
11 |
12 | class TreeVisitor:
13 | """
14 | Tree Visitor base class.
15 |
16 | This class is meant to be subclassed, with the subclass implementing
17 | visitor methods for each Node type it is interested in.
18 |
19 |     By default, those visitor methods should be named ``'visit_'`` + the class
20 |     name of the node, converted to lower_case (i.e. ``visit_search_node`` for a
21 |     ``SearchNode`` class) [#tweakvisit]_.
22 | 
23 |     It's up to the visit method of each node to recursively visit children (or not).
24 |     This may be done simply by calling the generic_visit method.
25 | 
26 |     By default, `generic_visit` simply triggers the visit of subnodes, yielding no information.
27 | 
28 |     If the goal is to modify the initial tree and get a new, modified copy,
29 |     use :py:class:`TreeTransformer` instead.
30 |
31 | .. [#tweakvisit]: You can tweak this behaviour
32 | by overriding the `visitor_method_prefix` & `generic_visitor_method_name` class attributes.
33 |
34 |     :param bool track_parents: if True, the context will contain the parents of the
35 |         current node. It's up to you to maintain them in your own visitor methods.
36 | """
37 | visitor_method_prefix = 'visit_'
38 | generic_visitor_method_name = 'generic_visit'
39 |
40 | def __init__(self, track_parents=False):
41 | self.track_parents = track_parents
42 |
43 | _get_method_cache = None
44 |
45 | def _get_method(self, node):
46 | if self._get_method_cache is None:
47 | self._get_method_cache = {}
48 | try:
49 | meth = self._get_method_cache[type(node)]
50 | except KeyError:
51 | for cls in node.__class__.mro():
52 | try:
53 | method_name = "{}{}".format(
54 | self.visitor_method_prefix,
55 | camel_to_lower(cls.__name__)
56 | )
57 | meth = getattr(self, method_name)
58 | break
59 | except AttributeError:
60 | continue
61 | else:
62 | meth = getattr(self, self.generic_visitor_method_name)
63 | self._get_method_cache[type(node)] = meth
64 | return meth
65 |
66 | def visit(self, tree, context=None):
67 | """Traversal of tree
68 |
69 | :param luqum.tree.Item tree: a tree representing a lucene expression
70 | :param dict context: a dict with initial values for context
71 |
72 |         .. note:: the values in context are not guaranteed to move up the hierarchy,
73 |             because we copy the context for children, so each branch has specific values.
74 | 
75 |             A trick you can use if you need values to move up the hierarchy
76 | is to set a `"global"` key containing a dict, where you can store values.
77 | """
78 | if context is None:
79 | context = {}
80 | return list(self.visit_iter(tree, context=context))
81 |
82 | def visit_iter(self, node, context):
83 | """
84 | Basic, recursive traversal of the tree.
85 |
86 |         :param luqum.tree.Item node: the node being visited
87 |         :param dict context: a dict of contextual variables for free use,
88 |             to track state while traversing the tree (eg. the current field name)
89 | """
90 | method = self._get_method(node)
91 | yield from method(node, context)
92 |
93 | def child_context(self, node, child, context, **kwargs):
94 | """Generate a context for children.
95 |
96 |         The child context is distinct from its parent's context,
97 |         so that visiting one branch does not affect the others.
98 | 
99 |         .. note:: If you need global parameters,
100 |             a trick is to put them in a dict under a "global" entry,
101 |             as we make a shallow copy of the context, not a deep one.
102 |
103 | :param luqum.tree.Item node: parent node
104 | :param luqum.tree.Item child: child node
105 | :param dict context: parent context
106 | :return dict: child context
107 | """
108 | child_context = dict(context)
109 | if self.track_parents:
110 | child_context["parents"] = context.get("parents", ()) + (node,)
111 | return child_context
112 |
113 | def generic_visit(self, node, context):
114 | """
115 | Default visitor function, called if nothing matches the current node.
116 |
117 |         It simply visits children.
118 |
119 | :param luqum.tree.Item node: current node
120 | :param dict context: context (aka local parameters received from parents)
121 | """
122 | for child in node.children:
123 | child_context = self.child_context(node, child, context)
124 | yield from self.visit_iter(child, context=child_context)
125 |
126 |
127 | class TreeTransformer(TreeVisitor):
128 |     """A version of TreeVisitor that is aimed at obtaining a transformed copy of the tree.
129 | 
130 |     .. note:: It is far better to build a transformed copy
131 |         than to modify the original tree in place, as it is less error prone.
132 | 
133 |     :param bool track_new_parents: do we want to track new parents in the context?
134 | """
135 |
136 | def __init__(self, track_new_parents=False, **kwargs):
137 | self.track_new_parents = track_new_parents
138 | super().__init__(**kwargs)
139 |
140 | def _clone_item(self, node):
141 | """simply call node.clone_item
142 |
143 |         Override this method to add specific tweaks if needed (like copying special attributes)
144 | """
145 | return node.clone_item()
146 |
147 | def visit(self, tree, context=None):
148 | """Visit the tree, by default building a copy and returning it.
149 |
150 | :param luqum.tree.Item tree: luqum expression tree
151 | :param context: optional initial context
152 | """
153 | if context is None:
154 | context = {}
155 | try:
156 | value, = self.visit_iter(tree, context=context)
157 | return value
158 | except ValueError as e:
159 | if str(e).startswith(("too many values to unpack", "not enough values to unpack")):
160 | exc = ValueError(
161 | "The visit of the tree should have produced exactly one element "
162 | "(the transformed tree)"
163 | )
164 | raise exc from e
165 | else:
166 | raise
167 |
168 | def child_context(self, node, child, context, **kwargs):
169 | child_context = super().child_context(node, child, context, **kwargs)
170 | if self.track_new_parents:
171 | child_context["new_parents"] = context.get("new_parents", ()) + (kwargs["new_node"],)
172 | return child_context
173 |
174 | def generic_visit(self, node, context):
175 | """
176 | Default visitor function, called if nothing matches the current node.
177 |
178 |         It simply clones the node and its children
179 | """
180 | new_node = self._clone_item(node)
181 | new_node.children = list(self.clone_children(node, new_node, context))
182 | yield new_node
183 |
184 | def clone_children(self, node, new_node, context):
185 | """Helper to clone children.
186 |
187 |         .. note:: for flexibility, a child may generate more than one child, or none,
188 |             but it's up to the transformer to ensure everything stays consistent
189 | """
190 | for child in node.children:
191 | child_context = self.child_context(node, child, context, new_node=new_node)
192 | new_children = self.visit_iter(child, context=child_context)
193 | for new_child in new_children:
194 | yield new_child
195 |
196 |
197 | class PathTrackingMixin:
198 |     """It can be useful to compute an element's path (as a tuple of indexes in parents' children)
199 |
200 | This mixin provides base components
201 | """
202 |
203 | def child_context(self, node, child, context, **kwargs):
204 |         """Thanks to "path" in context and "position" in kwargs, we add each child's path
205 | """
206 | child_context = super().child_context(node, child, context, **kwargs)
207 | child_context["path"] = context["path"] + (kwargs["position"],)
208 | return child_context
209 |
210 | def visit(self, node, context=None):
211 |         """visit the tree while tracking each node's path
212 | """
213 | if context is None:
214 | context = {}
215 | context["path"] = ()
216 | return super().visit(node, context=context)
217 |
218 |
219 | class PathTrackingVisitor(PathTrackingMixin, TreeVisitor):
220 | """Path tracking version of TreeVisitor
221 | """
222 |
223 | def generic_visit(self, node, context):
224 | for i, child in enumerate(node.children):
225 | child_context = self.child_context(node, child, context, position=i)
226 | yield from self.visit_iter(child, context=child_context)
227 |
228 |
229 | class PathTrackingTransformer(PathTrackingMixin, TreeTransformer):
230 | """Path tracking version of TreeTransformer
231 | """
232 |
233 | def clone_children(self, node, new_node, context):
234 | for i, child in enumerate(node.children):
235 | child_context = self.child_context(node, child, context, new_node=new_node, position=i)
236 | new_children = self.visit_iter(child, context=child_context)
237 | for new_child in new_children:
238 | yield new_child
239 |
--------------------------------------------------------------------------------
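To make the visitor and transformer contracts above concrete, here is a minimal sketch
(class names and the query are illustrative): a TreeVisitor subclass collecting word values,
and a TreeTransformer subclass producing an upper-cased copy of the tree:

    from luqum.auto_head_tail import auto_head_tail
    from luqum.parser import parser
    from luqum.tree import Word
    from luqum.visitor import TreeTransformer, TreeVisitor

    class WordCollector(TreeVisitor):
        # dispatched by class name: Word -> visit_word
        def visit_word(self, node, context):
            yield node.value

    class WordUpperCaser(TreeTransformer):
        def visit_word(self, node, context):
            # yield a new node instead of mutating the original tree
            yield Word(node.value.upper())

    tree = parser.parse("foo AND bar")
    print(WordCollector().visit(tree))  # ['foo', 'bar']
    # the fresh Word nodes have no head/tail spacing, so re-add it before printing
    print(auto_head_tail(WordUpperCaser().visit(tree)))  # FOO AND BAR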
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.pytest.ini_options]
2 | addopts = '--doctest-modules --doctest-glob="test_*.rst" --cov=luqum --cov-branch --cov-report html --no-cov-on-fail'
3 | python_files = 'test_*.py tests.py'
4 |
5 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 |
3 | coverage==7.6.10
4 | elasticsearch-dsl==8.17.1
5 | flake8==7.1.1
6 | pytest==8.3.4
7 | pytest-cov==6.0.0
8 | Sphinx==8.1.3
9 | wheel==0.45.1
10 | build==1.2.2.post1
11 | twine==6.1.0
12 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ply==3.11
2 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length=100
3 | exclude=
4 | parsetab.py
5 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from setuptools import setup
3 |
4 | from luqum import __version__
5 |
6 |
7 | with open('README.rst', 'r') as f:
8 | long_description = f.read()
9 | with open('CHANGELOG.rst', 'r') as f:
10 | long_description += "\n\n" + f.read()
11 |
12 |
13 | setup(
14 | name='luqum',
15 | version=__version__,
16 |     description="A Lucene query parser generating ElasticSearch queries and more!",
17 | long_description=long_description,
18 | author='Jurismarches',
19 | author_email='contact@jurismarches.com',
20 | url='https://github.com/jurismarches/luqum',
21 | packages=[
22 | 'luqum',
23 | 'luqum.elasticsearch'
24 | ],
25 | install_requires=[
26 | 'ply>=3.11',
27 | ],
28 | classifiers=[
29 | 'Development Status :: 4 - Beta',
30 | 'License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)',
31 | 'License :: OSI Approved :: Apache Software License',
32 | 'Intended Audience :: Developers',
33 | 'Programming Language :: Python',
34 | 'Programming Language :: Python :: 3.10',
35 | 'Programming Language :: Python :: 3.11',
36 | 'Programming Language :: Python :: 3.12',
37 | 'Programming Language :: Python :: 3.13',
38 | ],
39 | )
40 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jurismarches/luqum/64d3d4a1bcdc56f156185df1f4d146582f294998/tests/__init__.py
--------------------------------------------------------------------------------
/tests/alternative_lexer.py:
--------------------------------------------------------------------------------
1 | """
2 | Fake Lexer to test: [Multiple Parsers and
3 | Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37)
4 | """
5 |
6 | # List of token names. This is always required
7 | tokens = (
8 | "NUMBER",
9 | "PLUS",
10 | "MINUS",
11 | "TIMES",
12 | "DIVIDE",
13 | "LPAREN",
14 | "RPAREN",
15 | )
16 |
17 | # Regular expression rules for simple tokens
18 | t_PLUS = r"\+"
19 | t_MINUS = r"-"
20 | t_TIMES = r"\*"
21 | t_DIVIDE = r"/"
22 | t_LPAREN = r"\("
23 | t_RPAREN = r"\)"
24 |
25 |
26 | # A regular expression rule with some action code
27 | def t_NUMBER(t):
28 | r"\d+"
29 | t.value = int(t.value)
30 | return t
31 |
32 |
33 | # Define a rule so we can track line numbers
34 | def t_newline(t):
35 | r"\n+"
36 | t.lexer.lineno += len(t.value)
37 |
38 |
39 | # A string containing ignored characters (spaces and tabs)
40 | t_ignore = " \t"
41 |
42 |
43 | # Error handling rule
44 | def t_error(t):
45 | print("Illegal character '%s'" % t.value[0])
46 | t.lexer.skip(1)
47 |
--------------------------------------------------------------------------------
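The module above only declares token rules; a sketch of how a test might build a usable
lexer from it with PLY (assuming ply is installed, as the project requires):

    import ply.lex as lex

    from tests import alternative_lexer

    lexer = lex.lex(module=alternative_lexer)
    lexer.input("3 + 4 * (10 - 2)")
    for token in lexer:
        print(token.type, token.value)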
/tests/test_auto_head_tail.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from unittest import TestCase
3 |
4 | from luqum.tree import (
5 | SearchField, FieldGroup, Group, Word, Phrase, Proximity, Fuzzy, Range,
6 | Not, AndOperation, OrOperation, Plus, UnknownOperation)
7 | from luqum.auto_head_tail import auto_head_tail
8 |
9 |
10 | class AutoHeadTailTestCase(TestCase):
11 |
12 | def test_or_operation(self):
13 | tree = OrOperation(Word("foo"), Word("bar"), Word("baz"))
14 | self.assertEqual(str(tree), "fooORbarORbaz")
15 | self.assertEqual(str(auto_head_tail(tree)), "foo OR bar OR baz")
16 |
17 | def test_and_operation(self):
18 | tree = AndOperation(Word("foo"), Word("bar"), Word("baz"))
19 | self.assertEqual(str(tree), "fooANDbarANDbaz")
20 | self.assertEqual(str(auto_head_tail(tree)), "foo AND bar AND baz")
21 |
22 | def test_unknown_operation(self):
23 | tree = UnknownOperation(Word("foo"), Word("bar"), Word("baz"))
24 | self.assertEqual(str(tree), "foobarbaz")
25 | self.assertEqual(str(auto_head_tail(tree)), "foo bar baz")
26 |
27 | def test_range(self):
28 | tree = Range(Word("foo"), Word("bar"))
29 | self.assertEqual(str(tree), "[fooTObar]")
30 | self.assertEqual(str(auto_head_tail(tree)), "[foo TO bar]")
31 |
32 | def test_not(self):
33 | tree = Not(Word("foo"))
34 | self.assertEqual(str(tree), "NOTfoo")
35 | self.assertEqual(str(auto_head_tail(tree)), "NOT foo")
36 |
37 | def test_complex(self):
38 | tree = Group(
39 | OrOperation(
40 | SearchField(
41 | "foo",
42 | FieldGroup(UnknownOperation(Word("bar"), Range(Word("baz"), Word("spam")))),
43 | ),
44 | Not(Proximity(Phrase('"ham ham"'), 2)),
45 | Plus(Fuzzy(Word("hammer"), 3)),
46 | )
47 | )
48 | self.assertEqual(str(tree), '(foo:(bar[bazTOspam])ORNOT"ham ham"~2OR+hammer~3)')
49 | self.assertEqual(
50 | str(auto_head_tail(tree)),
51 | '(foo:(bar [baz TO spam]) OR NOT "ham ham"~2 OR +hammer~3)',
52 | )
53 |         # idempotent
54 | self.assertEqual(
55 | str(auto_head_tail(auto_head_tail(tree))),
56 | '(foo:(bar [baz TO spam]) OR NOT "ham ham"~2 OR +hammer~3)',
57 | )
58 |
59 | def test_auto_head_tail_no_change_to_existing(self):
60 | tree = AndOperation(
61 | Range(Word("foo", tail="\t"), Word("bar", head="\n"), tail="\r"),
62 | Not(Word("baz", head="\t\t"), head="\n\n", tail="\r\r"),
63 | Word("spam", head="\t\n"),
64 | )
65 | self.assertEqual(str(tree), "[foo\tTO\nbar]\rAND\n\nNOT\t\tbaz\r\rAND\t\nspam")
66 | self.assertEqual(
67 | str(auto_head_tail(tree)),
68 | "[foo\tTO\nbar]\rAND\n\nNOT\t\tbaz\r\rAND\t\nspam"
69 | )
70 |
--------------------------------------------------------------------------------
/tests/test_deprecated_utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | .. todo:: split this file into multiple files: tree, lexer, parser
4 | """
5 | import collections
6 | import copy
7 | from unittest import TestCase
8 |
9 | from luqum.tree import Group, Word, Phrase, AndOperation, OrOperation
10 | from luqum.deprecated_utils import LuceneTreeVisitor, LuceneTreeTransformer, LuceneTreeVisitorV2
11 |
12 |
13 | class TreeVisitorTestCase(TestCase):
14 |
15 | class BasicVisitor(LuceneTreeVisitor):
16 | """ Dummy visitor, simply yielding a list of nodes. """
17 | def generic_visit(self, node, parents):
18 | yield node
19 |
20 | class MROVisitor(LuceneTreeVisitor):
21 |
22 | def visit_or_operation(self, node, parents=[]):
23 | return ["{} OR {}".format(*node.children)]
24 |
25 | def visit_base_operation(self, node, parents=[]):
26 | return ["{} BASE_OP {}".format(*node.children)]
27 |
28 | def visit_word(self, node, parents=[]):
29 | return [node.value]
30 |
31 | def test_generic_visit(self):
32 | tree = (
33 | AndOperation(
34 | Word("foo"),
35 | Word("bar")))
36 |
37 | visitor = LuceneTreeVisitor()
38 | nodes = list(visitor.visit(tree))
39 | self.assertEqual(nodes, [])
40 |
41 | def test_basic_traversal(self):
42 | tree = (
43 | AndOperation(
44 | Word("foo"),
45 | Word("bar")))
46 |
47 | visitor = self.BasicVisitor()
48 | nodes = list(visitor.visit(tree))
49 |
50 | self.assertListEqual(
51 | [AndOperation(Word('foo'), Word('bar')), Word('foo'), Word('bar')],
52 | nodes)
53 |
54 | def test_mro(self):
55 | visitor = self.MROVisitor()
56 |
57 | tree = OrOperation(Word('a'), Word('b'))
58 | result = visitor.visit(tree)
59 | self.assertEqual(list(result), ['a OR b', 'a', 'b'])
60 |
61 | tree = AndOperation(Word('a'), Word('b'))
62 | result = visitor.visit(tree)
63 | self.assertEqual(list(result), ['a BASE_OP b', 'a', 'b'])
64 |
65 |
66 | class TreeTransformerTestCase(TestCase):
67 |
68 | class BasicTransformer(LuceneTreeTransformer):
69 | """
70 | Dummy transformer that simply turn any Word node's value into "lol"
71 | """
72 | def visit_word(self, node, parent):
73 | return Word('lol')
74 |
75 | def visit_phrase(self, node, parent):
76 | return None
77 |
78 | class OrListOperation(OrOperation):
79 | """Dummy operation having list operands instead of tuple
80 | """
81 | def __init__(self, *args, **kwargs):
82 | super().__init__(*args, **kwargs)
83 | self.operands = list(self.operands)
84 |
85 | def test_basic_traversal(self):
86 | tree = (
87 | AndOperation(
88 | Word("foo"),
89 | Word("bar")))
90 |
91 | transformer = self.BasicTransformer()
92 | new_tree = transformer.visit(tree)
93 |
94 | self.assertEqual(
95 | new_tree,
96 | (AndOperation(
97 | Word("lol"),
98 | Word("lol"))))
99 |
100 | def test_no_transform(self):
101 | tree = AndOperation()
102 | transformer = self.BasicTransformer()
103 | new_tree = transformer.visit(tree)
104 | self.assertEqual(
105 | new_tree,
106 | AndOperation())
107 |
108 | def test_one_word(self):
109 | tree = Word("foo")
110 | transformer = self.BasicTransformer()
111 | new_tree = transformer.visit(tree)
112 | self.assertEqual(
113 | new_tree,
114 | Word("lol"))
115 |
116 | def test_removal(self):
117 | tree = (
118 | AndOperation(
119 | AndOperation(
120 | Word("foo"),
121 | Phrase('"bar"')),
122 | AndOperation(
123 | Phrase('"baz"'),
124 | Phrase('"biz"'))))
125 |
126 | transformer = self.BasicTransformer()
127 | new_tree = transformer.visit(tree)
128 |
129 | self.assertEqual(
130 | new_tree,
131 | (AndOperation(
132 | AndOperation(Word("lol")),
133 | AndOperation())))
134 |
135 | def test_operands_list(self):
136 | OrListOperation = self.OrListOperation
137 | tree = (
138 | OrListOperation(
139 | OrListOperation(
140 | Word("foo"),
141 | Phrase('"bar"')),
142 | OrListOperation(
143 | Phrase('"baz"'))))
144 |
145 | transformer = self.BasicTransformer()
146 | new_tree = transformer.visit(tree)
147 |
148 | self.assertEqual(
149 | new_tree,
150 | (OrListOperation(
151 | OrListOperation(Word("lol")),
152 | OrListOperation())))
153 |
154 | def test_silent_value_error(self):
155 |         # in case some attribute misleads the search for nodes, do not raise
156 | tree = AndOperation(Word("a"), Word("b"))
157 | setattr(tree, "misleading1", ())
158 | setattr(tree, "misleading2", [])
159 |         # hackishly patch __dict__ to be sure we have operands in the right order for the test
160 | tree.__dict__ = collections.OrderedDict(tree.__dict__)
161 | tree.__dict__['operands'] = tree.__dict__.pop('operands') # operands are now last
162 |
163 | transformer = self.BasicTransformer()
164 | new_tree = transformer.visit(tree)
165 |
166 | self.assertEqual(
167 | new_tree,
168 | AndOperation(Word("lol"), Word("lol")))
169 |
170 | def test_repeating_expression(self):
171 | # non regression test
172 | tree = AndOperation(
173 | Group(OrOperation(Word('bar'), Word('foo'))),
174 | Group(OrOperation(Word('bar'), Word('foo'), Word('spam'))),
175 | )
176 | # basic transformer should not change tree
177 | same_tree = LuceneTreeTransformer().visit(copy.deepcopy(tree))
178 | self.assertEqual(same_tree, tree)
179 |
180 |
181 | class TreeVisitorV2TestCase(TestCase):
182 |
183 | class BasicVisitor(LuceneTreeVisitorV2):
184 | """ Dummy visitor, simply yielding a list of nodes. """
185 | def generic_visit(self, node, parents, context):
186 | yield node
187 | for c in node.children:
188 | yield from self.visit(c, parents + [node], context)
189 |
190 | class MROVisitor(LuceneTreeVisitorV2):
191 |
192 | def visit_or_operation(self, node, parents=[], context=None):
193 | return "{} OR {}".format(*[self.visit(c) for c in node.children])
194 |
195 | def visit_base_operation(self, node, parents=[], context=None):
196 | return "{} BASE_OP {}".format(*[self.visit(c) for c in node.children])
197 |
198 | def visit_word(self, node, parents=[], context=None):
199 | return node.value
200 |
201 | def test_basic_traversal(self):
202 | tree = (
203 | AndOperation(
204 | Word("foo"),
205 | Word("bar")))
206 |
207 | visitor = self.BasicVisitor()
208 | nodes = list(visitor.visit(tree))
209 |
210 | self.assertListEqual(
211 | [AndOperation(Word('foo'), Word('bar')), Word('foo'), Word('bar')],
212 | nodes)
213 |
214 | def test_mro(self):
215 | visitor = self.MROVisitor()
216 |
217 | tree = OrOperation(Word('a'), Word('b'))
218 | result = visitor.visit(tree)
219 | self.assertEqual(result, 'a OR b')
220 |
221 | tree = OrOperation(AndOperation(Word('a'), Word('b')), Word('c'))
222 | result = visitor.visit(tree)
223 | self.assertEqual(result, 'a BASE_OP b OR c')
224 |
225 | def test_generic_visit_fails_by_default(self):
226 | visitor = self.MROVisitor()
227 | with self.assertRaises(AttributeError):
228 | visitor.visit(Phrase('"test"'))
229 |
--------------------------------------------------------------------------------
/tests/test_elasticsearch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jurismarches/luqum/64d3d4a1bcdc56f156185df1f4d146582f294998/tests/test_elasticsearch/__init__.py
--------------------------------------------------------------------------------
/tests/test_elasticsearch/book.json:
--------------------------------------------------------------------------------
1 | {
2 | "books": [
3 | {
4 | "title": "Harry Potter and the Philosopher's Stone",
5 | "edition": "Bloomsbury",
6 | "author": {
7 | "name": "J. K. Rowling",
8 | "birthdate": "1965-07-31"
9 | },
10 | "illustrators": [
11 | {
12 | "name": "Thomas Taylor",
13 | "nationality": "UK",
14 | "birthdate": "1973-05-22"
15 | },
16 | {
17 | "name": "Mary GrandPré",
18 |                     "nationality": "US",
19 | "birthdate": "1954-02-13"
20 | }
21 | ],
22 | "publication_date": "1997-06-26",
23 | "n_pages": "223",
24 | "ref": "HP1"
25 | },
26 | {
27 | "title": "Harry Potter and the Chamber of Secrets",
28 | "edition": "Bloomsbury",
29 | "author": {
30 | "name": "J. K. Rowling",
31 | "birthdate": "1965-07-31"
32 | },
33 | "illustrators": [
34 | {
35 | "name": "Cliff Wright",
36 | "nationality": "UK",
37 | "birthdate": "1953-10-24"
38 | },
39 | {
40 | "name": "Mary GrandPré",
41 | "nationality": "US",
42 | "birthdate": "1954-02-13"
43 | }
44 | ],
45 | "publication_date": "1998-07-02",
46 | "n_pages": "251",
47 | "ref": "HP2"
48 | },
49 | {
50 | "title": "Harry Potter and the Prisoner of Azkaban",
51 | "edition": "Bloomsbury",
52 | "author": {
53 | "name": "J. K. Rowling",
54 | "birthdate": "1965-07-31"
55 | },
56 | "illustrators": [
57 | {
58 | "name": "Cliff Wright",
59 | "nationality": "UK",
60 | "birthdate": "1953-10-24"
61 | },
62 | {
63 | "name": "Mary GrandPré",
64 | "nationality": "US",
65 | "birthdate": "1954-02-13"
66 | }
67 | ],
68 | "publication_date": "1999-07-08",
69 | "n_pages": "317",
70 | "ref": "HP3"
71 | },
72 | {
73 | "title": "Harry Potter and the Goblet of Fire",
74 | "edition": "Bloomsbury",
75 | "author": {
76 | "name": "J. K. Rowling",
77 | "birthdate": "1965-07-31"
78 | },
79 | "illustrators": [
80 | {
81 | "name": "Giles Greenfield",
82 | "nationality": "UK"
83 | },
84 | {
85 | "name": "Mary GrandPré",
86 | "nationality": "US",
87 | "birthdate": "1954-02-13"
88 | }
89 | ],
90 | "publication_date": "2000-07-08",
91 | "n_pages": "636",
92 | "ref": "HP4"
93 | },
94 | {
95 | "title": "Harry Potter and the Order of the Phoenix",
96 | "edition": "Bloomsbury",
97 | "author": {
98 | "name": "J. K. Rowling",
99 | "birthdate": "1965-07-31"
100 | },
101 | "illustrators": [
102 | {
103 |                     "name": "Jason Cockcroft",
104 |                     "nationality": "UK"
105 | },
106 | {
107 | "name": "Mary GrandPré",
108 | "nationality": "US",
109 | "birthdate": "1954-02-13"
110 | }
111 | ],
112 | "publication_date": "2003-06-21",
113 | "n_pages": "766",
114 | "ref": "HP5"
115 | },
116 | {
117 | "title": "Harry Potter and the Half-Blood Prince",
118 | "edition": "Bloomsbury",
119 | "author": {
120 | "name": "J. K. Rowling",
121 | "birthdate": "1965-07-31"
122 | },
123 | "illustrators": [
124 | {
125 | "name": "Jason Cockcroft",
126 | "nationality": "UK"
127 | },
128 | {
129 | "name": "Mary GrandPré",
130 | "nationality": "US",
131 | "birthdate": "1954-02-13"
132 | }
133 | ],
134 | "publication_date": "2005-07-16",
135 | "n_pages": "607",
136 | "ref": "HP6"
137 | },
138 | {
139 | "title": "Harry Potter and the Deathly Hallows",
140 | "edition": "Bloomsbury",
141 | "author": {
142 | "name": "J. K. Rowling",
143 | "birthdate": "1965-07-31"
144 | },
145 | "illustrators": [
146 | {
147 | "name": "Jason Cockcroft",
148 | "nationality": "UK"
149 | },
150 | {
151 | "name": "Mary GrandPré",
152 | "nationality": "US",
153 | "birthdate": "1954-02-13"
154 | }
155 | ],
156 | "publication_date": "2007-07-21",
157 | "n_pages": "607",
158 | "ref": "HP7"
159 | },
160 | {
161 | "title": "Harry Potter and the Cursed Child",
162 | "edition": "Little, Brown and Company",
163 | "author": {
164 | "name": "J. K. Rowling",
165 | "birthdate": "1965-07-31"
166 | },
167 | "illustrators": [],
168 | "publication_date": "2016-07-30",
169 | "n_pages": "360",
170 | "ref": "HP8"
171 | },
172 | {
173 | "title": "The Tales of Beedle the Bard",
174 | "edition": "Lumos (charity)",
175 | "author": {
176 | "name": "J. K. Rowling",
177 | "birthdate": "1965-07-31"
178 | },
179 | "illustrators": [
180 | {
181 |                     "name": "J. K. Rowling",
182 | "nationality": "UK",
183 | "birthdate": "1965-07-31"
184 | }
185 | ],
186 | "publication_date": "2008-12-04",
187 | "n_pages": "157",
188 | "ref": "BB1"
189 | }
190 | ]
191 | }
192 |
--------------------------------------------------------------------------------
/tests/test_elasticsearch/es_integration_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | import elasticsearch_dsl
5 | from elasticsearch.exceptions import ConnectionError
6 | from elasticsearch.helpers import bulk
7 | from elasticsearch_dsl import Date, Index, Integer, Nested, Object, Search, analyzer
8 | from elasticsearch_dsl.connections import connections
9 |
10 | from luqum.elasticsearch import ElasticsearchQueryBuilder, SchemaAnalyzer
11 |
12 |
13 | MAJOR_ES = elasticsearch_dsl.VERSION[0]
14 | if MAJOR_ES > 2:
15 | from elasticsearch_dsl import Keyword
16 |
17 | ES6 = False
18 | if MAJOR_ES >= 6:
19 | from elasticsearch_dsl import Text, Document, InnerDoc
20 |
21 | ES6 = True
22 | else:
23 | from elasticsearch_dsl import (
24 | String as Text,
25 | DocType as Document,
26 | InnerObjectWrapper as InnerDoc,
27 | )
28 |
29 |
30 | def get_es():
31 | """Return an es connection or None if none seems available.
32 |
33 | Also wait for ES to be ready (yellow status)
34 | """
35 | # you may use ES_HOST environment variable to configure Elasticsearch
36 | # launching something like
37 | # docker run --rm -p "127.0.0.1:9200:9200" -e "discovery.type=single-node" elasticsearch:7.8.0
38 | # is a simple way to get an instance
39 | connections.configure(
40 | default=dict(hosts=os.environ.get("ES_HOST", "http://localhost:9200"), timeout=20)
41 | )
42 | try:
43 | client = connections.get_connection("default")
44 | # check ES running
45 | client.cluster.health(wait_for_status='yellow')
46 | except ConnectionError:
47 | client = None
48 | return client
49 |
50 |
51 | if MAJOR_ES > 2:
52 |
53 | class Illustrator(InnerDoc):
54 |         """Inner object to be nested in Book, with details about an illustrator
55 | """
56 | name = Text()
57 | birthdate = Date()
58 | nationality = Keyword()
59 |
60 |
61 | class Book(Document):
62 |     """An object representing a book in ES
63 | """
64 | title = Text(fields={
65 | "no_vowels": Text(
66 | analyzer=analyzer("no_vowels", "pattern", pattern=r"[\Waeiouy]"), # noqa: W605
67 | search_analyzer="standard"
68 | )
69 | })
70 | ref = Keyword() if MAJOR_ES > 2 else Text(index="not_analyzed")
71 | edition = Text()
72 | author = Object(properties={"name": Text(), "birthdate": Date()})
73 | publication_date = Date()
74 | n_pages = Integer()
75 |
76 | if ES6:
77 | illustrators = Nested(Illustrator)
78 |
79 | class Index:
80 | name = "bk"
81 |
82 | else:
83 | illustrators = Nested(
84 | properties={
85 | "name": Text(),
86 | "birthdate": Date(),
87 | "nationality": Keyword() if MAJOR_ES > 2 else Text(index="not_analyzed"),
88 | }
89 | )
90 |
91 | class Meta:
92 | index = "bk"
93 |
94 |
95 | def add_book_data(es):
96 | """Create a "bk" index and fill it with data
97 | """
98 | remove_book_index(es)
99 | Book.init()
100 | with open(os.path.join(os.path.dirname(__file__), "book.json")) as f:
101 | datas = json.load(f)
102 | actions = (
103 | {"_op_type": "index", "_id": i, "_source": d}
104 | for i, d in enumerate(datas["books"])
105 | )
106 | if MAJOR_ES >= 7:
107 | bulk(es, actions, index="bk", refresh=True)
108 | else:
109 | if ES6:
110 | doc_type = "doc"
111 | else:
112 | doc_type = "book"
113 | bulk(es, actions, index="bk", doc_type=doc_type, refresh=True)
114 |
115 |
116 | def book_search(es):
117 | """Return an elasticsearch_dsl search object
118 | """
119 | return Search(using=es, index="bk")
120 |
121 |
122 | def book_query_builder(es):
123 | """Return an ElasticsearchQueryBuilder adapted for search in book.
124 |
125 |     title is adapted to search the title.no_vowels field along with the title field
126 | """
127 | MESSAGES_SCHEMA = {"mappings": Book._doc_type.mapping.to_dict()}
128 | schema_analizer = SchemaAnalyzer(MESSAGES_SCHEMA)
129 | builder_options = schema_analizer.query_builder_options()
130 | builder_options['field_options'] = {
131 | 'title.no_vowels': {
132 | 'match_type': 'multi_match',
133 | 'type': 'most_fields',
134 | 'fields': ('title', 'title.no_vowels')
135 | }
136 | }
137 | return ElasticsearchQueryBuilder(**builder_options)
138 |
139 |
140 | def remove_book_index(es):
141 | """clean "bk" index
142 | """
143 | if es is None:
144 | return
145 | if ES6:
146 | Book._index.delete(ignore=404)
147 | else:
148 | Index("bk").delete(ignore=404)
149 |
--------------------------------------------------------------------------------
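Outside the test setup above, the core flow these helpers wrap looks roughly like this
(a sketch; the schema subset and query are illustrative):

    from luqum.parser import parser
    from luqum.elasticsearch import ElasticsearchQueryBuilder, SchemaAnalyzer

    schema = {"mappings": {"properties": {
        "ref": {"type": "keyword"},
        "title": {"type": "text"},
    }}}
    options = SchemaAnalyzer(schema).query_builder_options()
    builder = ElasticsearchQueryBuilder(**options)

    tree = parser.parse('title:"Chamber" AND ref:HP2')
    es_query = builder(tree)  # a dict to embed in the "query" part of an ES search body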
/tests/test_elasticsearch/test_es_integration.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase, skipIf
2 |
3 | from luqum.parser import parser
4 |
5 | from .es_integration_utils import (
6 | add_book_data, book_query_builder, book_search, get_es, remove_book_index,
7 | )
8 |
9 |
10 | @skipIf(get_es() is None, "Skipping ES test as ES seems unreachable")
11 | class LuqumRequestTestCase(TestCase):
12 |
13 | @classmethod
14 | def setUpClass(cls):
15 | cls.es_client = get_es()
16 | if cls.es_client is None:
17 | return
18 | cls.es_builder = book_query_builder(cls.es_client)
19 | cls.search = book_search(cls.es_client)
20 | add_book_data(cls.es_client)
21 |
22 | def _ask_luqum(self, req):
23 | tree = parser.parse(req)
24 | query = self.es_builder(tree)
25 | return [x.title for x in self.search.filter(query).execute()]
26 |
27 | def test_simple_field_search(self):
28 | self.assertListEqual(
29 | self._ask_luqum('title:"Chamber"'),
30 | ["Harry Potter and the Chamber of Secrets"],
31 | )
32 |
33 | def test_nested_field_search(self):
34 | self.assertListEqual(
35 | self._ask_luqum("illustrators:(name:Giles)"),
36 | ["Harry Potter and the Goblet of Fire"],
37 | )
38 |
39 | def test_or_condition_search(self):
40 | self.assertCountEqual(
41 | self._ask_luqum(
42 | 'illustrators:(name:"Giles Greenfield" OR name:"Cliff Wright")'
43 | ),
44 | [
45 | "Harry Potter and the Prisoner of Azkaban",
46 | "Harry Potter and the Chamber of Secrets",
47 | "Harry Potter and the Goblet of Fire",
48 | ],
49 | )
50 |
51 | def test_and_condition_search(self):
52 | self.assertCountEqual(
53 | self._ask_luqum(
54 | 'illustrators:(name:"Cliff Wright") AND illustrators:(name:"Mary GrandPré")'
55 | ),
56 | [
57 | "Harry Potter and the Prisoner of Azkaban",
58 | "Harry Potter and the Chamber of Secrets",
59 | ],
60 | )
61 |
62 | def test_date_range_search(self):
63 | self.assertCountEqual(
64 | self._ask_luqum("publication_date:[2005-01-01 TO 2010-12-31]"),
65 | [
66 | "Harry Potter and the Half-Blood Prince",
67 | "The Tales of Beedle the Bard",
68 | "Harry Potter and the Deathly Hallows",
69 | ],
70 | )
71 |
72 | def test_int_range_search(self):
73 | self.assertCountEqual(
74 | self._ask_luqum("n_pages:[500 TO *]"),
75 | [
76 | "Harry Potter and the Half-Blood Prince",
77 | "Harry Potter and the Order of the Phoenix",
78 | "Harry Potter and the Deathly Hallows",
79 | "Harry Potter and the Goblet of Fire",
80 | ],
81 | )
82 |
83 | def test_int_search(self):
84 | self.assertListEqual(
85 | self._ask_luqum("n_pages:360"), ["Harry Potter and the Cursed Child"]
86 | )
87 |
88 | def test_proximity_search(self):
89 | self.assertListEqual(
90 | self._ask_luqum('title:"Harry Secrets"~5'),
91 | ["Harry Potter and the Chamber of Secrets"],
92 | )
93 |
94 | def test_fuzzy_search(self):
95 | self.assertListEqual(
96 | self._ask_luqum("title:Gublet~2"), ["Harry Potter and the Goblet of Fire"]
97 | )
98 |
99 | def test_object_field_search(self):
100 | self.assertListEqual(
101 | self._ask_luqum('illustrators:(name:"J. K. Rowling")'),
102 | ["The Tales of Beedle the Bard"],
103 | )
104 |
105 | def test_fail_search(self):
106 | self.assertListEqual(self._ask_luqum("title:secret"), [])
107 |
108 | def test_wildcard_matching(self):
109 | self.assertListEqual(
110 | self._ask_luqum("title:secret*"),
111 | ["Harry Potter and the Chamber of Secrets"],
112 | )
113 |
114 | def test_wildcard1_search(self):
115 | self.assertListEqual(
116 | self._ask_luqum("title:P*ix"), ["Harry Potter and the Order of the Phoenix"]
117 | )
118 |
119 | def test_not_search(self):
120 | self.assertListEqual(
121 | self._ask_luqum("-title:Harry"), ["The Tales of Beedle the Bard"]
122 | )
123 |
124 | def test_not_analysed_field_search(self):
125 | self.assertListEqual(self._ask_luqum("illustrators:nationality:uk"), [])
126 |
127 | def test_complex_search(self):
128 | self.assertListEqual(
129 | self._ask_luqum(
130 | """
131 | title:phoenux~2 AND
132 | illustrators:name:Grand* AND
133 | illustrators:(
134 | -name:grandpr* AND (
135 | name:J*on OR birthdate:[1950-01-01 TO 1970-01-01]
136 | )
137 | )
138 | """
139 | ),
140 | ["Harry Potter and the Order of the Phoenix"],
141 | )
142 |
143 | def test_subfield_multi_match_search(self):
144 | self.assertListEqual(
145 | self._ask_luqum("title.no_vowels:Potter AND title.no_vowels:x"),
146 | ["Harry Potter and the Order of the Phoenix"],
147 | )
148 |
149 | @classmethod
150 | def tearDownClass(cls):
151 | remove_book_index(cls.es_client)
152 |
--------------------------------------------------------------------------------
/tests/test_elasticsearch/test_estree.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from luqum.elasticsearch.tree import EShould, EWord
4 |
5 |
6 | class TestItems(TestCase):
7 |
8 | def test_should_operation_options(self):
9 | op = EShould(items=[EWord(q="a"), EWord(q="b"), EWord(q="c")], minimum_should_match=2)
10 | self.assertEqual(
11 | op.json,
12 | {'bool': {
13 | 'should': [
14 | {'term': {'': {'value': 'a'}}},
15 | {'term': {'': {'value': 'b'}}},
16 | {'term': {'': {'value': 'c'}}},
17 | ],
18 | 'minimum_should_match': 2,
19 | }},
20 | )
21 |
--------------------------------------------------------------------------------
/tests/test_elasticsearch/test_naming.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from luqum.tree import (
4 | AndOperation, Word, Prohibit, OrOperation, Not, Phrase, SearchField,
5 | UnknownOperation, Boost, Fuzzy, Proximity, Range, Group, FieldGroup,
6 | Plus)
7 | from luqum.naming import auto_name, set_name
8 | from luqum.elasticsearch.visitor import ElasticsearchQueryBuilder
9 |
10 |
11 | class ElasticsearchTreeTransformerTestCase(TestCase):
12 |
13 | @classmethod
14 | def setUpClass(cls):
15 | cls.transformer = ElasticsearchQueryBuilder(
16 | default_field="text",
17 | not_analyzed_fields=['not_analyzed_field', 'text', 'author.tag'],
18 | nested_fields={
19 | 'author': ['name', 'tag']
20 | },
21 | object_fields=["book.title", "author.rewards.name"],
22 | sub_fields=["book.title.raw"],
23 | )
24 |
25 | def test_named_queries_match(self):
26 | tree = SearchField("spam", Word("bar"))
27 | set_name(tree, "a")
28 | result = self.transformer(tree)
29 | self.assertEqual(
30 | result,
31 | {
32 | "match": {
33 | "spam": {
34 | "query": "bar",
35 | "_name": "a",
36 | "zero_terms_query": "none",
37 | },
38 | },
39 | },
40 | )
41 |
42 | tree = SearchField("spam", Phrase('"foo bar"'))
43 | set_name(tree, "a")
44 | result = self.transformer(tree)
45 | self.assertEqual(
46 | result,
47 | {
48 | "match_phrase": {
49 | "spam": {
50 | "query": "foo bar",
51 | "_name": "a",
52 | },
53 | },
54 | },
55 | )
56 |
57 | def test_named_queries_term(self):
58 | tree = SearchField("text", Word("bar"))
59 | set_name(tree, "a")
60 | result = self.transformer(tree)
61 | self.assertEqual(
62 | result,
63 | {"term": {"text": {"value": "bar", "_name": "a"}}},
64 | )
65 |
66 | tree = SearchField("text", Phrase('"foo bar"'))
67 | set_name(tree, "a")
68 | result = self.transformer(tree)
69 | self.assertEqual(
70 | result,
71 | {"term": {"text": {"value": "foo bar", "_name": "a"}}},
72 | )
73 |
74 | def test_named_queries_fuzzy(self):
75 | tree = SearchField("text", Fuzzy(Word('bar')))
76 | set_name(tree.children[0], "a")
77 | result = self.transformer(tree)
78 | self.assertEqual(
79 | result,
80 | {"fuzzy": {"text": {"value": "bar", "_name": "a", 'fuzziness': 0.5}}},
81 | )
82 |
83 | def test_named_queries_proximity(self):
84 | tree = SearchField("spam", Proximity(Phrase('"foo bar"')))
85 | set_name(tree.children[0], "a")
86 | result = self.transformer(tree)
87 | self.assertEqual(
88 | result,
89 | {"match_phrase": {"spam": {"query": "foo bar", "_name": "a", 'slop': 1.0}}},
90 | )
91 |
92 | def test_named_queries_boost(self):
93 | tree = SearchField("text", Boost(Phrase('"foo bar"'), force=2))
94 | set_name(tree.children[0], "a")
95 | result = self.transformer(tree)
96 | self.assertEqual(
97 | result,
98 | {"term": {"text": {"value": "foo bar", "_name": "a", 'boost': 2.0}}},
99 | )
100 |
101 | def test_named_queries_or(self):
102 | tree = OrOperation(SearchField("text", Word("foo")), SearchField("spam", Word("bar")))
103 | set_name(tree.operands[0], "a")
104 | set_name(tree.operands[1], "b")
105 | result = self.transformer(tree)
106 | self.assertEqual(
107 | result,
108 | {'bool': {'should': [
109 | {'term': {'text': {'_name': 'a', 'value': 'foo'}}},
110 | {'match': {'spam': {'_name': 'b', 'query': 'bar', 'zero_terms_query': 'none'}}}
111 | ]}}
112 | )
113 |
114 | def test_named_queries_and(self):
115 | tree = AndOperation(SearchField("text", Word("foo")), SearchField("spam", Word("bar")))
116 | set_name(tree.operands[0], "a")
117 | set_name(tree.operands[1], "b")
118 | result = self.transformer(tree)
119 | self.assertEqual(
120 | result,
121 | {'bool': {'must': [
122 | {'term': {'text': {'_name': 'a', 'value': 'foo'}}},
123 | {'match': {'spam': {'_name': 'b', 'query': 'bar', 'zero_terms_query': 'all'}}}
124 | ]}}
125 | )
126 |
127 | def test_named_queries_unknown(self):
128 | tree = UnknownOperation(SearchField("text", Word("foo")), SearchField("spam", Word("bar")))
129 | set_name(tree.operands[0], "a")
130 | set_name(tree.operands[1], "b")
131 | result = self.transformer(tree)
132 | self.assertEqual(
133 | result,
134 | {'bool': {'should': [
135 | {'term': {'text': {'_name': 'a', 'value': 'foo'}}},
136 | {'match': {'spam': {'_name': 'b', 'query': 'bar', 'zero_terms_query': 'none'}}}
137 | ]}}
138 | )
139 |
140 | def test_named_queries_not(self):
141 | tree = Not(SearchField("text", Word("foo")))
142 | set_name(tree, "a")
143 | result = self.transformer(tree)
144 | self.assertEqual(
145 | result,
146 | {'bool': {'must_not': [{'term': {'text': {'_name': 'a', 'value': 'foo'}}}]}}
147 | )
148 |
149 | tree = Prohibit(SearchField("text", Word("foo")))
150 | set_name(tree, "a")
151 | result = self.transformer(tree)
152 | self.assertEqual(
153 | result,
154 | {'bool': {'must_not': [{'term': {'text': {'_name': 'a', 'value': 'foo'}}}]}}
155 | )
156 |
157 | def test_named_queries_plus(self):
158 | tree = Plus(SearchField("text", Word("foo")))
159 | set_name(tree, "a")
160 | result = self.transformer(tree)
161 | self.assertEqual(
162 | result,
163 | {'bool': {'must': [{'term': {'text': {'_name': 'a', 'value': 'foo'}}}]}}
164 | )
165 |
166 | def test_named_queries_range(self):
167 | tree = SearchField("text", Range(Word("x"), Word("z")))
168 | set_name(tree, "a")
169 | result = self.transformer(tree)
170 | self.assertEqual(result, {'range': {'text': {'_name': 'a', 'gte': 'x', 'lte': 'z'}}})
171 |
172 | def test_named_queries_nested(self):
173 | tree = SearchField("author.name", Word("Monthy"))
174 | set_name(tree, "a")
175 | result = self.transformer(tree)
176 | # name is repeated on query, but it's not a big deal…
177 | self.assertEqual(
178 | result,
179 | {
180 | 'nested': {
181 | '_name': 'a',
182 | 'path': 'author',
183 | 'query': {'match': {'author.name': {
184 | '_name': 'a', 'query': 'Monthy', 'zero_terms_query': 'none',
185 | }}},
186 | },
187 | }
188 | )
189 |
190 | def test_named_queries_object(self):
191 | tree = SearchField("book.title", Word("Circus"))
192 | set_name(tree, "a")
193 | result = self.transformer(tree)
194 | # name is repeated on query, but it's not a big deal…
195 | self.assertEqual(
196 | result,
197 | {
198 | 'match': {'book.title': {
199 | '_name': 'a', 'query': 'Circus', 'zero_terms_query': 'none'
200 | }}
201 | }
202 | )
203 |
204 | def test_named_queries_group(self):
205 | tree = SearchField("text", FieldGroup(Word("bar")))
206 | set_name(tree.children[0], "a")
207 | result = self.transformer(tree)
208 | self.assertEqual(result, {"term": {"text": {"value": "bar", "_name": "a"}}},)
209 |
210 | tree = Group(SearchField("text", Word("bar")))
211 | set_name(tree, "a")
212 | result = self.transformer(tree)
213 | self.assertEqual(result, {"term": {"text": {"value": "bar", "_name": "a"}}},)
214 |
215 | def test_named_queries_exists(self):
216 | tree = SearchField("text", Word("*"))
217 | set_name(tree.children[0], "a")
218 | result = self.transformer(tree)
219 | self.assertEqual(result, {"exists": {"field": "text", "_name": "a"}},)
220 |
221 | def test_named_queries_complex(self):
222 | tree = (
223 | AndOperation(
224 | SearchField("text", Phrase('"foo bar"')),
225 | Group(
226 | OrOperation(
227 | Word("bar"),
228 | SearchField("spam", Word("baz")),
229 | ),
230 | ),
231 | )
232 | )
233 | and_op = tree
234 | search_text = and_op.operands[0]
235 | or_op = and_op.operands[1].children[0]
236 | bar = or_op.operands[0]
237 | search_spam = or_op.operands[1]
238 | set_name(search_text, "foo_bar")
239 | set_name(bar, "bar")
240 | set_name(search_spam, "baz")
241 |
242 | expected = {
243 | 'bool': {'must': [
244 | {'term': {'text': {'_name': 'foo_bar', 'value': 'foo bar'}}},
245 | {'bool': {'should': [
246 | {'term': {'text': {'_name': 'bar', 'value': 'bar'}}},
247 | {'match': {'spam': {
248 | '_name': 'baz',
249 | 'query': 'baz',
250 | 'zero_terms_query': 'none'
251 | }}}
252 | ]}}
253 | ]}
254 | }
255 |
256 | result = self.transformer(tree)
257 | self.assertEqual(result, expected)
258 |
259 | def test_auto_name_integration(self):
260 | tree = (
261 | AndOperation(
262 | SearchField("text", Phrase('"foo bar"')),
263 | Group(
264 | OrOperation(
265 | Word("bar"),
266 | SearchField("spam", Word("baz")),
267 | ),
268 | ),
269 | )
270 | )
271 | auto_name(tree)
272 |
273 | expected = {
274 | 'bool': {'must': [
275 | {'term': {'text': {'_name': 'a', 'value': 'foo bar'}}},
276 | {'bool': {'should': [
277 | {'term': {'text': {'_name': 'c', 'value': 'bar'}}},
278 | {'match': {'spam': {
279 | '_name': 'd',
280 | 'query': 'baz',
281 | 'zero_terms_query': 'none'
282 | }}}
283 | ]}}
284 | ]}
285 | }
286 |
287 | result = self.transformer(tree)
288 | self.assertEqual(result, expected)
289 |
--------------------------------------------------------------------------------
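The "_name" entries asserted throughout these tests correspond to Elasticsearch named
queries: each hit in a search response lists the names it matched under "matched_queries".
A minimal sketch of producing such a named query (the name and query are illustrative):

    from luqum.elasticsearch.visitor import ElasticsearchQueryBuilder
    from luqum.naming import set_name
    from luqum.parser import parser

    transformer = ElasticsearchQueryBuilder(default_field="text")
    tree = parser.parse("spam:bar")
    set_name(tree, "a")  # surfaces as "_name": "a" in the generated query
    print(transformer(tree))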
/tests/test_elasticsearch/test_nested.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from luqum.elasticsearch.nested import extract_nested_queries, get_first_name
4 |
5 |
6 | class NestedQueriesTestCase(TestCase):
7 |
8 | def test_no_nested(self):
9 | queries = extract_nested_queries({"term": {"text": {"value": "spam", "_name": "spam"}}})
10 | self.assertEqual(queries, [])
11 |
12 | queries = extract_nested_queries(
13 | {"bool": {"must": [
14 | {"term": {"text": {"value": "spam", "_name": "spam"}}},
15 | {"term": {"text": {"value": "ham", "_name": "ham"}}},
16 | ]}}
17 | )
18 | self.assertEqual(queries, [])
19 |
20 | def test_nested_no_bool_inside(self):
21 | queries = extract_nested_queries(
22 | {"nested": {
23 | "path": "my",
24 | "query": {"term": {"text": {"value": "spam", "_name": "spam"}}}
25 | }}
26 | )
27 | self.assertEqual(queries, [])
28 |
29 | def test_nested_bool_inside(self):
30 | term1 = {"term": {"text": {"value": "spam", "_name": "spam"}}}
31 | term2 = {"term": {"text": {"value": "ham", "_name": "ham"}}}
32 | bool_query = {"bool": {"must": [term1, term2]}}
33 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query}})
34 | self.assertEqual(
35 | queries,
36 | [
37 | {"nested": {"path": "my", "query": term1, "_name": "spam"}},
38 | {"nested": {"path": "my", "query": term2, "_name": "ham"}},
39 | ],
40 | )
41 |
42 | def test_nested_in_bool_with_bool_inside(self):
43 | term1 = {"term": {"text": {"value": "spam", "_name": "spam"}}}
44 | term2 = {"term": {"text": {"value": "ham", "_name": "ham"}}}
45 | term3 = {"term": {"text": {"value": "foo", "_name": "foo"}}}
46 | bool_query = {"bool": {"must": [term1, term2]}}
47 | queries = extract_nested_queries(
48 | {"bool": {"should": [term3, {"nested": {"path": "my", "query": bool_query}}]}}
49 | )
50 | self.assertEqual(
51 | queries,
52 | [
53 | {"nested": {"path": "my", "query": term1, "_name": "spam"}},
54 | {"nested": {"path": "my", "query": term2, "_name": "ham"}},
55 | ],
56 | )
57 |
58 | def test_nested_bool_inside_bool(self):
59 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}}
60 | term2 = {"term": {"text": {"value": "baz", "_name": "baz"}}}
61 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}}
62 | bool_query1 = {"bool": {"should": [term1, term2]}}
63 | bool_query2 = {"bool": {"must": [term3, bool_query1]}}
64 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query2}})
65 | self.assertEqual(queries, [
66 | {"nested": {"path": "my", "query": term3, "_name": "spam"}},
67 | {"nested": {"path": "my", "query": bool_query1}},
68 | {"nested": {"path": "my", "query": term1, "_name": "bar"}},
69 | {"nested": {"path": "my", "query": term2, "_name": "baz"}},
70 | ])
71 |
72 | def test_nested_inside_nested(self):
73 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}}
74 | term2 = {"term": {"text": {"value": "baz", "_name": "baz"}}}
75 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}}
76 | bool_query1 = {"bool": {"should": [term1, term2]}}
77 | inner_nested = {"nested": {"path": "my.your", "query": bool_query1}}
78 | bool_query2 = {"bool": {"must": [term3, inner_nested]}}
79 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query2}})
80 | self.assertEqual(queries, [
81 | {"nested": {"path": "my", "query": term3, "_name": "spam"}},
82 | {"nested": {"path": "my", "query": inner_nested}},
83 | {"nested": {"path": "my", "_name": "bar", "query": {"nested": {
84 | "path": "my.your", "query": term1,
85 | }}}},
86 | {"nested": {"path": "my", "_name": "baz", "query": {"nested": {
87 | "path": "my.your", "query": term2,
88 | }}}},
89 | ])
90 |
91 | def test_nested_inside_nested_with_nested_bool(self):
92 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}}
93 | term2 = {"term": {"text": {"value": "foo", "_name": "foo"}}}
94 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}}
95 | bool_query1 = {"bool": {"must_not": [term1]}}
96 | bool_query2 = {"bool": {"should": [term2, bool_query1]}}
97 | inner_nested = {"nested": {"path": "my.your", "query": bool_query2}}
98 | bool_query3 = {"bool": {"must_not": [inner_nested]}}
99 | bool_query4 = {"bool": {"must": [term3, bool_query3]}}
100 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query4}})
101 | self.assertEqual(queries, [
102 | {"nested": {"path": "my", "query": term3, "_name": "spam"}},
103 | {"nested": {"path": "my", "query": bool_query3}},
104 | {"nested": {"path": "my", "query": inner_nested}},
105 | {"nested": {"path": "my", "_name": "foo", "query": {
106 | "nested": {"path": "my.your", "query": term2}
107 | }}},
108 | {"nested": {
109 | "path": "my", "query": {"nested": {"path": "my.your", "query": bool_query1}},
110 | }},
111 | {"nested": {"path": "my", "_name": "bar", "query": {
112 | "nested": {"path": "my.your", "query": term1}
113 | }}},
114 | ])
115 |
116 | def test_multiple_parallel_nested(self):
117 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}}
118 | term2 = {"term": {"text": {"value": "foo", "_name": "foo"}}}
119 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}}
120 | bool_query1 = {"bool": {"should": [term1]}}
121 | bool_query2 = {"bool": {"must_not": [term2]}}
122 | nested1 = {"nested": {"path": "my.your", "query": bool_query1}}
123 | nested2 = {"nested": {"path": "my.his", "query": bool_query2}}
124 | bool_query3 = {"bool": {"should": [nested2, nested1]}}
125 | bool_query4 = {"bool": {"must": [term3, bool_query3]}}
126 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query4}})
127 | self.assertEqual(queries, [
128 | {"nested": {"path": "my", "query": term3, "_name": "spam"}},
129 | {"nested": {"path": "my", "query": bool_query3}},
130 | {"nested": {"path": "my", "query": nested2}},
131 | {"nested": {"path": "my", "query": nested1}},
132 | {"nested": {"path": "my", "_name": "foo", "query": {
133 | "nested": {"path": "my.his", "query": term2}
134 | }}},
135 | {"nested": {"path": "my", "_name": "bar", "query": {
136 | "nested": {"path": "my.your", "query": term1}
137 | }}},
138 | ])
139 |
140 | def test_get_first_name(self):
141 | term = {"term": {"text": {"value": "bar", "_name": "bar"}}}
142 | query = [{"query": term, "_name": "spam"}, {"query": term, "_name": "beurre"}]
143 | name = get_first_name(query)
144 | self.assertEqual(name, "spam")
145 |
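146 |
147 | # For illustration only, not part of the test suite: Elasticsearch does not
148 | # report matched named queries that sit inside a nested clause, which is why
149 | # extract_nested_queries produces one standalone nested query per named clause.
150 | # A typical use (assuming the documented luqum API) is to run them alongside
151 | # the main query, e.g. in a "should" clause, so the names can be recovered:
152 | def _nested_extraction_sketch(es_query):
153 |     from luqum.elasticsearch.nested import extract_nested_queries
154 |
155 |     extra = extract_nested_queries(es_query)
156 |     # "should" does not change which documents match when "must" is present
157 |     return {"bool": {"must": [es_query], "should": extra}}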
--------------------------------------------------------------------------------
/tests/test_elasticsearch/test_schema.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from luqum.elasticsearch.schema import SchemaAnalyzer
4 |
5 |
6 | try:
7 | import elasticsearch_dsl
8 | ES_6 = elasticsearch_dsl.VERSION[0] >= 6
9 | except ImportError:
10 | ES_6 = True
11 |
12 |
13 | class SchemaAnalyzerTestCase(TestCase):
14 |
15 | MAPPING = {
16 | "properties": {
17 | "text": {"type": "text"},
18 | "author": {
19 | "type": "nested",
20 | "properties": {
21 | "firstname": {
22 | "type": "text",
23 | "fields": {
24 | # sub fields
25 | "english": {"analyzer": "english"},
26 | "raw": {"type": "keyword"},
27 | }
28 | },
29 | "lastname": {"type": "text"},
30 | "book": {
31 | "type": "nested",
32 | "properties": {
33 | "title": {"type": "text"},
34 |                             "isbn": {  # an object field in a deeply nested field
35 | "type": "object",
36 | "properties": {
37 | "ref": {
38 | "type": "keyword",
39 | },
40 | },
41 | },
42 | "format": {
43 | "type": "nested",
44 | "properties": {
45 | "ftype": {"type": "keyword"},
46 | },
47 | },
48 | },
49 | },
50 | },
51 | },
52 | "publish": {
53 | "type": "nested",
54 | "properties": {
55 | "site": {"type": "keyword"},
56 | "idnum": {"type": "long"},
57 | },
58 | },
59 | "manager": {
60 | "type": "object",
61 | "properties": {
62 | "firstname": {"type": "text"},
63 | "address": { # an object field in an object field
64 | "type": "object",
65 | "properties": {
66 | "zipcode": {"type": "keyword"},
67 | },
68 | },
69 |                     "subteams": {  # a nested field in an object field
70 | "type": "nested",
71 | "properties": {
72 | "supervisor": { # with an object field inside
73 | "type": "object",
74 | "properties": {
75 | "name": {
76 | "type": "text",
77 | # sub field
78 | "fields": {"raw": {"type": "keyword"}},
79 | },
80 | },
81 | },
82 | },
83 | },
84 | },
85 | },
86 | },
87 | }
88 |
89 | INDEX_SETTINGS = {
90 | "settings": {
91 | "query": {"default_field": "text"},
92 | },
93 | "mappings": {},
94 | }
95 |
96 | def setUp(self):
97 | super().setUp()
98 | if ES_6:
99 | self.INDEX_SETTINGS["mappings"] = self.MAPPING
100 | else:
101 | self.INDEX_SETTINGS["mappings"]["type1"] = self.MAPPING
102 |
103 | def test_default_field(self):
104 | s = SchemaAnalyzer(self.INDEX_SETTINGS)
105 | self.assertEqual(s.default_field(), "text")
106 |
107 | def test_not_analyzed_fields(self):
108 | s = SchemaAnalyzer(self.INDEX_SETTINGS)
109 | self.assertEqual(
110 | sorted(s.not_analyzed_fields()),
111 | [
112 | 'author.book.format.ftype',
113 | 'author.book.isbn.ref',
114 | 'author.firstname.raw',
115 | 'manager.address.zipcode',
116 | 'manager.subteams.supervisor.name.raw',
117 | 'publish.idnum',
118 | 'publish.site',
119 | ],
120 | )
121 |
122 | def test_nested_fields(self):
123 | s = SchemaAnalyzer(self.INDEX_SETTINGS)
124 | self.assertEqual(
125 | s.nested_fields(),
126 | {
127 | 'author': {
128 | 'firstname': {},
129 | 'lastname': {},
130 | 'book': {
131 | 'format': {
132 | 'ftype': {}
133 | },
134 | 'title': {},
135 | 'isbn': {},
136 | },
137 | },
138 | 'publish': {
139 | 'site': {},
140 | 'idnum': {},
141 | },
142 | 'manager.subteams': { # FIXME !!!!
143 | 'supervisor': {},
144 | },
145 | }
146 | )
147 |
148 | def test_object_fields(self):
149 | s = SchemaAnalyzer(self.INDEX_SETTINGS)
150 | self.assertEqual(
151 | sorted(s.object_fields()),
152 | [
153 | 'author.book.isbn.ref',
154 | 'manager.address.zipcode',
155 | 'manager.firstname',
156 | 'manager.subteams.supervisor.name',
157 | ]
158 | )
159 |
160 | def test_sub_fields(self):
161 | s = SchemaAnalyzer(self.INDEX_SETTINGS)
162 | self.assertEqual(
163 | sorted(s.sub_fields()),
164 | [
165 | 'author.firstname.english',
166 | 'author.firstname.raw',
167 | 'manager.subteams.supervisor.name.raw',
168 | ]
169 | )
170 |
171 | def test_empty(self):
172 | s = SchemaAnalyzer({})
173 | self.assertEqual(s.default_field(), "*")
174 | self.assertEqual(list(s.not_analyzed_fields()), [])
175 | self.assertEqual(s.nested_fields(), {})
176 | self.assertEqual(list(s.object_fields()), [])
177 |
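178 |
179 | # For illustration only, not part of the test suite: SchemaAnalyzer is mostly
180 | # a helper to configure the query builder (assuming the documented luqum API).
181 | def _schema_analyzer_sketch(index_settings):
182 |     from luqum.elasticsearch import ElasticsearchQueryBuilder, SchemaAnalyzer
183 |
184 |     schema_analyzer = SchemaAnalyzer(index_settings)
185 |     # query_builder_options() bundles the schema-derived options (default
186 |     # field, not-analyzed / nested / object fields) computed in the tests above
187 |     return ElasticsearchQueryBuilder(**schema_analyzer.query_builder_options())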
--------------------------------------------------------------------------------
/tests/test_pretty.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from luqum.pretty import Prettifier, prettify
4 | from luqum.tree import (
5 | SearchField, FieldGroup, Group, Word, AndOperation, OrOperation, UnknownOperation)
6 |
7 |
8 | class TestPrettify(TestCase):
9 |
10 | big_tree = AndOperation(
11 | Group(OrOperation(Word("baaaaaaaaaar"), Word("baaaaaaaaaaaaaz"))), Word("fooooooooooo"))
12 | fat_tree = AndOperation(
13 | SearchField(
14 | "subject",
15 | FieldGroup(
16 | OrOperation(
17 | Word("fiiiiiiiiiiz"),
18 | AndOperation(Word("baaaaaaaaaar"), Word("baaaaaaaaaaaaaz"))))),
19 | AndOperation(Word("fooooooooooo"), Word("wiiiiiiiiiz")))
20 |
21 | def test_one_liner(self):
22 | tree = AndOperation(Group(OrOperation(Word("bar"), Word("baz"))), Word("foo"))
23 | self.assertEqual(prettify(tree), "( bar OR baz ) AND foo")
24 |
25 | def test_with_unknown_op(self):
26 | prettify = Prettifier(indent=8, max_len=20)
27 | tree = UnknownOperation(
28 | Group(
29 | UnknownOperation(
30 | Word("baaaaaaaaaar"),
31 | Word("baaaaaaaaaaaaaz"))),
32 | Word("fooooooooooo"))
33 | self.assertEqual(
34 | "\n" + prettify(tree), """
35 | (
36 | baaaaaaaaaar
37 | baaaaaaaaaaaaaz
38 | )
39 | fooooooooooo""")
40 |
41 | def test_with_unknown_op_nested(self):
42 | prettify = Prettifier(indent=8, max_len=20)
43 | tree = OrOperation(
44 | UnknownOperation(
45 | Word("baaaaaaaaaar"),
46 | Word("baaaaaaaaaaaaaz")),
47 | Word("fooooooooooo"))
48 | self.assertEqual(
49 | "\n" + prettify(tree), """
50 | baaaaaaaaaar
51 | baaaaaaaaaaaaaz
52 | OR
53 | fooooooooooo""")
54 |
55 | def test_small(self):
56 | prettify = Prettifier(indent=8, max_len=20)
57 | self.assertEqual(
58 | "\n" + prettify(self.big_tree), """
59 | (
60 | baaaaaaaaaar
61 | OR
62 | baaaaaaaaaaaaaz
63 | )
64 | AND
65 | fooooooooooo""")
66 | self.assertEqual(
67 | "\n" + prettify(self.fat_tree), """
68 | subject: (
69 | fiiiiiiiiiiz
70 | OR
71 | baaaaaaaaaar
72 | AND
73 | baaaaaaaaaaaaaz
74 | )
75 | AND
76 | fooooooooooo
77 | AND
78 | wiiiiiiiiiz""")
79 |
80 | def test_small_inline_ops(self):
81 | prettify = Prettifier(indent=8, max_len=20, inline_ops=True)
82 | self.assertEqual("\n" + prettify(self.big_tree), """
83 | (
84 | baaaaaaaaaar OR
85 | baaaaaaaaaaaaaz ) AND
86 | fooooooooooo""")
87 | self.assertEqual("\n" + prettify(self.fat_tree), """
88 | subject: (
89 | fiiiiiiiiiiz OR
90 | baaaaaaaaaar AND
91 | baaaaaaaaaaaaaz ) AND
92 | fooooooooooo AND
93 | wiiiiiiiiiz""")
94 |
95 | def test_normal(self):
96 | prettify = Prettifier(indent=4, max_len=50)
97 | self.assertEqual("\n" + prettify(self.big_tree), """
98 | (
99 | baaaaaaaaaar OR baaaaaaaaaaaaaz
100 | )
101 | AND
102 | fooooooooooo""")
103 | self.assertEqual("\n" + prettify(self.fat_tree), """
104 | subject: (
105 | fiiiiiiiiiiz
106 | OR
107 | baaaaaaaaaar AND baaaaaaaaaaaaaz
108 | )
109 | AND
110 | fooooooooooo
111 | AND
112 | wiiiiiiiiiz""")
113 |
114 | def test_normal_inline_ops(self):
115 | prettify = Prettifier(indent=4, max_len=50, inline_ops=True)
116 | self.assertEqual("\n" + prettify(self.big_tree), """
117 | (
118 | baaaaaaaaaar OR baaaaaaaaaaaaaz ) AND
119 | fooooooooooo""")
120 | self.assertEqual("\n" + prettify(self.fat_tree), """
121 | subject: (
122 | fiiiiiiiiiiz OR
123 | baaaaaaaaaar AND baaaaaaaaaaaaaz ) AND
124 | fooooooooooo AND
125 | wiiiiiiiiiz""")
126 |
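127 |
128 | # For illustration only, not part of the test suite: prettify() accepts any
129 | # parsed tree, so a user-supplied query can be re-displayed nicely.
130 | def _prettify_sketch():
131 |     from luqum.parser import parser
132 |     from luqum.pretty import prettify
133 |
134 |     tree = parser.parse("fooooooooooo AND (baaaaaaaaaar OR baaaaaaaaaaaaaz)")
135 |     return prettify(tree)  # default Prettifier settings, as in test_one_liner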
--------------------------------------------------------------------------------
/tests/test_quick_start.rst:
--------------------------------------------------------------------------------
1 | ../docs/source/quick_start.rst
--------------------------------------------------------------------------------
/tests/test_thread.py:
--------------------------------------------------------------------------------
1 | import queue
2 | import threading
3 |
4 | import ply.lex as lex
5 |
6 | from luqum.parser import parser
7 | from luqum.thread import parse
8 | from tests import alternative_lexer
9 |
10 |
11 | def test_thread_parse():
12 |
13 | result_queue = queue.Queue()
14 | qs1 = """
15 | (title:"foo bar" AND body:"quick fox") OR title:fox AND
16 | (title:"foo bar" AND body:"quick fox") OR
17 | title:fox AND (title:"foo bar" AND body:"quick fox") OR
18 | title:fox AND (title:"foo bar" AND body:"quick fox") OR
19 | title:fox AND (title:"foo bar" AND body:"quick fox") OR title:fox
20 | """
21 | expected_tree = parser.parse(qs1)
22 |
23 | def run(q):
24 | parse(qs1)
25 | tree = parse(qs1)
26 | q.put(tree)
27 |
28 |     # make concurrent calls
29 | threads = [threading.Thread(target=run, args=(result_queue,)) for i in range(100)]
30 | for thread in threads:
31 | thread.start()
32 | for thread in threads:
33 | thread.join()
34 | assert result_queue.qsize() == 100
35 | for i in range(100):
36 | assert result_queue.get() == expected_tree
37 |
38 |
39 | def test_thread_lex_global_state():
40 | """
41 |     The last lexer created is the one PLY uses globally by default. If another
42 |     library creates its own lexer, it should not impact luqum.
43 |
44 | More info: [Multiple Parsers and
45 | Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37)
46 | """
47 | qs = '(title:"foo bar" AND body:"quick fox")'
48 |
49 | lex.lex(module=alternative_lexer)
50 | # if there is a "luqum.exceptions.ParseSyntaxError", the wrong lexer was
51 | # used.
52 | parse(qs)
53 |
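54 |
55 | # For illustration only, not part of the test suite: luqum.thread.parse is the
56 | # entry point to prefer over luqum.parser.parser.parse when parsing from
57 | # several threads. As the two tests above check, it isolates lexer state so
58 | # that concurrent parses, or lexers created by other libraries, do not
59 | # interfere with each other.
60 | def _threaded_parse_sketch(query_string):
61 |     from luqum.thread import parse
62 |
63 |     return parse(query_string)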
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 |
3 | from luqum.parser import parser
4 | from luqum.tree import (Group, Word, AndOperation, OrOperation, BoolOperation,
5 | UnknownOperation, Prohibit, Plus, From, To, Range, SearchField,
6 | Boost)
7 | from luqum.utils import UnknownOperationResolver, OpenRangeTransformer
8 |
9 |
10 | class UnknownOperationResolverTestCase(TestCase):
11 |
12 | def test_and_resolution(self):
13 | tree = (
14 | UnknownOperation(
15 | Word("a"),
16 | Word("b"),
17 | OrOperation(Word("c"), Word("d"))))
18 | expected = (
19 | AndOperation(
20 | Word("a"),
21 | Word("b"),
22 | OrOperation(Word("c"), Word("d"))))
23 | resolver = UnknownOperationResolver(resolve_to=AndOperation)
24 | self.assertEqual(resolver(tree), expected)
25 |
26 | def test_or_resolution(self):
27 | tree = (
28 | UnknownOperation(
29 | Word("a"),
30 | Word("b"),
31 | AndOperation(Word("c"), Word("d"))))
32 | expected = (
33 | OrOperation(
34 | Word("a"),
35 | Word("b"),
36 | AndOperation(Word("c"), Word("d"))))
37 | resolver = UnknownOperationResolver(resolve_to=OrOperation)
38 | self.assertEqual(resolver(tree), expected)
39 |
40 | def test_lucene_resolution_simple(self):
41 | tree = (
42 | UnknownOperation(
43 | Word("a"),
44 | Word("b"),
45 | UnknownOperation(Word("c"), Word("d"))))
46 | expected = (
47 | AndOperation(
48 | Word("a"),
49 | Word("b"),
50 | AndOperation(Word("c"), Word("d"))))
51 | resolver = UnknownOperationResolver(resolve_to=None)
52 | self.assertEqual(resolver(tree), expected)
53 |
54 | def test_lucene_resolution_bool(self):
55 | tree = parser.parse("a b (+f +g) -(c d) +e")
56 | expected = (
57 | BoolOperation(
58 | Word("a"),
59 | Word("b"),
60 | Group(BoolOperation(Plus(Word("f")), Plus(Word("g")))),
61 | Prohibit(Group(BoolOperation(Word("c"), Word("d")))),
62 | Plus(Word('e'))))
63 | resolver = UnknownOperationResolver(resolve_to=BoolOperation)
64 | self.assertEqual(resolver(tree), expected)
65 |
66 | def test_lucene_resolution_last_op(self):
67 | tree = (
68 | OrOperation(
69 | Word("a"),
70 | Word("b"),
71 | UnknownOperation(Word("c"), Word("d")),
72 | AndOperation(
73 | Word("e"),
74 | UnknownOperation(Word("f"), Word("g"))),
75 | UnknownOperation(Word("i"), Word("j")),
76 | OrOperation(
77 | Word("k"),
78 | UnknownOperation(Word("l"), Word("m"))),
79 | UnknownOperation(Word("n"), Word("o"))))
80 | expected = (
81 | OrOperation(
82 | Word("a"),
83 | Word("b"),
84 | OrOperation(Word("c"), Word("d")),
85 | AndOperation(
86 | Word("e"),
87 | AndOperation(Word("f"), Word("g"))),
88 | AndOperation(Word("i"), Word("j")),
89 | OrOperation(
90 | Word("k"),
91 | OrOperation(Word("l"), Word("m"))),
92 | OrOperation(Word("n"), Word("o"))))
93 | resolver = UnknownOperationResolver(resolve_to=None)
94 | self.assertEqual(resolver(tree), expected)
95 |
96 | def test_lucene_resolution_last_op_with_group(self):
97 | tree = (
98 | OrOperation(
99 | Word("a"),
100 | Word("b"),
101 | Group(
102 | AndOperation(
103 | Word("c"),
104 | UnknownOperation(Word("d"), Word("e")))),
105 | UnknownOperation(Word("f"), Word("g")),
106 | Group(
107 | UnknownOperation(Word("h"), Word("i")))))
108 | expected = (
109 | OrOperation(
110 | Word("a"),
111 | Word("b"),
112 | Group(
113 | AndOperation(
114 | Word("c"),
115 | AndOperation(Word("d"), Word("e")))),
116 | OrOperation(Word("f"), Word("g")),
117 | Group(
118 | AndOperation(Word("h"), Word("i")))))
119 | resolver = UnknownOperationResolver(resolve_to=None)
120 | self.assertEqual(resolver(tree), expected)
121 |
122 | def test_resolve_to_verification(self):
123 | with self.assertRaises(ValueError):
124 | UnknownOperationResolver(resolve_to=object())
125 |
126 | def test_head_tail_pos(self):
127 | tree = parser.parse("\ra\nb (c\t (d e f)) ")
128 | resolver = UnknownOperationResolver(resolve_to=None)
129 | transformed = resolver(tree)
130 | self.assertEqual(str(transformed), "\ra\nAND b AND (c\t AND (d AND e AND f)) ")
131 | self.assertEqual(transformed.pos, tree.pos)
132 | self.assertEqual(transformed.size, tree.size)
133 | and_op, orig_op = transformed.children[2].children[0], tree.children[2].children[0]
134 | self.assertEqual(type(and_op), AndOperation)
135 | self.assertEqual(and_op.pos, orig_op.pos)
136 | self.assertEqual(and_op.size, orig_op.size)
137 | and_op, orig_op = and_op.children[1].children[0], orig_op.children[1].children[0]
138 | self.assertEqual(type(and_op), AndOperation)
139 | self.assertEqual(and_op.pos, orig_op.pos)
140 | self.assertEqual(and_op.size, orig_op.size)
141 |
142 | resolver = UnknownOperationResolver(resolve_to=OrOperation)
143 | transformed = resolver(tree)
144 | self.assertEqual(str(transformed), "\ra\nOR b OR (c\t OR (d OR e OR f)) ")
145 |
146 |
147 | class OpenRangeTransformerTestCase(TestCase):
148 | def test_simple_resolution_from(self):
149 | tree = (
150 | From(Word("1"), True)
151 | )
152 | expected = (
153 | Range(Word("1", tail=" "), Word("*", head=" "), True, True)
154 | )
155 | for merge_ranges in (True, False):
156 | with self.subTest(merge_ranges=merge_ranges):
157 | resolver = OpenRangeTransformer(merge_ranges=merge_ranges)
158 | output = resolver(tree)
159 | self.assertEqual(output, expected)
160 | self.assertEqual(str(output), str(expected))
161 |
162 | def test_simple_resolution_to(self):
163 | tree = (
164 | To(Word("1"), False)
165 | )
166 | expected = (
167 | Range(Word("*", tail=" "), Word("1", head=" "), True, False)
168 | )
169 | for merge_ranges in (True, False):
170 | with self.subTest(merge_ranges=merge_ranges):
171 | resolver = OpenRangeTransformer(merge_ranges=merge_ranges)
172 | output = resolver(tree)
173 | self.assertEqual(output, expected)
174 | self.assertEqual(str(output), str(expected))
175 |
176 | def test_and_resolution(self):
177 | tree = (
178 | AndOperation(
179 | From(Word("1"), True),
180 | To(Word("2"), True),
181 | )
182 | )
183 | expected = (
184 | AndOperation(
185 | Range(Word("1", tail=" "), Word("2", head=" "), True, True)
186 | )
187 | )
188 | resolver = OpenRangeTransformer(merge_ranges=True)
189 | output = resolver(tree)
190 | self.assertEqual(output, expected)
191 | self.assertEqual(str(output), str(expected))
192 |
193 | def test_and_resolution_without_merge(self):
194 | tree = (
195 | AndOperation(
196 | From(Word("1"), True),
197 | To(Word("2"), True),
198 | )
199 | )
200 | expected = (
201 | AndOperation(
202 | Range(Word("1", tail=" "), Word("*", head=" "), True),
203 | Range(Word("*", tail=" "), Word("2", head=" "), True),
204 | )
205 | )
206 | resolver = OpenRangeTransformer(merge_ranges=False)
207 | output = resolver(tree)
208 | self.assertEqual(output, expected)
209 | self.assertEqual(str(output), str(expected))
210 |
211 | def test_unjoined_resolution(self):
212 | tree = (
213 | AndOperation(
214 | From(Word("1"), False),
215 | From(Word("2"), True),
216 | )
217 | )
218 | expected = (
219 | AndOperation(
220 | Range(Word("1", tail=" "), Word("*", head=" "), False, True),
221 | Range(Word("2", tail=" "), Word("*", head=" "), True, True)
222 | )
223 | )
224 | resolver = OpenRangeTransformer(merge_ranges=True)
225 | output = resolver(tree)
226 | self.assertEqual(output, expected)
227 | self.assertEqual(str(output), str(expected))
228 |
229 | def test_normal_ranges_are_untouched(self):
230 | tree = (
231 | AndOperation(
232 | Range(Word("1"), Word("2"), True, True),
233 | Range(Word("*"), Word("*"), True, True),
234 | Range(Word("1"), Word("*"), True, True),
235 | )
236 | )
237 | for merge_ranges in (True, False):
238 | with self.subTest(merge_ranges=merge_ranges):
239 | resolver = OpenRangeTransformer(merge_ranges=merge_ranges)
240 | output = resolver(tree)
241 | self.assertEqual(output, tree)
242 |
243 | def test_first_range_is_merged(self):
244 | tree = (
245 | AndOperation(
246 | Range(Word("*"), Word("2"), True, True),
247 | Range(Word("*"), Word("*"), True, True),
248 | Range(Word("*"), Word("3"), True, True),
249 | Range(Word("1"), Word("*"), True, True),
250 | Range(Word("4"), Word("*"), True, True),
251 | )
252 | )
253 | expected = (
254 | AndOperation(
255 | Range(Word("1"), Word("2"), True, True),
256 | Range(Word("*"), Word("*"), True, True),
257 | Range(Word("4"), Word("3"), True, True),
258 | )
259 | )
260 | resolver = OpenRangeTransformer(merge_ranges=True)
261 | output = resolver(tree)
262 | self.assertEqual(output, expected)
263 | self.assertEqual(str(output), str(expected))
264 |
265 | def test_do_not_merge_unknown(self):
266 | tree = (
267 | UnknownOperation(
268 | Range(Word("1"), Word("*"), True, True),
269 | Range(Word("*"), Word("2"), True, True),
270 | )
271 | )
272 | resolver = OpenRangeTransformer(merge_ranges=True)
273 | output = resolver(tree)
274 | self.assertEqual(output, tree)
275 |
276 | def test_do_not_merge_searchfield(self):
277 | tree = (
278 | AndOperation(
279 | Range(Word("1"), Word("*"), True, True),
280 | SearchField("foo", Range(Word("*"), Word("2"), True, True))
281 | )
282 | )
283 | resolver = OpenRangeTransformer(merge_ranges=True)
284 | output = resolver(tree)
285 | self.assertEqual(output, tree)
286 |
287 | def test_do_not_merge_boosted(self):
288 | tree = (
289 | AndOperation(
290 | Boost(Range(Word("1"), Word("*"), True, True), 2),
291 | Boost(Range(Word("*"), Word("2"), True, True), 2),
292 | )
293 | )
294 | resolver = OpenRangeTransformer(merge_ranges=True)
295 | output = resolver(tree)
296 | self.assertEqual(output, tree)
297 |
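298 |
299 | # For illustration only, not part of the test suite: resolving unknown
300 | # operations and then normalizing open ranges is a typical preprocessing
301 | # pipeline (assuming the documented luqum API).
302 | def _utils_sketch(query_string):
303 |     from luqum.parser import parser
304 |     from luqum.utils import OpenRangeTransformer, UnknownOperationResolver
305 |
306 |     tree = parser.parse(query_string)
307 |     # resolve_to=None follows the lucene-like convention tested above:
308 |     # reuse the last explicit operation, defaulting to AND
309 |     tree = UnknownOperationResolver(resolve_to=None)(tree)
310 |     # turn one-sided bounds (From/To nodes) into explicit ranges, merging
311 |     # complementary ones since merge_ranges is True
312 |     return OpenRangeTransformer(merge_ranges=True)(tree)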
--------------------------------------------------------------------------------
/tests/test_visitor.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import copy
3 | from unittest import TestCase
4 |
5 | from luqum.tree import (
6 | NONE_ITEM, Group, Word, Phrase, AndOperation, OrOperation, Proximity, SearchField,
7 | Boost, Fuzzy, Regex,
8 | )
9 | from luqum.visitor import (
10 | PathTrackingTransformer, PathTrackingVisitor, TreeTransformer, TreeVisitor,
11 | )
12 |
13 |
14 | class TreeVisitorTestCase(TestCase):
15 |
16 | class BasicVisitor(TreeVisitor):
17 |         """Dummy visitor, simply yielding a list of nodes."""
18 |
19 | def generic_visit(self, node, context):
20 | yield node
21 | yield from super().generic_visit(node, context)
22 |
23 | class TrackingParentsVisitor(TreeVisitor):
24 | """Visitor, yielding nodes and parents."""
25 |
26 | def generic_visit(self, node, context):
27 | yield node, context.get("parents")
28 | yield from super().generic_visit(node, context)
29 |
30 | class MROVisitor(TreeVisitor):
31 |
32 | def visit_or_operation(self, node, context):
33 | yield "{} OR {}".format(*node.children)
34 | yield from super().generic_visit(node, context)
35 |
36 | def visit_base_operation(self, node, context):
37 | yield "{} BASE_OP {}".format(*node.children)
38 | yield from super().generic_visit(node, context)
39 |
40 |         def visit_word(self, node, context):
41 | yield node.value
42 |
43 | def test_generic_visit(self):
44 | tree = AndOperation(Word("foo"), Word("bar"))
45 | visitor = TreeVisitor()
46 | nodes = visitor.visit(tree)
47 | self.assertEqual(nodes, [])
48 | # with a context for coverage…
49 | nodes = visitor.visit(tree, context={})
50 | self.assertEqual(nodes, [])
51 |
52 | def test_basic_traversal(self):
53 | tree = AndOperation(Word("foo"), Word("bar"))
54 | visitor = self.BasicVisitor()
55 | nodes = visitor.visit(tree)
56 | self.assertListEqual([tree, Word("foo"), Word("bar")], nodes)
57 |
58 | def test_parents_tracking(self):
59 | tree = AndOperation(Word("foo"), Proximity(Phrase('"bar"'), 2))
60 | visitor = self.TrackingParentsVisitor(track_parents=True)
61 | nodes = visitor.visit(tree)
62 | self.assertListEqual(
63 | [
64 | (tree, None),
65 | (Word("foo"), (tree,)),
66 | (Proximity(Phrase('"bar"'), degree=2), (tree,)),
67 | (Phrase('"bar"'), (tree, Proximity(Phrase('"bar"'), 2))),
68 | ],
69 | nodes,
70 | )
71 |
72 | def test_parents_tracking_no_tracking(self):
73 | tree = AndOperation(Word("foo"), Phrase('"bar"'))
74 |         # no parents tracking!
75 | visitor = self.TrackingParentsVisitor()
76 | nodes = visitor.visit(tree)
77 | self.assertListEqual([(tree, None), (Word("foo"), None), (Phrase('"bar"'), None)], nodes)
78 |
79 | def test_mro(self):
80 | visitor = self.MROVisitor()
81 |
82 | tree = OrOperation(Word('a'), Word('b'))
83 | result = visitor.visit(tree)
84 | self.assertEqual(list(result), ['a OR b', 'a', 'b'])
85 |
86 | # AndOperation has no specific method,
87 |         # but inherits from BaseOperation, hence uses visit_base_operation
88 | tree = AndOperation(Word('a'), Word('b'))
89 | result = visitor.visit(tree)
90 | self.assertEqual(list(result), ['a BASE_OP b', 'a', 'b'])
91 |
92 |
93 | class TreeTransformerTestCase(TestCase):
94 |
95 | class BasicTransformer(TreeTransformer):
96 | """
97 |         Dummy transformer that simply turns any Word node's value into "lol"
98 | """
99 | def visit_word(self, node, context):
100 | yield Word(context.get("replacement", 'lol'))
101 |
102 | def visit_phrase(self, node, context):
103 | yield from []
104 |
105 | def visit_base_operation(self, node, context):
106 | new_node, = super().generic_visit(node, context)
107 | # if new_node has no operands, it's like a removal
108 | if len(new_node.children) == 0:
109 | return
110 |             # if we have only one operand, return it
111 | elif len(new_node.children) == 1:
112 | yield new_node.children[0]
113 | else:
114 | # normal return
115 | yield new_node
116 |
117 | class TrackingParentsTransformer(TreeTransformer):
118 |
119 | def visit_word(self, node, context):
120 | new_node, = self.generic_visit(node, context)
121 | if any(isinstance(p, SearchField) for p in context["new_parents"]):
122 | new_node.value = "lol"
123 | yield new_node
124 |
125 | class RaisingTreeTransformer(TreeTransformer):
126 |
127 | def generic_visit(self, node, context):
128 | yield node
129 | yield node
130 |
131 | class RaisingTreeTransformer2(TreeTransformer):
132 |
133 | def generic_visit(self, node, context):
134 | raise ValueError("Random error")
135 |
136 | def test_basic_traversal(self):
137 | tree = AndOperation(Word("foo"), Word("bar"))
138 |
139 | transformer = self.BasicTransformer()
140 | new_tree = transformer.visit(tree)
141 | self.assertEqual(new_tree, AndOperation(Word("lol"), Word("lol")))
142 |
143 | def test_context_value(self):
144 | tree = AndOperation(Word("foo"), Word("bar"))
145 |
146 | transformer = self.BasicTransformer()
147 | new_tree = transformer.visit(tree, context={"replacement": "rotfl"})
148 | self.assertEqual(new_tree, AndOperation(Word("rotfl"), Word("rotfl")))
149 |
150 | def test_no_transform(self):
151 | tree = AndOperation(NONE_ITEM, NONE_ITEM)
152 | transformer = self.BasicTransformer()
153 | new_tree = transformer.visit(tree)
154 | self.assertEqual(new_tree, tree)
155 |
156 | def test_one_word(self):
157 | tree = Word("foo")
158 | transformer = self.BasicTransformer()
159 | new_tree = transformer.visit(tree)
160 | self.assertEqual(new_tree, Word("lol"))
161 |
162 | def test_tracking_parents(self):
163 | tree = OrOperation(Word("foo"), SearchField("test", Word("bar")))
164 | expected = OrOperation(Word("foo"), SearchField("test", Word("lol")))
165 | transformer = self.TrackingParentsTransformer(track_new_parents=True)
166 | new_tree = transformer.visit(tree)
167 | self.assertEqual(new_tree, expected)
168 |
169 | def test_removal(self):
170 | tree = AndOperation(
171 | OrOperation(Word("spam"), Word("ham")),
172 | AndOperation(Word("foo"), Phrase('"bar"')),
173 | AndOperation(Phrase('"baz"'), Phrase('"biz"')),
174 | )
175 |
176 | transformer = self.BasicTransformer()
177 | new_tree = transformer.visit(tree)
178 |
179 | self.assertEqual(
180 | new_tree,
181 | AndOperation(OrOperation(Word("lol"), Word("lol")), Word("lol")),
182 | )
183 |
184 | def test_silent_value_error(self):
185 |         # if some attribute misleads the search for child nodes, do not raise
186 | tree = AndOperation(Word("a"), Word("b"))
187 | setattr(tree, "misleading1", ())
188 | setattr(tree, "misleading2", [])
189 |         # hackishly patch __dict__ to be sure operands are in the right order for the test
190 | tree.__dict__ = collections.OrderedDict(tree.__dict__)
191 | tree.__dict__['operands'] = tree.__dict__.pop('operands') # operands are now last
192 |
193 | transformer = self.BasicTransformer()
194 | new_tree = transformer.visit(tree)
195 |
196 | self.assertEqual(new_tree, AndOperation(Word("lol"), Word("lol")))
197 |
198 | def test_repeating_expression(self):
199 |         # non-regression test
200 | tree = AndOperation(
201 | Group(OrOperation(Word('bar'), Word('foo'))),
202 | Group(OrOperation(Word('bar'), Word('foo'), Word('spam'))),
203 | )
204 | # basic transformer should not change tree
205 | same_tree = TreeTransformer().visit(copy.deepcopy(tree))
206 | self.assertEqual(same_tree, tree)
207 |
208 | def test_more_than_one_element_raises(self):
209 | tree = Word("foo")
210 | with self.assertRaises(ValueError) as raised:
211 | self.RaisingTreeTransformer().visit(tree)
212 | self.assertIn(
213 | "The visit of the tree should have produced exactly one element",
214 | str(raised.exception),
215 | )
216 |
217 | def test_value_error_pass_through(self):
218 | # raising a value error that is not related to unpacking passed through
219 | tree = Word("foo")
220 | with self.assertRaises(ValueError) as raised:
221 | self.RaisingTreeTransformer2().visit(tree)
222 | self.assertEqual("Random error", str(raised.exception))
223 |
224 |
225 | class PathTrackingVisitorTestCase(TestCase):
226 |
227 | class TermPathVisitor(PathTrackingVisitor):
228 |
229 | def visit_term(self, node, context):
230 | yield (context["path"], node.value)
231 |
232 | @classmethod
233 | def setUpClass(cls):
234 | cls.visit = cls.TermPathVisitor().visit
235 |
236 | def test_visit_simple_term(self):
237 | paths = self.visit(Word("foo"))
238 | self.assertEqual(paths, [((), "foo")])
239 |
240 | def test_visit_complex(self):
241 | tree = AndOperation(
242 | Group(OrOperation(Word("foo"), Word("bar"), Boost(Fuzzy(Word("baz")), force=2))),
243 | Proximity(Phrase('"spam ham"')),
244 | SearchField("fizz", Regex("/fuzz/")),
245 | )
246 | paths = self.visit(tree)
247 | self.assertEqual(
248 | sorted(paths),
249 | [
250 | ((0, 0, 0), "foo"),
251 | ((0, 0, 1), "bar"),
252 | ((0, 0, 2, 0, 0), "baz"),
253 | ((1, 0), '"spam ham"'),
254 | ((2, 0), '/fuzz/'),
255 | ]
256 | )
257 |
258 |
259 | class PathTrackingTransformerTestCase(TestCase):
260 |
261 | class TermPathTransformer(PathTrackingTransformer):
262 |
263 | def visit_term(self, node, context):
264 | path = '-'.join(str(i) for i in context['path'])
265 | quote = '"' if isinstance(node, Phrase) else "/" if isinstance(node, Regex) else ""
266 | value = node.value.strip(quote)
267 | new_node = node.clone_item(value=f"{quote}{value}@{path}{quote}")
268 | yield new_node
269 |
270 | @classmethod
271 | def setUpClass(cls):
272 | cls.transform = cls.TermPathTransformer().visit
273 |
274 | def test_visit_simple_term(self):
275 | tree = self.transform(Word("foo"))
276 | self.assertEqual(tree, Word("foo@"))
277 |
278 | def test_visit_complex(self):
279 | tree = AndOperation(
280 | Group(OrOperation(Word("foo"), Word("bar"), Boost(Fuzzy(Word("baz")), force=2))),
281 | Proximity(Phrase('"spam ham"')),
282 | SearchField("fizz", Regex("/fuzz/")),
283 | )
284 | transformed = self.transform(tree)
285 | expected = AndOperation(
286 | Group(OrOperation(
287 | Word("foo@0-0-0"),
288 | Word("bar@0-0-1"),
289 | Boost(Fuzzy(Word("baz@0-0-2-0-0")), force=2),
290 | )),
291 | Proximity(Phrase('"spam ham@1-0"')),
292 | SearchField("fizz", Regex("/fuzz@2-0/")),
293 | )
294 | self.assertEqual(transformed, expected)
295 |
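296 |
297 | # For illustration only, not part of the test suite: the minimal shape of a
298 | # custom transformer. visit_<snake_case_class_name> methods yield replacement
299 | # nodes, and generic_visit takes care of everything else.
300 | def _transformer_sketch(tree):
301 |     from luqum.visitor import TreeTransformer
302 |
303 |     class LowercaseWords(TreeTransformer):
304 |         def visit_word(self, node, context):
305 |             # clone_item keeps head/tail whitespace while changing the value
306 |             yield node.clone_item(value=node.value.lower())
307 |
308 |     return LowercaseWords().visit(tree)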
--------------------------------------------------------------------------------