├── .github └── workflows │ └── test.yml ├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── CHANGELOG.rst ├── LICENSE ├── LICENSE.Apache2 ├── LICENSE.LGPLv3 ├── MANIFEST.in ├── Makefile ├── README.rst ├── docs ├── Makefile ├── make.bat └── source │ ├── _static │ └── luqum-logo.png │ ├── about.rst │ ├── api.rst │ ├── conf.py │ ├── index.rst │ ├── install.rst │ └── quick_start.rst ├── luqum-logo.png ├── luqum ├── __init__.py ├── auto_head_tail.py ├── check.py ├── deprecated_utils.py ├── elasticsearch │ ├── __init__.py │ ├── nested.py │ ├── schema.py │ ├── tree.py │ └── visitor.py ├── exceptions.py ├── head_tail.py ├── naming.py ├── parser.py ├── parsetab.py ├── pretty.py ├── thread.py ├── tree.py ├── utils.py └── visitor.py ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── alternative_lexer.py ├── test_auto_head_tail.py ├── test_check.py ├── test_deprecated_utils.py ├── test_elasticsearch ├── __init__.py ├── book.json ├── es_integration_utils.py ├── test_es_integration.py ├── test_es_naming.py ├── test_estree.py ├── test_naming.py ├── test_nested.py ├── test_schema.py └── tests.py ├── test_headtail.py ├── test_naming.py ├── test_parser.py ├── test_pretty.py ├── test_quick_start.rst ├── test_thread.py ├── test_tree.py ├── test_utils.py └── test_visitor.py /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: code quality checks and unit tests 2 | 3 | on: 4 | pull_request: 5 | 6 | 7 | jobs: 8 | # quality 9 | quality_checks: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: 14 | - "3.10" 15 | - "3.11" 16 | - "3.12" 17 | - "3.13" 18 | es-version: 19 | - "8.17.1" 20 | es-dsl-version: 21 | - "8.17.1" 22 | include: 23 | # only test older ES version with python 3.10 24 | - python-version: "3.10" 25 | es-version: "6.8.23" 26 | es-dsl-version: "6.4.0" 27 | - python-version: "3.10" 28 | es-version: "7.17.6" 29 | es-dsl-version: "7.4.0" 30 | # but also runs with newer versions of ES 31 | - python-version: "3.10" 32 | es-version: "8.17.1" 33 | es-dsl-version: "8.17.1" 34 | 35 | env: 36 | ES_VERSION: "${{ matrix.es-version }}" 37 | steps: 38 | #---------------------------------------------- 39 | # check-out repo and set-up python 40 | #---------------------------------------------- 41 | - name: Check out repository 42 | uses: actions/checkout@v3 43 | - name: Setup python 44 | uses: actions/setup-python@v4 45 | with: 46 | python-version: "${{ matrix.python-version }}" 47 | - name: install project 48 | run: | 49 | pip install -r requirements-dev.txt 50 | pip install . 
51 | # coveralls not yet compatible with python3.13 52 | [[ ${{ matrix.python-version }} != 3.13 ]] && pip install coveralls 53 | pip install elasticsearch-dsl==${{ matrix.es-dsl-version }} 54 | - name: run tests 55 | run: | 56 | make quality && \ 57 | make es_tests 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # PLY 7 | parser.out 8 | parsetab.py 9 | 10 | # coverage 11 | cover/ 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | .venv 51 | nosetests.xml 52 | coverage.xml 53 | *,cover 54 | .hypothesis/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | 63 | # Sphinx documentation 64 | docs/build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # nose 70 | .noseids 71 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-20.04 10 | tools: 11 | python: "3.9" 12 | 13 | # Build documentation in the docs/ directory with Sphinx 14 | sphinx: 15 | configuration: docs/source/conf.py 16 | 17 | # If using Sphinx, optionally build your docs in additional formats such as PDF 18 | formats: 19 | - pdf 20 | - epub 21 | 22 | # Optionally declare the Python requirements required to build your docs 23 | python: 24 | install: 25 | - requirements: requirements-dev.txt -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | matrix: 4 | include: 5 | - env: ES_APT_URL=https://packages.elastic.co/elasticsearch/2.x/debian ES_DSL_VERS=2.2.0 ES_VERS=2.2.1 6 | python: 3.7 7 | sudo: true 8 | dist: bionic 9 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0 10 | python: 3.7 11 | dist: bionic 12 | sudo: true 13 | - env: ES_APT_URL=https://packages.elastic.co/elasticsearch/2.x/debian ES_DSL_VERS=2.2.0 ES_VERS=2.2.1 14 | python: 3.8 15 | sudo: true 16 | dist: bionic 17 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0 18 | python: 3.8 19 | dist: bionic 20 | sudo: true 21 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0 22 | python: 3.9 23 | dist: bionic 24 | sudo: true 25 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/6.x/apt ES_DSL_VERS=6.3.1 ES_VERS=6.4.3 26 | python: 3.10
27 | dist: bionic 28 | sudo: true 29 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0 30 | python: 3.10 31 | dist: bionic 32 | sudo: true 33 | 34 | 35 | before_install: 36 | - sudo rm /etc/apt/sources.list; sudo touch /etc/apt/sources.list 37 | - wget -qO - https://packages.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add - 38 | - echo "deb $ES_APT_URL stable main" | sudo tee -a /etc/apt/sources.list.d/elastic.list 39 | - sudo apt-get update && sudo apt-get install elasticsearch=$ES_VERS -y --allow-downgrades 40 | - sudo chown -R elasticsearch:elasticsearch /etc/default/elasticsearch 41 | - sudo systemctl start elasticsearch.service 42 | - while ! curl -XGET "localhost:9200";do sleep 1; done 43 | - curl localhost:9200 44 | 45 | install: 46 | - pip install . 47 | - pip install -r requirements-dev.txt 48 | - pip install coveralls 49 | - pip install elasticsearch-dsl==$ES_DSL_VERS 50 | 51 | script: 52 | - make tests 53 | - make quality 54 | 55 | after_success: 56 | # coveralls only for python3.8 and ES 7 57 | - python --version |grep 3.8 && [ $ES_DSL_VERS == "7.2.1" ] && coveralls 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This project is dual-licensed. 2 | 3 | See LICENSE.LGPLv3 and LICENSE.Apache2 4 | -------------------------------------------------------------------------------- /LICENSE.Apache2: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below).
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /LICENSE.LGPLv3: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed.
7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 
80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 
150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include *.rst 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ES_VERSION ?= 8.17.1 2 | 3 | tests: 4 | pytest 5 | 6 | # integration test with ES using docker 7 | es_tests: 8 | ( docker ps |grep luqum_test_es ) || \ 9 | docker run --name luqum_test_es --rm -d -ti -p 127.0.0.1:9200:9200 \ 10 | -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ 11 | -e "xpack.security.enabled=false" \ 12 | elasticsearch:${ES_VERSION} 13 | # wait for ES to be ready 14 | @echo "waiting for ES to be ready" 15 | @while ! curl -XGET "localhost:9200" >/dev/null 2>&1;do sleep 1; echo -n "."; done 16 | pytest 17 | docker stop luqum_test_es 18 | 19 | quality: 20 | flake8 luqum tests 21 | 22 | # To upload files, you need to have a ~/.pypirc file locally. 23 | # This file should contain all the necessary passwords and API tokens. 24 | distribute: 25 | rm -r build 26 | rm dist/* 27 | python -m build --wheel 28 | python -m build --sdist 29 | python -m twine upload --verbose --repository luqum dist/* 30 | 31 | .PHONY: tests quality distribute 32 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | luqum - A Lucene query parser in Python, using PLY 2 | ######################################################### 3 | 4 | |pypi-version| |readthedocs| |travis| |coveralls| 5 | 6 | |logo| 7 | 8 | "luqum" (as in LUcene QUery Manipulator) is a tool to parse queries 9 | written in the `Lucene Query DSL`_ and build an abstract syntax tree 10 | to inspect, analyze or otherwise manipulate search queries. 11 | 12 | It enables enriching the Lucene Query DSL semantics 13 | (for example to support nested object searches or to apply special treatment to some fields), 14 | and transforming Lucene DSL queries into native `ElasticSearch JSON DSL`_ queries. 15 | 16 | Thanks to luqum, your users may continue to write queries like: 17 | `author.last_name:Smith OR author:(age:[25 TO 34] AND first_name:John)` 18 | and you will be able to leverage the ElasticSearch query DSL 19 | and control the precise meaning of each search term. 20 | 21 | Luqum is dual-licensed under Apache2.0 and LGPLv3.
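A minimal sketch of this workflow (the query is illustrative, and the builder is used here with its default options; real setups usually pass options derived from the index mapping, as described in the documentation):

.. code-block:: python

    from luqum.parser import parser
    from luqum.elasticsearch.visitor import ElasticsearchQueryBuilder

    # parse a Lucene query into an abstract syntax tree
    tree = parser.parse('title:"quick start" AND published:[2020 TO 2023]')

    # the tree can be inspected and manipulated, and it serializes
    # back to the original query string
    assert str(tree) == 'title:"quick start" AND published:[2020 TO 2023]'

    # translate the tree into an Elasticsearch JSON DSL query (a dict)
    es_query = ElasticsearchQueryBuilder()(tree)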
22 | 23 | Compatible with Python 3.10+ 24 | 25 | Installation 26 | ============ 27 | 28 | ``pip install luqum`` 29 | 30 | 31 | Dependencies 32 | ============ 33 | 34 | `PLY`_ >= 3.11 35 | 36 | 37 | Full documentation 38 | ================== 39 | 40 | http://luqum.readthedocs.org/en/latest/ 41 | 42 | 43 | .. _`Lucene Query DSL` : https://lucene.apache.org/core/3_6_0/queryparsersyntax.html 44 | .. _`ElasticSearch JSON DSL`: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html 45 | 46 | .. _`PLY`: http://www.dabeaz.com/ply/ 47 | 48 | .. |logo| image:: https://raw.githubusercontent.com/jurismarches/luqum/master/luqum-logo.png 49 | 50 | .. |pypi-version| image:: https://img.shields.io/pypi/v/luqum.svg 51 | :target: https://pypi.python.org/pypi/luqum 52 | :alt: Latest PyPI version 53 | .. |travis| image:: http://img.shields.io/travis/jurismarches/luqum/master.svg?style=flat 54 | :target: https://travis-ci.org/jurismarches/luqum 55 | .. |coveralls| image:: http://img.shields.io/coveralls/jurismarches/luqum/master.svg?style=flat 56 | :target: https://coveralls.io/r/jurismarches/luqum 57 | .. |readthedocs| image:: https://readthedocs.org/projects/luqum/badge/?version=latest 58 | :target: http://luqum.readthedocs.org/en/latest/?badge=latest 59 | :alt: Documentation Status 60 | 61 | 62 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 
16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make <target>' where <target> is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " epub3 to make an epub3" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | @echo " dummy to check syntax errors of document sources" 51 | 52 | .PHONY: clean 53 | clean: 54 | rm -rf $(BUILDDIR)/* 55 | 56 | .PHONY: html 57 | html: 58 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 61 | 62 | .PHONY: dirhtml 63 | dirhtml: 64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 65 | @echo 66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 67 | 68 | .PHONY: singlehtml 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | .PHONY: pickle 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | .PHONY: json 81 | json: 82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 83 | @echo 84 | @echo "Build finished; now you can process the JSON files." 85 | 86 | .PHONY: htmlhelp 87 | htmlhelp: 88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 89 | @echo 90 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 91 | ".hhp project file in $(BUILDDIR)/htmlhelp."
92 | 93 | .PHONY: qthelp 94 | qthelp: 95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 96 | @echo 97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/luqum.qhcp" 100 | @echo "To view the help file:" 101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/luqum.qhc" 102 | 103 | .PHONY: applehelp 104 | applehelp: 105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 106 | @echo 107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 108 | @echo "N.B. You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/luqum" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/luqum" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. 
The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 201 | 202 | .PHONY: doctest 203 | doctest: 204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 205 | @echo "Testing of doctests in the sources finished, look at the " \ 206 | "results in $(BUILDDIR)/doctest/output.txt." 207 | 208 | .PHONY: coverage 209 | coverage: 210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 211 | @echo "Testing of coverage in the sources finished, look at the " \ 212 | "results in $(BUILDDIR)/coverage/python.txt." 213 | 214 | .PHONY: xml 215 | xml: 216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 217 | @echo 218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 219 | 220 | .PHONY: pseudoxml 221 | pseudoxml: 222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 223 | @echo 224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 225 | 226 | .PHONY: dummy 227 | dummy: 228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 229 | @echo 230 | @echo "Build finished. Dummy builder generates no files." 231 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. epub3 to make an epub3 31 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 32 | echo. text to make text files 33 | echo. man to make manual pages 34 | echo. texinfo to make Texinfo files 35 | echo. gettext to make PO message catalogs 36 | echo. changes to make an overview over all changed/added/deprecated items 37 | echo. xml to make Docutils-native XML files 38 | echo. pseudoxml to make pseudoxml-XML files for display purposes 39 | echo. linkcheck to check all external links for integrity 40 | echo. doctest to run all doctests embedded in the documentation if enabled 41 | echo. coverage to run coverage check of the documentation if enabled 42 | echo.
dummy to check syntax errors of document sources 43 | goto end 44 | ) 45 | 46 | if "%1" == "clean" ( 47 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 48 | del /q /s %BUILDDIR%\* 49 | goto end 50 | ) 51 | 52 | 53 | REM Check if sphinx-build is available and fallback to Python version if any 54 | %SPHINXBUILD% 1>NUL 2>NUL 55 | if errorlevel 9009 goto sphinx_python 56 | goto sphinx_ok 57 | 58 | :sphinx_python 59 | 60 | set SPHINXBUILD=python -m sphinx.__init__ 61 | %SPHINXBUILD% 2> nul 62 | if errorlevel 9009 ( 63 | echo. 64 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 65 | echo.installed, then set the SPHINXBUILD environment variable to point 66 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 67 | echo.may add the Sphinx directory to PATH. 68 | echo. 69 | echo.If you don't have Sphinx installed, grab it from 70 | echo.http://sphinx-doc.org/ 71 | exit /b 1 72 | ) 73 | 74 | :sphinx_ok 75 | 76 | 77 | if "%1" == "html" ( 78 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 79 | if errorlevel 1 exit /b 1 80 | echo. 81 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 82 | goto end 83 | ) 84 | 85 | if "%1" == "dirhtml" ( 86 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 87 | if errorlevel 1 exit /b 1 88 | echo. 89 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 90 | goto end 91 | ) 92 | 93 | if "%1" == "singlehtml" ( 94 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 95 | if errorlevel 1 exit /b 1 96 | echo. 97 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 98 | goto end 99 | ) 100 | 101 | if "%1" == "pickle" ( 102 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 103 | if errorlevel 1 exit /b 1 104 | echo. 105 | echo.Build finished; now you can process the pickle files. 106 | goto end 107 | ) 108 | 109 | if "%1" == "json" ( 110 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 111 | if errorlevel 1 exit /b 1 112 | echo. 113 | echo.Build finished; now you can process the JSON files. 114 | goto end 115 | ) 116 | 117 | if "%1" == "htmlhelp" ( 118 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 119 | if errorlevel 1 exit /b 1 120 | echo. 121 | echo.Build finished; now you can run HTML Help Workshop with the ^ 122 | .hhp project file in %BUILDDIR%/htmlhelp. 123 | goto end 124 | ) 125 | 126 | if "%1" == "qthelp" ( 127 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 128 | if errorlevel 1 exit /b 1 129 | echo. 130 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 131 | .qhcp project file in %BUILDDIR%/qthelp, like this: 132 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\luqum.qhcp 133 | echo.To view the help file: 134 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\luqum.qhc 135 | goto end 136 | ) 137 | 138 | if "%1" == "devhelp" ( 139 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 140 | if errorlevel 1 exit /b 1 141 | echo. 142 | echo.Build finished. 143 | goto end 144 | ) 145 | 146 | if "%1" == "epub" ( 147 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 148 | if errorlevel 1 exit /b 1 149 | echo. 150 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 151 | goto end 152 | ) 153 | 154 | if "%1" == "epub3" ( 155 | %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 156 | if errorlevel 1 exit /b 1 157 | echo. 158 | echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
159 | goto end 160 | ) 161 | 162 | if "%1" == "latex" ( 163 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 164 | if errorlevel 1 exit /b 1 165 | echo. 166 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdf" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "latexpdfja" ( 181 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 182 | cd %BUILDDIR%/latex 183 | make all-pdf-ja 184 | cd %~dp0 185 | echo. 186 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 187 | goto end 188 | ) 189 | 190 | if "%1" == "text" ( 191 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 192 | if errorlevel 1 exit /b 1 193 | echo. 194 | echo.Build finished. The text files are in %BUILDDIR%/text. 195 | goto end 196 | ) 197 | 198 | if "%1" == "man" ( 199 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 200 | if errorlevel 1 exit /b 1 201 | echo. 202 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 203 | goto end 204 | ) 205 | 206 | if "%1" == "texinfo" ( 207 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 208 | if errorlevel 1 exit /b 1 209 | echo. 210 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 211 | goto end 212 | ) 213 | 214 | if "%1" == "gettext" ( 215 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 216 | if errorlevel 1 exit /b 1 217 | echo. 218 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 219 | goto end 220 | ) 221 | 222 | if "%1" == "changes" ( 223 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 224 | if errorlevel 1 exit /b 1 225 | echo. 226 | echo.The overview file is in %BUILDDIR%/changes. 227 | goto end 228 | ) 229 | 230 | if "%1" == "linkcheck" ( 231 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 232 | if errorlevel 1 exit /b 1 233 | echo. 234 | echo.Link check complete; look for any errors in the above output ^ 235 | or in %BUILDDIR%/linkcheck/output.txt. 236 | goto end 237 | ) 238 | 239 | if "%1" == "doctest" ( 240 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 241 | if errorlevel 1 exit /b 1 242 | echo. 243 | echo.Testing of doctests in the sources finished, look at the ^ 244 | results in %BUILDDIR%/doctest/output.txt. 245 | goto end 246 | ) 247 | 248 | if "%1" == "coverage" ( 249 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 250 | if errorlevel 1 exit /b 1 251 | echo. 252 | echo.Testing of coverage in the sources finished, look at the ^ 253 | results in %BUILDDIR%/coverage/python.txt. 254 | goto end 255 | ) 256 | 257 | if "%1" == "xml" ( 258 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 259 | if errorlevel 1 exit /b 1 260 | echo. 261 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 262 | goto end 263 | ) 264 | 265 | if "%1" == "pseudoxml" ( 266 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 267 | if errorlevel 1 exit /b 1 268 | echo. 269 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 270 | goto end 271 | ) 272 | 273 | if "%1" == "dummy" ( 274 | %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy 275 | if errorlevel 1 exit /b 1 276 | echo. 277 | echo.Build finished. Dummy builder generates no files. 
278 | goto end 279 | ) 280 | 281 | :end 282 | -------------------------------------------------------------------------------- /docs/source/_static/luqum-logo.png: -------------------------------------------------------------------------------- 1 | ../../../luqum-logo.png -------------------------------------------------------------------------------- /docs/source/about.rst: -------------------------------------------------------------------------------- 1 | What is Luqum 2 | ############### 3 | 4 | Luqum stands for LUcene QUery Manipulator. 5 | 6 | It features a Python library with a parser for the `Lucene Query DSL`_ as found in 7 | `Solr`_ `query syntax`_ or 8 | `ElasticSearch`_ `query string`_. 9 | 10 | From the parser it builds a tree (see :ref:`tutorial-parsing`). 11 | 12 | This tree can eventually be manipulated 13 | and then transformed back into a query string, 14 | or used to generate other forms of queries. 15 | 16 | In particular, luqum ships with 17 | a utility to transform a Lucene query 18 | into an Elasticsearch query expressed in the JSON query DSL 19 | (see :ref:`tutorial-elastic`). 20 | 21 | You may use this to: 22 | 23 | * run sanity checks on a query 24 | * run your own checks on a query (e.g. forbid certain fields) 25 | * replace some expressions in a query 26 | * pretty print a query 27 | * inject queries into queries 28 | * extend the Lucene query language semantics 29 | 30 | The parser is built using `PLY`_. 31 | 32 | Luqum is dual-licensed under Apache2.0 and LGPLv3. 33 | 34 | .. warning:: 35 | 36 | While it has been used in production by our team for some time, 37 | this library is still a work in progress and lacks some features. 38 | 39 | Contributions are welcome. 40 | 41 | .. _`Lucene Query DSL`: https://lucene.apache.org/core/3_6_0/queryparsersyntax.html 42 | .. _`Solr`: http://lucene.apache.org/solr/ 43 | .. _`query syntax`: https://wiki.apache.org/solr/SolrQuerySyntax 44 | .. _`ElasticSearch`: https://www.elastic.co/products/elasticsearch 45 | .. _`query string`: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html 46 | .. _`PLY`: http://www.dabeaz.com/ply/ply.html 47 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | ##### 3 | 4 | Parsing and constructing queries 5 | ================================== 6 | 7 | This is the core of the library: a parser and the syntax tree definition. 8 | 9 | luqum.parser 10 | --------------- 11 | 12 | .. automodule:: luqum.parser 13 | :members: parser, parse 14 | 15 | luqum.thread 16 | --------------- 17 | 18 | .. automodule:: luqum.thread 19 | :members: parse 20 | 21 | luqum.tree 22 | --------------- 23 | 24 | .. automodule:: luqum.tree 25 | :members: 26 | :member-order: bysource 27 | 28 | .. _elasticsearch-api: 29 | 30 | Transforming to Elasticsearch queries 31 | ====================================== 32 | 33 | luqum.schema 34 | ------------ 35 | 36 | .. autoclass:: luqum.elasticsearch.schema.SchemaAnalyzer 37 | :members: 38 | :member-order: bysource 39 | 40 | 41 | luqum.elasticsearch 42 | -------------------- 43 | 44 | .. autoclass:: luqum.elasticsearch.visitor.ElasticsearchQueryBuilder 45 | :members: __init__, __call__ 46 | :member-order: bysource 47 | 48 | 49 | Naming and explaining matches 50 | ============================== 51 | 52 | 53 | luqum.naming 54 | ------------ 55 | 56 | .. 
automodule:: luqum.naming 57 | :members: 58 | :member-order: bysource 59 | 60 | 61 | Utilities 62 | ========== 63 | 64 | 65 | luqum.visitor: Manipulating trees 66 | ---------------------------------- 67 | 68 | .. automodule:: luqum.visitor 69 | :members: 70 | :member-order: bysource 71 | 72 | 73 | luqum.auto_head_tail: Automatic addition of spaces 74 | -------------------------------------------------- 75 | 76 | .. automodule:: luqum.auto_head_tail 77 | :members: 78 | 79 | luqum.pretty: Pretty printing 80 | ------------------------------ 81 | 82 | .. automodule:: luqum.pretty 83 | :members: 84 | 85 | luqum.check: Checking for validity 86 | ----------------------------------- 87 | 88 | .. automodule:: luqum.check 89 | :members: 90 | 91 | luqum.utils: Misc 92 | ----------------- 93 | 94 | .. automodule:: luqum.utils 95 | :members: 96 | :member-order: bysource 97 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # luqum documentation build configuration file, created by 5 | # sphinx-quickstart on Wed Apr 13 10:25:52 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | #sys.path.insert(0, os.path.abspath('.')) 23 | sys.path.insert(0, os.path.abspath('../..')) 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | #needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.intersphinx', 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.graphviz', 37 | 'alabaster', 38 | ] 39 | 40 | graphviz_dot_args = ["-Nshape=box"] 41 | graphviz_output_format = "svg" 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix(es) of source filenames. 47 | # You can specify multiple suffix as a list of string: 48 | # source_suffix = ['.rst', '.md'] 49 | source_suffix = '.rst' 50 | 51 | # The encoding of source files. 52 | #source_encoding = 'utf-8-sig' 53 | 54 | # The master toctree document. 55 | master_doc = 'index' 56 | 57 | # General information about the project. 58 | project = 'luqum' 59 | copyright = '2016, jurismarches' 60 | author = 'jurismarches' 61 | 62 | # The version info for the project you're documenting, acts as replacement for 63 | # |version| and |release|, also used in various other places throughout the 64 | # built documents. 65 | # 66 | # The short X.Y version. 67 | version = '0.7' 68 | # The full version, including alpha/beta/rc tags. 69 | release = '0.7.1' 70 | 71 | # The language for content autogenerated by Sphinx. 
Refer to documentation 72 | # for a list of supported languages. 73 | # 74 | # This is also used if you do content translation via gettext catalogs. 75 | # Usually you set "language" from the command line for these cases. 76 | language = "en" 77 | 78 | # There are two options for replacing |today|: either, you set today to some 79 | # non-false value, then it is used: 80 | #today = '' 81 | # Else, today_fmt is used as the format for a strftime call. 82 | #today_fmt = '%B %d, %Y' 83 | 84 | # List of patterns, relative to source directory, that match files and 85 | # directories to ignore when looking for source files. 86 | # These patterns also affect html_static_path and html_extra_path 87 | exclude_patterns = [] 88 | 89 | # The reST default role (used for this markup: `text`) to use for all 90 | # documents. 91 | #default_role = None 92 | 93 | # If true, '()' will be appended to :func: etc. cross-reference text. 94 | #add_function_parentheses = True 95 | 96 | # If true, the current module name will be prepended to all description 97 | # unit titles (such as .. function::). 98 | #add_module_names = True 99 | 100 | # If true, sectionauthor and moduleauthor directives will be shown in the 101 | # output. They are ignored by default. 102 | #show_authors = False 103 | 104 | # The name of the Pygments (syntax highlighting) style to use. 105 | pygments_style = 'sphinx' 106 | 107 | # A list of ignored prefixes for module index sorting. 108 | #modindex_common_prefix = [] 109 | 110 | # If true, keep warnings as "system message" paragraphs in the built documents. 111 | #keep_warnings = False 112 | 113 | # If true, `todo` and `todoList` produce output, else they produce nothing. 114 | todo_include_todos = False 115 | 116 | 117 | # -- Options for HTML output ---------------------------------------------- 118 | 119 | # The theme to use for HTML and HTML Help pages. See the documentation for 120 | # a list of builtin themes. 121 | html_theme = 'alabaster' 122 | 123 | # Theme options are theme-specific and customize the look and feel of a theme 124 | # further. For a list of options available for each theme, see the 125 | # documentation. 126 | #html_theme_options = {} 127 | html_theme_options = { 128 | 'logo': 'luqum-logo.png', 129 | 'description': 'LUcene QUery Manipulator in python', 130 | 'github_user': 'jurismarches', 131 | 'github_repo': 'luqum', 132 | 'github_banner': True} 133 | 134 | # Add any paths that contain custom themes here, relative to this directory. 135 | #html_theme_path = [] 136 | 137 | # The name for this set of Sphinx documents. 138 | # "<project> v<release> documentation" by default. 139 | #html_title = 'luqum v1.0' 140 | 141 | # A shorter title for the navigation bar. Default is the same as html_title. 142 | #html_short_title = None 143 | 144 | # The name of an image file (relative to this directory) to place at the top 145 | # of the sidebar. 146 | #html_logo = None 147 | 148 | # The name of an image file (relative to this directory) to use as a favicon of 149 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 150 | # pixels large. 151 | #html_favicon = None 152 | 153 | # Add any paths that contain custom static files (such as style sheets) here, 154 | # relative to this directory. They are copied after the builtin static files, 155 | # so a file named "default.css" will overwrite the builtin "default.css".
156 | html_static_path = ['_static'] 157 | 158 | # Add any extra paths that contain custom files (such as robots.txt or 159 | # .htaccess) here, relative to this directory. These files are copied 160 | # directly to the root of the documentation. 161 | #html_extra_path = [] 162 | 163 | # If not None, a 'Last updated on:' timestamp is inserted at every page 164 | # bottom, using the given strftime format. 165 | # The empty string is equivalent to '%b %d, %Y'. 166 | #html_last_updated_fmt = None 167 | 168 | # If true, SmartyPants will be used to convert quotes and dashes to 169 | # typographically correct entities. 170 | #html_use_smartypants = True 171 | 172 | # Custom sidebar templates, maps document names to template names. 173 | #html_sidebars = {} 174 | html_sidebars = {'**': [ 175 | 'about.html', 176 | 'navigation.html', 177 | 'relations.html', 178 | 'searchbox.html', 179 | 'donate.html']} 180 | 181 | 182 | # Additional templates that should be rendered to pages, maps page names to 183 | # template names. 184 | #html_additional_pages = {} 185 | 186 | # If false, no module index is generated. 187 | #html_domain_indices = True 188 | 189 | # If false, no index is generated. 190 | #html_use_index = True 191 | 192 | # If true, the index is split into individual pages for each letter. 193 | #html_split_index = False 194 | 195 | # If true, links to the reST sources are added to the pages. 196 | #html_show_sourcelink = True 197 | 198 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 199 | #html_show_sphinx = True 200 | 201 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 202 | #html_show_copyright = True 203 | 204 | # If true, an OpenSearch description file will be output, and all pages will 205 | # contain a tag referring to it. The value of this option must be the 206 | # base URL from which the finished HTML is served. 207 | #html_use_opensearch = '' 208 | 209 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 210 | #html_file_suffix = None 211 | 212 | # Language to be used for generating the HTML full-text search index. 213 | # Sphinx supports the following languages: 214 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 215 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' 216 | #html_search_language = 'en' 217 | 218 | # A dictionary with options for the search language support, empty by default. 219 | # 'ja' uses this config value. 220 | # 'zh' user can custom change `jieba` dictionary path. 221 | #html_search_options = {'type': 'default'} 222 | 223 | # The name of a javascript file (relative to the configuration directory) that 224 | # implements a search results scorer. If empty, the default will be used. 225 | #html_search_scorer = 'scorer.js' 226 | 227 | # Output file base name for HTML help builder. 228 | htmlhelp_basename = 'luqumdoc' 229 | 230 | # -- Options for LaTeX output --------------------------------------------- 231 | 232 | latex_elements = { 233 | # The paper size ('letterpaper' or 'a4paper'). 234 | #'papersize': 'letterpaper', 235 | 236 | # The font size ('10pt', '11pt' or '12pt'). 237 | #'pointsize': '10pt', 238 | 239 | # Additional stuff for the LaTeX preamble. 240 | #'preamble': '', 241 | 242 | # Latex figure (float) alignment 243 | #'figure_align': 'htbp', 244 | } 245 | 246 | # Grouping the document tree into LaTeX files. List of tuples 247 | # (source start file, target name, title, 248 | # author, documentclass [howto, manual, or own class]). 
249 | latex_documents = [ 250 | (master_doc, 'luqum.tex', 'luqum Documentation', 251 | 'jurismarches', 'manual'), 252 | ] 253 | 254 | # The name of an image file (relative to this directory) to place at the top of 255 | # the title page. 256 | #latex_logo = None 257 | 258 | # For "manual" documents, if this is true, then toplevel headings are parts, 259 | # not chapters. 260 | #latex_use_parts = False 261 | 262 | # If true, show page references after internal links. 263 | #latex_show_pagerefs = False 264 | 265 | # If true, show URL addresses after external links. 266 | #latex_show_urls = False 267 | 268 | # Documents to append as an appendix to all manuals. 269 | #latex_appendices = [] 270 | 271 | # If false, no module index is generated. 272 | #latex_domain_indices = True 273 | 274 | 275 | # -- Options for manual page output --------------------------------------- 276 | 277 | # One entry per manual page. List of tuples 278 | # (source start file, name, description, authors, manual section). 279 | man_pages = [ 280 | (master_doc, 'luqum', 'luqum Documentation', 281 | [author], 1) 282 | ] 283 | 284 | # If true, show URL addresses after external links. 285 | #man_show_urls = False 286 | 287 | 288 | # -- Options for Texinfo output ------------------------------------------- 289 | 290 | # Grouping the document tree into Texinfo files. List of tuples 291 | # (source start file, target name, title, author, 292 | # dir menu entry, description, category) 293 | texinfo_documents = [ 294 | (master_doc, 'luqum', 'luqum Documentation', 295 | author, 'luqum', 'One line description of project.', 296 | 'Miscellaneous'), 297 | ] 298 | 299 | # Documents to append as an appendix to all manuals. 300 | #texinfo_appendices = [] 301 | 302 | # If false, no module index is generated. 303 | #texinfo_domain_indices = True 304 | 305 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 306 | #texinfo_show_urls = 'footnote' 307 | 308 | # If true, do not generate a @detailmenu in the "Top" node's menu. 309 | #texinfo_no_detailmenu = False 310 | 311 | 312 | # Example configuration for intersphinx: refer to the Python standard library. 313 | intersphinx_mapping = {'https://docs.python.org/': None} 314 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. luqum documentation master file, created by 2 | sphinx-quickstart on Wed Apr 13 10:25:52 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to luqum's documentation! 7 | ================================= 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | about 15 | install 16 | quick_start 17 | api 18 | 19 | 20 | 21 | 22 | Indices and tables 23 | ================== 24 | 25 | * :ref:`genindex` 26 | * :ref:`modindex` 27 | * :ref:`search` 28 | 29 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | At the command line: 6 | 7 | .. 
code-block:: bash 7 | 8 | 9 | $ [sudo] pip install luqum 10 | 11 | -------------------------------------------------------------------------------- /luqum-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jurismarches/luqum/64d3d4a1bcdc56f156185df1f4d146582f294998/luqum-logo.png -------------------------------------------------------------------------------- /luqum/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __version__ = '1.0.0' 4 | __version_info__ = tuple(__version__.split('.')) 5 |
-------------------------------------------------------------------------------- /luqum/auto_head_tail.py: -------------------------------------------------------------------------------- 1 | """It can be tedious to add spaces in a tree you generate programmatically. 2 | 3 | This module provides a utility to transform a tree so that it contains the necessary head/tail 4 | for the expression to be printable. 5 | """ 6 | 7 | from . import visitor 8 | 9 | 10 | class AutoHeadTail(visitor.TreeTransformer): 11 | """This class implements a transformer so that a hand-built tree 12 | can have reasonable values for `head` and `tail` on their items, 13 | in order for the expression to be printable. 14 | """ 15 | 16 | SPACER = " " 17 | 18 | def add_head(self, node): 19 | if not node.head: 20 | node.head = self.SPACER 21 | 22 | def add_tail(self, node): 23 | if not node.tail: 24 | node.tail = self.SPACER 25 | 26 | def visit_base_operation(self, node, context): 27 | new_node = node.clone_item() 28 | children = list(self.clone_children(node, new_node, context)) 29 | # add tail to first node 30 | self.add_tail(children[0]) 31 | # add head and tail to inner nodes 32 | for child in children[1:-1]: 33 | self.add_head(child) 34 | self.add_tail(child) 35 | # add head to last 36 | self.add_head(children[-1]) 37 | new_node.children = children 38 | yield new_node 39 | 40 | def visit_unknown_operation(self, node, context): 41 | new_node = node.clone_item() 42 | children = list(self.clone_children(node, new_node, context)) 43 | # add tail to each node, but the last 44 | for child in children[:-1]: 45 | self.add_tail(child) 46 | new_node.children = children 47 | yield new_node 48 | 49 | def visit_not(self, node, context): 50 | new_node = node.clone_item() 51 | children = list(self.clone_children(node, new_node, context)) 52 | # add head to children, to have a space between NOT and the sub expression 53 | self.add_head(children[0]) 54 | new_node.children = children 55 | yield new_node 56 | 57 | def visit_range(self, node, context): 58 | new_node = node.clone_item() 59 | children = list(self.clone_children(node, new_node, context)) 60 | # add tail to lower bound, and head to upper bound 61 | self.add_tail(children[0]) 62 | self.add_head(children[-1]) 63 | new_node.children = children 64 | yield new_node 65 | 66 | def __call__(self, tree): 67 | new_tree = self.visit(tree) 68 | return new_tree 69 | 70 | 71 | auto_head_tail = AutoHeadTail() 72 | """callable to automatically add head and tail to items of a lucene tree so that it is printable 73 | """ 74 |
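# --- editor's note: usage sketch, not part of the original module ---
# A minimal example of the problem auto_head_tail solves, assuming the public
# names shown elsewhere in this repository (luqum.tree, luqum.auto_head_tail):
# >>> from luqum.tree import AndOperation, Word
# >>> from luqum.auto_head_tail import auto_head_tail
# >>> str(AndOperation(Word("foo"), Word("bar")))  # no spaces yet
# 'fooANDbar'
# >>> str(auto_head_tail(AndOperation(Word("foo"), Word("bar"))))
# 'foo AND bar'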
-------------------------------------------------------------------------------- /luqum/check.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import functools 3 | import math 4 | import re 5 | 6 | from . import tree 7 | from . import visitor 8 | from .exceptions import NestedSearchFieldException, ObjectSearchFieldException 9 | from .utils import flatten_nested_fields_specs, normalize_object_fields_specs 10 | 11 | 12 | def camel_to_lower(name): 13 | return "".join( 14 | "_" + w.lower() if w.isupper() else w.lower() 15 | for w in name).lstrip("_") 16 | 17 | 18 | sign = functools.partial(math.copysign, 1) 19 | 20 | 21 | def _check_children(f): 22 | """A decorator to call check on item children 23 | """ 24 | @functools.wraps(f) 25 | def wrapper(self, item, parents): 26 | yield from f(self, item, parents) 27 | for child in item.children: 28 | yield from self.check(child, parents + [item]) 29 | return wrapper 30 | 31 | 32 | class LuceneCheck: 33 | """Check if a query is consistent 34 | 35 | This is intended for use with queries constructed as trees, 36 | as well as those parsed by the parser, which is more tolerant. 37 | 38 | :param int zeal: if zeal > 0, do extra checks for some pitfalls, depending on zeal level 39 | """ 40 | field_name_re = re.compile(r"^\w+$") 41 | space_re = re.compile(r"\s") 42 | invalid_term_chars_re = re.compile(r"[+/-]") 43 | 44 | SIMPLE_EXPR_FIELDS = ( 45 | tree.Boost, tree.Proximity, tree.Fuzzy, tree.Word, tree.Phrase) 46 | 47 | FIELD_EXPR_FIELDS = tuple(list(SIMPLE_EXPR_FIELDS) + [tree.FieldGroup]) 48 | 49 | def __init__(self, zeal=0): 50 | self.zeal = zeal 51 | 52 | def _check_field_name(self, fname): 53 | return self.field_name_re.match(fname) is not None 54 | 55 | @_check_children 56 | def check_search_field(self, item, parents): 57 | if not self._check_field_name(item.name): 58 | yield "%s is not a valid field name" % item.name 59 | if not isinstance(item.expr, self.FIELD_EXPR_FIELDS): 60 | yield "field expression is not valid : %s" % item 61 | 62 | @_check_children 63 | def check_group(self, item, parents): 64 | if parents and isinstance(parents[-1], tree.SearchField): 65 | yield "Group misuse, after SearchField you should use FieldGroup : %s" % parents[-1] 66 | 67 | @_check_children 68 | def check_field_group(self, item, parents): 69 | if not parents or not isinstance(parents[-1], tree.SearchField): 70 | yield ("FieldGroup misuse, it must be used after SearchField : %s" % 71 | (parents[-1] if parents else item)) 72 | 73 | def check_range(self, item, parents): 74 | # TODO check lower bound <= higher bound taking into account wildcards and numbers 75 | return iter([]) 76 | 77 | def check_word(self, item, parents): 78 | if self.space_re.search(item.value): 79 | yield "A single term value can't hold a space %s" % item 80 | if self.zeal and self.invalid_term_chars_re.search(item.value): 81 | yield "Invalid characters in term value: %s" % item.value 82 | 83 | def check_fuzzy(self, item, parents): 84 | if sign(item.degree) < 0: 85 | yield "invalid degree %d, it must be positive" % item.degree 86 | if not isinstance(item.term, tree.Word): 87 | yield "Fuzzy should be on a single term in %s" % str(item) 88 | 89 | def check_proximity(self, item, parents): 90 | if not isinstance(item.term, tree.Phrase): 91 | yield "Proximity can only be on a phrase in %s" % str(item) 92 | 93 | @_check_children 94 | def check_boost(self, item, parents): 95 | return iter([]) 96 | 97 | @_check_children 98 | def check_base_operation(self, item, parents): 99 | return iter([]) 100 | 101 | @_check_children 102 | def check_plus(self, item, parents): 103 | return iter([]) 104 | 105 | def _check_not_operator(self, item, parents): 106 | """Common checker for NOT and - operators""" 107 | if self.zeal: 108 | if isinstance(parents[-1], tree.OrOperation): 109 | yield ("Prohibit or Not really means 'AND NOT' " + 110 | "which is inconsistent with OR operation in %s" % parents[-1]) 111 | 112 | @_check_children 113 | def check_not(self, item, parents): 114 | return self._check_not_operator(item, parents) 115 | 116 | @_check_children 117 | def check_prohibit(self, item, parents): 118 | return self._check_not_operator(item, parents) 119 | 120 | def check(self, item, parents=[]): 121 | # dispatching check to another method 122 | for cls in item.__class__.mro(): 123 | meth = getattr(self, "check_" + camel_to_lower(cls.__name__), None) 124 | if meth is not None: 125 | yield from meth(item, parents) 126 | break 127 | else: 128 | yield "Unknown item type %s : %s" % (item.__class__.__name__, str(item)) 129 | 130 | def __call__(self, tree): 131 | """return True only if there are no errors 132 | """ 133 | for error in self.check(tree): 134 | return False 135 | return True 136 | 137 | def errors(self, tree): 138 | """List all errors""" 139 | return list(self.check(tree))
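# --- editor's note: usage sketch, not part of the original module ---
# LuceneCheck as typically used on a parsed query; per the code above, calling
# the checker returns a bool, while errors() lists every problem found:
# >>> from luqum.parser import parser
# >>> from luqum.check import LuceneCheck
# >>> check = LuceneCheck()
# >>> check(parser.parse("foo AND bar"))
# True
# >>> check.errors(parser.parse("foo AND bar"))
# []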
140 | 141 | 142 | class CheckNestedFields(visitor.TreeVisitor): 143 | """ 144 | Visit the lucene tree to make some checks 145 | 146 | In particular to check nested fields. 147 | 148 | :param nested_fields: a dict where keys are names of nested fields, 149 | values are dicts of sub-nested fields or an empty dict for a leaf 150 | :param object_fields: 151 | this is either None, in which case unknown object fields will be accepted, 152 | or a dict of sub-nested fields (like nested_fields) 153 | """ 154 | 155 | def __init__(self, nested_fields, object_fields=None, sub_fields=None): 156 | assert isinstance(nested_fields, dict) 157 | self.object_fields = normalize_object_fields_specs(object_fields) 158 | self.object_prefixes = set(k.rsplit(".", 1)[0] for k in self.object_fields or []) 159 | self.nested_fields = flatten_nested_fields_specs(nested_fields) 160 | self.nested_prefixes = set(k.rsplit(".", 1)[0] for k in self.nested_fields) 161 | self.sub_fields = normalize_object_fields_specs(sub_fields) 162 | super().__init__(track_parents=True) 163 | 164 | def visit_search_field(self, node, context): 165 | """ 166 | On search field nodes, check nested fields logic 167 | """ 168 | child_context = dict(context) # copy 169 | child_context["prefix"] = context["prefix"] + node.name.split(".") 170 | yield from self.generic_visit(node, child_context) 171 | 172 | def _check_final_operation(self, node, context): 173 | prefix = context["prefix"] 174 | if prefix: 175 | fullname = ".".join(prefix) 176 | if fullname in self.nested_prefixes: 177 | raise NestedSearchFieldException( 178 | ('''"{expr}" can't be directly attributed to "{field}"''' + 179 | ''' as it is a nested field''') 180 | .format(expr=str(node), field=fullname)) 181 | elif fullname in self.object_prefixes: 182 | raise NestedSearchFieldException( 183 | ('''"{expr}" can't be directly attributed to "{field}"''' + 184 | ''' as it is an object field''') 185 | .format(expr=str(node), field=fullname)) 186 | # note: the above check does not apply to subfields, 187 | # as their field can have an expression 188 | elif len(prefix) > 1: 189 | unknown_field = ( 190 | self.sub_fields is not None and 191 | self.object_fields is not None and 192 | fullname not in self.sub_fields and 193 | fullname not in self.object_fields and 194 | fullname not in self.nested_fields) 195 | if unknown_field: 196 | raise ObjectSearchFieldException( 197 | '''"{expr}" attributed to unknown nested or object field "{field}"''' 198 | .format(expr=str(node), field=fullname)) 199 | 200 | def visit_phrase(self, node, context): 201 | """ 202 | On phrase nodes, verify the phrase is in a final search field 203 | """ 204 | yield self._check_final_operation(node, context) 205 | 206 | def visit_term(self, node, context): 207 | """ 208 | On term nodes, verify the term is in a final search field 209 | """ 210 | yield self._check_final_operation(node, context) 211 | 212 | def __call__(self, tree): 213 | return list(self.visit_iter(tree, context={"prefix": []})) 214 |
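# --- editor's note: hedged usage sketch, not part of the original module ---
# CheckNestedFields, based on the class above: declare the nested fields, then
# call the checker on a parsed tree; inconsistent queries raise an exception.
# >>> from luqum.parser import parser
# >>> checker = CheckNestedFields(nested_fields={"author": {"firstname": {}}})
# >>> checker(parser.parse("author.firstname:John"))  # consistent: no exception
# >>> checker(parser.parse("author:John"))  # raises NestedSearchFieldException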
-------------------------------------------------------------------------------- /luqum/deprecated_utils.py: -------------------------------------------------------------------------------- 1 | """Deprecated visitor helper classes. 2 | 3 | see :py:mod:`luqum.visitor` for newer implementations 4 | """ 5 | 6 | import warnings 7 | 8 | from .visitor import camel_to_lower 9 | 10 | 11 | class LuceneTreeVisitor: 12 | """ 13 | Tree Visitor base class, inspired by python's :class:`ast.NodeVisitor`. 14 | 15 | This class is meant to be subclassed, with the subclass implementing 16 | visitor methods for each Node type it is interested in. 17 | 18 | By default, those visitor methods should be named ``'visit_'`` + class 19 | name of the node, converted to lower_case (i.e.: visit_search_node for a 20 | SearchNode class). 21 | 22 | You can tweak this behaviour by overriding the `visitor_method_prefix` & 23 | `generic_visitor_method_name` class attributes. 24 | 25 | If the goal is to modify the initial tree, 26 | use :py:class:`LuceneTreeTransformer` instead. 27 | """ 28 | visitor_method_prefix = 'visit_' 29 | generic_visitor_method_name = 'generic_visit' 30 | 31 | _get_method_cache = None 32 | 33 | def _get_method(self, node): 34 | if self._get_method_cache is None: 35 | self._get_method_cache = {} 36 | try: 37 | meth = self._get_method_cache[type(node)] 38 | except KeyError: 39 | for cls in node.__class__.mro(): 40 | try: 41 | method_name = "{}{}".format( 42 | self.visitor_method_prefix, 43 | camel_to_lower(cls.__name__) 44 | ) 45 | meth = getattr(self, method_name) 46 | break 47 | except AttributeError: 48 | continue 49 | else: 50 | meth = getattr(self, self.generic_visitor_method_name) 51 | self._get_method_cache[type(node)] = meth 52 | return meth 53 | 54 | def visit(self, node, parents=None): 55 | """ Basic, recursive traversal of the tree. """ 56 | warnings.warn( 57 | "LuceneTreeVisitor is deprecated in favor of visitor.TreeVisitor", 58 | DeprecationWarning, 59 | ) 60 | parents = parents or [] 61 | method = self._get_method(node) 62 | yield from method(node, parents) 63 | for child in node.children: 64 | yield from self.visit(child, parents + [node]) 65 | 66 | def generic_visit(self, node, parents=None): 67 | """ 68 | Default visitor function, called if nothing matches the current node. 69 | """ 70 | return iter([]) # No-op 71 | 72 | 73 | class LuceneTreeTransformer(LuceneTreeVisitor): 74 | """ 75 | A :class:`LuceneTreeVisitor` subclass that walks the abstract syntax tree 76 | and allows modifications of traversed nodes. 77 | 78 | The `LuceneTreeTransformer` will walk the AST and use the return value of the 79 | visitor methods to replace or remove the old node. If the return value of 80 | the visitor method is ``None``, the node will be removed from its location, 81 | otherwise it is replaced with the return value. The return value may be the 82 | original node, in which case no replacement takes place.
83 | """ 84 | 85 | def replace_node(self, old_node, new_node, parent): 86 | for k, v in parent.__dict__.items(): # pragma: no branch 87 | if v == old_node: 88 | parent.__dict__[k] = new_node 89 | break 90 | elif isinstance(v, list): 91 | try: 92 | i = v.index(old_node) 93 | if new_node is None: 94 | del v[i] 95 | else: 96 | v[i] = new_node 97 | break 98 | except ValueError: 99 | pass # this was not the attribute containing old_node 100 | elif isinstance(v, tuple): 101 | try: 102 | i = v.index(old_node) 103 | v = list(v) 104 | if new_node is None: 105 | del v[i] 106 | else: 107 | v[i] = new_node 108 | parent.__dict__[k] = tuple(v) 109 | break 110 | except ValueError: 111 | pass # this was not the attribute containing old_node 112 | 113 | def generic_visit(self, node, parent=None): 114 | return node 115 | 116 | def visit(self, node, parents=None): 117 | """ 118 | Recursively traverses the tree and replaces nodes with the appropriate 119 | visitor method's return values. 120 | """ 121 | warnings.warn( 122 | "LuceneTreeTransformer is deprecated in favor of visitor.TreeTransformer", 123 | DeprecationWarning, 124 | ) 125 | parents = parents or [] 126 | method = self._get_method(node) 127 | new_node = method(node, parents) 128 | if parents: 129 | self.replace_node(node, new_node, parents[-1]) 130 | node = new_node 131 | if node is not None: 132 | for child in node.children: 133 | self.visit(child, parents + [node]) 134 | return node 135 | 136 | 137 | class LuceneTreeVisitorV2(LuceneTreeVisitor): 138 | """ 139 | V2 of the LuceneTreeVisitor allowing to evaluate the AST 140 | 141 | It differs from :py:class:`LuceneTreeVisitor` 142 | because it's up to the visit method to recursively call children (or not) 143 | 144 | This class is meant to be subclassed, with the subclass implementing 145 | visitor methods for each Node type it is interested in. 146 | 147 | By default, those visitor methods should be named ``'visit_'`` + class 148 | name of the node, converted to lower_case (i.e.: visit_search_node for a 149 | SearchNode class). 150 | 151 | You can tweak this behaviour by overriding the `visitor_method_prefix` & 152 | `generic_visitor_method_name` class attributes. 153 | 154 | If the goal is to modify the initial tree, 155 | use :py:class:`LuceneTreeTransformer` instead. 156 | """ 157 | 158 | def visit(self, node, parents=None, context=None): 159 | """ Traversal of the tree (recursion is up to each visit method). 160 | 161 | :param list parents: the list of parents 162 | :param dict context: a dict of contextual variables for free use, 163 | to track states while traversing the tree 164 | """ 165 | warnings.warn( 166 | "LuceneTreeVisitorV2 is deprecated in favor of visitor.TreeVisitor", 167 | DeprecationWarning, 168 | ) 169 | if parents is None: 170 | parents = [] 171 | 172 | method = self._get_method(node) 173 | return method(node, parents, context) 174 | 175 | def generic_visit(self, node, parents=None, context=None): 176 | """ 177 | Default visitor function, called if nothing matches the current node. 178 | """ 179 | raise AttributeError( 180 | "No visitor found for this type of node: {}".format( 181 | node.__class__ 182 | ) 183 | ) 184 |
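# --- editor's note: hedged migration sketch, not part of the original module ---
# The classes above are deprecated; the modern equivalents live in luqum.visitor.
# A rough port of a visitor (method names follow the same
# visit_<lower_case_class_name> convention):
# >>> from luqum import visitor
# >>> from luqum.parser import parser
# >>> class TermLister(visitor.TreeVisitor):
# ...     def visit_term(self, node, context):
# ...         yield node.value
# >>> TermLister().visit(parser.parse("foo AND bar"))  # should give ['foo', 'bar']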
-------------------------------------------------------------------------------- /luqum/elasticsearch/__init__.py: -------------------------------------------------------------------------------- 1 | from .visitor import ElasticsearchQueryBuilder # noqa: 2 | from .schema import SchemaAnalyzer # noqa: 3 |
-------------------------------------------------------------------------------- /luqum/elasticsearch/nested.py: -------------------------------------------------------------------------------- 1 | """If you have a query with a nested query containing operations, 2 | when using named queries, Elasticsearch won't report inner matching. 3 | 4 | This is a problem if you rely on it extensively. 5 | """ 6 | 7 | 8 | def get_first_name(query): 9 | if isinstance(query, dict): 10 | if "_name" in query: 11 | return query["_name"] 12 | elif "bool" in query: 13 | # do not go down bool 14 | return None 15 | else: 16 | children = query.values() 17 | elif isinstance(query, list): 18 | children = query 19 | else: 20 | return None 21 | iter_candidates = (get_first_name(child) for child in children) 22 | candidates = [candidate for candidate in iter_candidates if candidate is not None] 23 | return candidates[0] if candidates else None 24 | 25 | 26 | def extract_nested_queries(query, query_nester=None): 27 | """Given a query, 28 | extract all queries that sit under a nested query and boolean operations, 29 | returning an atomic nested version of them. 30 | Those nested queries also take care of changing the name to the nearest inner name. 31 | 32 | This is useful because Elasticsearch won't go down explaining why a nested query is matching. 33 | 34 | :param dict query: elasticsearch query to analyze 35 | :param callable query_nester: this is the function called to nest sub queries; leave it to its default 36 | :return list: queries that you should run to get all the matches 37 | 38 | .. note:: because we re-nest part of bool queries, results might not be accurate 39 | for:: 40 | {"bool": "must" : [ 41 | {"nested": {"path": "a", "match": {"x": "y"}}}, 42 | {"nested": {"path": "a", "match": {"x": "z"}}} 43 | ]} 44 | is not the same as:: 45 | {"nested": {"path": "a", "bool": "must": [{"match": {"x": "y"}}, {"match": {"x": "z"}}]}} 46 | 47 | if x is multivalued.
48 | The first would match `{"a": [{"x": "y"}, {"x": "z"}]}`, 49 | while the second would only match if `x` contains `"y z"` or `"z y"`. 50 | """ 51 | queries = [] # this contains our result 52 | in_nested = query_nester is not None 53 | sub_query_nester = query_nester 54 | if isinstance(query, dict): 55 | if "nested" in query: 56 | params = {k: v for k, v in query["nested"].items() if k not in ("query", "name")} 57 | 58 | def sub_query_nester_func(req, name): 59 | nested = {"nested": {"query": req, **params}} 60 | if query_nester is not None: 61 | nested = query_nester(nested, name) 62 | if name is not None: 63 | nested["nested"]["_name"] = name 64 | return nested 65 | 66 | sub_query_nester = sub_query_nester_func 67 | 68 | bool_param = {"must", "should", "must_not"} & set(query.keys()) 69 | if bool_param and in_nested: 70 | # we are in a list of operations in a bool inside a nested, 71 | # make a query with nested on sub arguments 72 | op, = bool_param # must or should or must_not 73 | # normalize to a list 74 | sub_queries = query[op] if isinstance(query[op], list) else [query[op]] 75 | # add nesting 76 | nested_sub_queries = [ 77 | query_nester(sub_query, get_first_name(sub_query)) for sub_query in sub_queries 78 | ] 79 | # those are the queries we want to return 80 | queries.extend(nested_sub_queries) 81 | # continue processing in each sub query 82 | # (before nesting, nesting is contained in query_nester) 83 | children = sub_queries 84 | else: 85 | children = query.values() 86 | elif isinstance(query, list): 87 | children = query 88 | else: 89 | # leaf: end of recursion 90 | children = [] 91 | 92 | # recurse 93 | for child_query in children: 94 | queries.extend( 95 | extract_nested_queries(child_query, query_nester=sub_query_nester) 96 | ) 97 | return queries 98 |
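# --- editor's note: hedged usage sketch, not part of the original module ---
# Given a nested query whose inner bool combines several named clauses,
# extract_nested_queries returns one atomic nested query per clause, so each
# clause can be run (and thus matched) on its own:
# query = {"nested": {"path": "a", "query": {"bool": {"must": [
#     {"match": {"x": {"query": "y", "_name": "n1"}}},
#     {"match": {"x": {"query": "z", "_name": "n2"}}},
# ]}}}}
# extract_nested_queries(query) should return two queries, each wrapping one
# match clause in its own {"nested": {"path": "a", ...}} envelope, named
# "n1" and "n2" respectively.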
-------------------------------------------------------------------------------- /luqum/elasticsearch/schema.py: -------------------------------------------------------------------------------- 1 | """Analyzing an Elasticsearch schema to provide helpers for query transformation 2 | """ 3 | 4 | 5 | class SchemaAnalyzer: 6 | """A helper that analyzes an Elasticsearch schema, to give you suitable options 7 | to use when transforming queries. 8 | 9 | :param dict schema: the index settings as a dict. 10 | """ 11 | 12 | def __init__(self, schema): 13 | self.settings = schema.get("settings", {}) 14 | mappings = schema.get("mappings", {}) 15 | if mappings.get("properties"): 16 | # ES >= 6 : one document type per index 17 | self.mappings = {"_doc": mappings} 18 | else: 19 | # ES < 6 : multiple document types per index allowed 20 | self.mappings = mappings 21 | 22 | def _dot_name(self, fname, parents): 23 | return ".".join([p[0] for p in parents] + [fname]) 24 | 25 | def default_field(self): 26 | try: 27 | return self.settings["query"]["default_field"] 28 | except KeyError: 29 | return "*" 30 | 31 | def _walk_properties(self, properties, parents=None, subfields=False): 32 | if parents is None: 33 | parents = [] 34 | for fname, fdef in properties.items(): 35 | yield fname, fdef, parents 36 | if subfields and "fields" in fdef: 37 | subfield_parents = parents + [(fname, fdef)] 38 | subdef = dict(fdef) # sub-field definitions override their parent's 39 | subfield_defs = subdef.pop("fields") 40 | for fname, fdef in subfield_defs.items(): 41 | fdef = dict(subdef, **fdef) 42 | yield fname, fdef, subfield_parents 43 | inner_properties = fdef.get("properties", {}) 44 | if inner_properties: 45 | new_parents = parents + [(fname, fdef)] 46 | yield from self._walk_properties(inner_properties, new_parents, subfields) 47 | 48 | def iter_fields(self, subfields=False): 49 | for mapping in self.mappings.values(): 50 | yield from self._walk_properties(mapping.get("properties", {}), subfields=subfields) 51 | 52 | def not_analyzed_fields(self): 53 | for fname, fdef, parents in self.iter_fields(subfields=True): 54 | not_analyzed = ( 55 | (fdef.get("type") == "string" and fdef.get("index", "") == "not_analyzed") or 56 | fdef.get("type") not in ("text", "string", "nested", "object") 57 | ) 58 | if not_analyzed: 59 | yield self._dot_name(fname, parents) 60 | 61 | def nested_fields(self): 62 | result = {} 63 | for fname, fdef, parents in self.iter_fields(): 64 | pdef = parents[-1][1] if parents else {} 65 | if pdef.get("type") == "nested": 66 | target = result 67 | cumulated = [] 68 | for n, _ in parents: 69 | cumulated.append(n) 70 | key = ".".join(cumulated) 71 | if key in target: 72 | target = target[key] 73 | cumulated = [] 74 | if cumulated: 75 | key = ".".join(cumulated) 76 | target = target.setdefault(key, {}) 77 | target[fname] = {} 78 | return result 79 | 80 | def object_fields(self): 81 | for fname, fdef, parents in self.iter_fields(): 82 | pdef = parents[-1][1] if parents else {} 83 | if pdef.get("type") == "object" and fdef.get("type") not in ("object", "nested"): 84 | yield self._dot_name(fname, parents) 85 | 86 | def sub_fields(self): 87 | """return all known subfields 88 | """ 89 | # we do not ask iter_fields for subfields: they would be indistinguishable from regular fields 90 | for fname, fdef, parents in self.iter_fields(): 91 | subfields = fdef.get("fields") 92 | if subfields: 93 | subfield_parents = parents + [(fname, fdef)] 94 | for subname in subfields: 95 | yield self._dot_name(subname, subfield_parents) 96 | 97 | def query_builder_options(self): 98 | """return options suitable for 99 | :py:class:`luqum.elasticsearch.visitor.ElasticsearchQueryBuilder` 100 | """ 101 | return { 102 | "default_field": self.default_field(), 103 | "not_analyzed_fields": list(self.not_analyzed_fields()), 104 | "nested_fields": self.nested_fields(), 105 | "object_fields": list(self.object_fields()), 106 | } 107 |
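# --- editor's note: usage sketch, not part of the original module ---
# Typical flow, close to the project documentation: analyze the index
# configuration, then feed the resulting options to the query builder. The
# schema dict here is a placeholder for your real settings/mappings.
# >>> from luqum.parser import parser
# >>> from luqum.elasticsearch import ElasticsearchQueryBuilder, SchemaAnalyzer
# >>> schema = {"settings": {}, "mappings": {"properties": {"title": {"type": "text"}}}}
# >>> options = SchemaAnalyzer(schema).query_builder_options()
# >>> es_query = ElasticsearchQueryBuilder(**options)(parser.parse('title:"foo bar"'))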
-------------------------------------------------------------------------------- /luqum/exceptions.py: -------------------------------------------------------------------------------- 1 | class InconsistentQueryException(Exception): 2 | """Raised when a query has a problem in its structure 3 | """ 4 | 5 | 6 | class OrAndAndOnSameLevel(InconsistentQueryException): 7 | """ 8 | Raised when an OR and an AND are on the same level, as we don't know how to 9 | handle this case 10 | """ 11 | 12 | 13 | class NestedSearchFieldException(InconsistentQueryException): 14 | """ 15 | Raised when a SearchField is nested in another SearchField, as it doesn't 16 | make sense. For instance field1:(spam AND field2:eggs) 17 | """ 18 | 19 | 20 | class ObjectSearchFieldException(InconsistentQueryException): 21 | """ 22 | Raised when a dotted field name is queried which is not an object field 23 | """ 24 | 25 | 26 | class ParseError(ValueError): 27 | """Exception while parsing a lucene statement 28 | """ 29 | 30 | 31 | class ParseSyntaxError(ParseError): 32 | """Raised when the parser encounters an invalid statement 33 | """ 34 | 35 | 36 | class IllegalCharacterError(ParseError): 37 | """ 38 | Raised when the parser encounters an invalid character 39 | """ 40 |
-------------------------------------------------------------------------------- /luqum/head_tail.py: -------------------------------------------------------------------------------- 1 | """Utilities to manage head and tail of elements 2 | 3 | The scope is to avoid losing part of the original text in the final tree. 4 | """ 5 | from .tree import Item 6 | 7 | 8 | class TokenValue: 9 | 10 | def __init__(self, value): 11 | self.value = value 12 | self.pos = None 13 | self.size = None 14 | self.head = "" 15 | self.tail = "" 16 | 17 | def __repr__(self): 18 | return "TokenValue(%s)" % self.value 19 | 20 | def __str__(self): 21 | return str(self.value) if self.value else "" 22 | 23 | 24 | class HeadTailLexer: 25 | """Utility to handle head and tail at lexer time. 26 | """ 27 | 28 | LEXER_ATTR = "_luqum_headtail" 29 | 30 | @classmethod 31 | def handle(cls, token, orig_value): 32 | """Handling a token. 33 | 34 | .. note:: 35 | PLY does not give access to previous tokens, 36 | nor does it provide any infrastructure for handling specific state. 37 | 38 | So we use the strategy 39 | of putting a :py:class:`HeadTailLexer` instance as an attribute of the lexer 40 | each time we start a new tokenization. 41 | """ 42 | # get instance 43 | if token.lexpos == 0: 44 | # first token: create the instance 45 | instance = cls() 46 | setattr(token.lexer, cls.LEXER_ATTR, instance) 47 | else: 48 | instance = getattr(token.lexer, cls.LEXER_ATTR) 49 | # handle 50 | instance.handle_token(token, orig_value) 51 | 52 | def __init__(self): 53 | self.head = None 54 | """This will track the head of the next element, useful only for the first element 55 | """ 56 | self.last_elt = None 57 | """This will track the last token, so we can use it to add the tail to it. 58 | """ 59 | 60 | def handle_token(self, token, orig_value): 61 | """Handle head and tail for tokens 62 | 63 | The scope is to avoid losing part of the original text and keep it in elements.
64 | """ 65 | # handle headtail 66 | if token.type == "SEPARATOR": 67 | if token.lexpos == 0: 68 | # spaces at expression start, head for next token 69 | self.head = token.value 70 | else: 71 | # tail of last processed token 72 | if self.last_elt is not None: 73 | self.last_elt.value.tail += token.value 74 | else: 75 | # if there is a head, apply it 76 | head = self.head 77 | if head is not None: 78 | token.value.head = head 79 | self.head = None 80 | # keep track of the token, to apply its tail later 81 | self.last_elt = token 82 | # also set pos and size 83 | if isinstance(token.value, (Item, TokenValue)): 84 | token.value.pos = token.lexpos 85 | token.value.size = len(orig_value) 86 | 87 | 88 | token_headtail = HeadTailLexer.handle 89 | 90 | 91 | class HeadTailManager: 92 | """Utility to handle head and tail at expression parse time 93 | """ 94 | 95 | def pos(self, p, head_transfer=False, tail_transfer=False): 96 | """Compute pos and size of element 0 based on its parts (p[1:]) 97 | 98 | :param list p: the parser expression as in PLY 99 | :param bool head_transfer: True if head of first child will be transferred to p[0] 100 | :param bool tail_transfer: True if tail of last child will be transferred to p[0] 101 | """ 102 | # pos 103 | if p[1].pos is not None: 104 | p[0].pos = p[1].pos 105 | if not head_transfer: 106 | # head isn't transferred, so we are before it 107 | p[0].pos -= len(p[1].head) 108 | # size 109 | p[0].size = sum( 110 | (elt.size or 0) + len(elt.head or "") + len(elt.tail or "") for elt in p[1:]) 111 | if head_transfer and p[1].head: 112 | # we account head in size, remove it 113 | p[0].size -= len(p[1].head) 114 | last_p = p[len(p) - 1] # negative indexing not supported by PLY 115 | if tail_transfer and last_p.tail: 116 | # we account tail in size, remove it 117 | p[0].size -= len(last_p.tail) 118 | 119 | def binary_operation(self, p, op_tail): 120 | self.pos(p, head_transfer=False, tail_transfer=False) 121 | # correct size 122 | p[0].size -= len(op_tail) 123 | 124 | def simple_term(self, p): 125 | self.pos(p, head_transfer=True, tail_transfer=True) 126 | p[0].head = p[1].head 127 | p[0].tail = p[1].tail 128 | 129 | def unary(self, p): 130 | """OP expr""" 131 | self.pos(p, head_transfer=True, tail_transfer=False) 132 | p[0].head = p[1].head 133 | p[2].head = p[1].tail + p[2].head 134 | 135 | def post_unary(self, p): 136 | """expr OP""" 137 | self.pos(p, head_transfer=False, tail_transfer=True) 138 | p[1].tail += p[2].head 139 | p[0].tail = p[2].tail 140 | 141 | def paren(self, p): 142 | """( expr )""" 143 | self.pos(p, head_transfer=True, tail_transfer=True) 144 | # p[0] is the global element (Group or FieldGroup) 145 | # p[2] is the content 146 | # p[1] is the left parenthesis 147 | p[0].head = p[1].head 148 | p[2].head = p[1].tail + p[2].head 149 | # p[3] is the right parenthesis 150 | p[2].tail += p[3].head 151 | p[0].tail = p[3].tail 152 | 153 | def range(self, p): 154 | """[ expr TO expr ]""" 155 | self.pos(p, head_transfer=True, tail_transfer=True) 156 | # p[0] is the global element (Range) 157 | # p[2] is the lower bound 158 | p[0].head = p[1].head 159 | p[2].head = p[1].tail + p[2].head 160 | # p[3] is TO 161 | # p[4] is the upper bound 162 | p[2].tail += p[3].head 163 | p[4].head = p[3].tail + p[4].head 164 | # p[5] is the upper bracket 165 | p[4].tail += p[5].head 166 | p[0].tail = p[5].tail 167 | 168 | def search_field(self, p): 169 | """name: expr""" 170 | self.pos(p, head_transfer=True, tail_transfer=False) 171 | # p[0] is the global element (SearchField) 172 | # p[1] is the search field name 173 | # p[2] is COLUMN 174 | p[0].head = p[1].head 175 | if p[1].tail or p[2].head: 176 | pass # FIXME: add warning, or handle space between point and name in SearchField ? 177 | # p[3] is the expression 178 | p[3].head = p[2].tail + p[3].head 179 | 180 | 181 | head_tail = HeadTailManager() 182 | """singleton of HeadTailManager 183 | """ 184 |
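# --- editor's note: illustration, not part of the original module ---
# The visible effect of head/tail bookkeeping: parsing then printing an
# expression is lossless with respect to whitespace:
# >>> from luqum.parser import parser
# >>> str(parser.parse("foo   AND  bar"))
# 'foo   AND  bar'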
-------------------------------------------------------------------------------- /luqum/naming.py: -------------------------------------------------------------------------------- 1 | """Support for naming expressions 2 | 3 | In order to use Elasticsearch named queries, we need to be able to assign names to expressions 4 | and retrieve their positions in the query text. 5 | 6 | This module adds support for that. 7 | """ 8 | from . import tree 9 | from .visitor import PathTrackingVisitor, PathTrackingTransformer 10 | 11 | 12 | #: Names are added to tree items via an attribute named `_luqum_name` 13 | NAME_ATTR = "_luqum_name" 14 | 15 | 16 | def set_name(node, value): 17 | setattr(node, NAME_ATTR, value) 18 | 19 | 20 | def get_name(node): 21 | return getattr(node, NAME_ATTR, None) 22 | 23 | 24 | class TreeAutoNamer(PathTrackingVisitor): 25 | """Helper for :py:func:`auto_name` 26 | """ 27 | 28 | LETTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 29 | _pos_letter = {l: i for i, l in enumerate(LETTERS)} 30 | 31 | def next_name(self, name): 32 | """Given a name, return the next name 33 | 34 | :: 35 | >>> tan = TreeAutoNamer() 36 | >>> tan.next_name(None) 37 | 'a' 38 | >>> tan.next_name('aZ') 39 | 'aZa' 40 | >>> tan.next_name('azb') 41 | 'azc' 42 | """ 43 | if name is None: 44 | # bootstrap 45 | return self.LETTERS[0] 46 | else: 47 | actual_pos = self._pos_letter[name[-1]] 48 | try: 49 | # we want to increment the last letter 50 | return name[:-1] + self.LETTERS[actual_pos + 1] 51 | except IndexError: 52 | # we exhausted the letters, add a new one instead 53 | return name + self.LETTERS[0] 54 | 55 | def visit_base_operation(self, node, context): 56 | """name is to be set on children of operations 57 | """ 58 | # put a _name on each child 59 | name = context["global"]["name"] 60 | for i, child in enumerate(node.children): 61 | name = self.next_name(name) 62 | set_name(child, name) 63 | # remember name to path 64 | context["global"]["name_to_path"][name] = context["path"] + (i,) 65 | # put name back in global context 66 | context["global"]["name"] = name 67 | yield from self.generic_visit(node, context) 68 | 69 | def visit(self, node): 70 | """visit the tree and add names to nodes while tracking their path 71 | """ 72 | # trick: we use a "global" dict inside the context dict so that when we copy the context, 73 | # we still track the same objects 74 | context = {"global": {"name": None, "name_to_path": {}}} 75 | super().visit(node, context) 76 | name_to_path = context["global"]["name_to_path"] 77 | # handle special case: if we have no name so far, put one on the root 78 | if not name_to_path: 79 | node_name = self.next_name(context["global"]["name"]) 80 | set_name(node, node_name) 81 | name_to_path[node_name] = () 82 | return name_to_path 83 | 84 | 85 | def auto_name(tree, targets=None, all_names=False): 86 | """Automatically add names to nodes of a parse tree, in order to be able to track matching. 87 | 88 | We add them to top nodes under operations, as this is where it is useful for ES named queries 89 | 90 | :return dict: association of name with the path (as a tuple) to the corresponding child 91 | """ 92 | return TreeAutoNamer().visit(tree) 93 |
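# --- editor's note: hedged usage sketch, not part of the original module ---
# auto_name names the nodes sitting directly under operations and returns a
# name -> path mapping, where a path addresses children positions from the root:
# >>> from luqum.parser import parser
# >>> tree = parser.parse("foo AND (bar OR baz)")
# >>> name_to_path = auto_name(tree)  # roughly {'a': (0,), 'b': (1,), 'c': (1, 0, 0), ...}
# >>> element_from_name(tree, "a", name_to_path)  # the Word 'foo' (helper defined below)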
94 | 95 | def matching_from_names(names, name_to_path): 96 | """Utility to convert a list of names and the result of auto_name 97 | to the matching parameter for :py:class:`MatchingPropagator` 98 | 99 | :param list names: list of names 100 | :param dict name_to_path: association of names with path to children 101 | :return tuple: (set of matching paths, set of other known paths) 102 | """ 103 | matching = {name_to_path[name] for name in names} 104 | return (matching, set(name_to_path.values()) - matching) 105 | 106 | 107 | def element_from_path(tree, path): 108 | """Given a tree, retrieve the element corresponding to path 109 | 110 | :param luqum.tree.Item tree: luqum expression tree 111 | :param tuple path: tuple representing top down access to a child 112 | :return luqum.tree.Item: target item 113 | """ 114 | # Python prefers iteration over recursion 115 | node = tree 116 | path = list(path) 117 | while path: 118 | node = node.children[path.pop(0)] 119 | return node 120 | 121 | 122 | def element_from_name(tree, name, name_to_path): 123 | return element_from_path(tree, name_to_path[name]) 124 |
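# --- editor's note: hedged end-to-end sketch, not part of the original module ---
# Typical flow with Elasticsearch named queries, reusing tree and name_to_path
# from the sketch above: convert the names ES reports as matched into paths,
# then let MatchingPropagator (defined below) compute every node's status:
# >>> matched_names = ["a"]  # e.g. from an ES hit's "matched_queries"
# >>> matching, other = matching_from_names(matched_names, name_to_path)
# >>> paths_ok, paths_ko = MatchingPropagator()(tree, matching, other)
# >>> html = HTMLMarker()(tree, paths_ok, paths_ko)  # expression with ok/ko spans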
125 | 126 | class MatchingPropagator: 127 | """Class propagating matching to upper elements based on known base element matching 128 | 129 | :param luqum.tree.Item default_operation: tells how to treat UnknownOperation. 130 | Choose between :py:class:`luqum.tree.OrOperation` and :py:class:`luqum.tree.AndOperation` 131 | """ 132 | 133 | OR_NODES = (tree.OrOperation,) 134 | """A tuple of node types considered as OR operations 135 | """ 136 | NEGATION_NODES = (tree.Not, tree.Prohibit) 137 | """A tuple of node types considered as NOT operations 138 | """ 139 | NO_CHILDREN_PROPAGATE = (tree.Range, tree.BaseApprox) 140 | """A tuple of nodes for which propagation is of no use 141 | """ 142 | 143 | def __init__(self, default_operation=tree.OrOperation): 144 | if default_operation is tree.OrOperation: 145 | self.OR_NODES = self.OR_NODES + (tree.UnknownOperation,) 146 | 147 | def _status_from_parent(self, path, matching, other): 148 | """Get status from the nearest parent in the hierarchy which had a name 149 | """ 150 | if path in matching: 151 | return True 152 | elif path in other: 153 | return False 154 | elif not path: 155 | return False 156 | else: 157 | return self._status_from_parent(path[:-1], matching, other) 158 | 159 | def _propagate(self, node, matching, other, path): 160 | """recursively propagate matching 161 | 162 | return tuple: ( 163 | node is matching, 164 | set of paths of matching sub nodes, 165 | set of paths of non matching sub nodes) 166 | """ 167 | paths_ok = set() # paths of nodes that are matching 168 | paths_ko = set() # paths of nodes that are not matching 169 | children_status = [] # bool for each child, indicating if it matches or not 170 | # recurse children 171 | if node.children and not isinstance(node, self.NO_CHILDREN_PROPAGATE): 172 | for i, child in enumerate(node.children): 173 | child_ok, sub_ok, sub_ko = self._propagate( 174 | child, matching, other, path + (i,), 175 | ) 176 | paths_ok.update(sub_ok) 177 | paths_ko.update(sub_ko) 178 | children_status.append(child_ok) 179 | # resolve node status 180 | if path in matching: 181 | node_ok = True 182 | elif children_status: # compute from children 183 | # compute parent success from children 184 | operator = any if isinstance(node, self.OR_NODES) else all 185 | node_ok = operator(children_status) 186 | else: 187 | node_ok = self._status_from_parent(path, matching, other) 188 | if isinstance(node, self.NEGATION_NODES): 189 | # negate result 190 | node_ok = not node_ok 191 | # add node to the right set 192 | target_set = paths_ok if node_ok else paths_ko 193 | target_set.add(path) 194 | # return result 195 | return node_ok, paths_ok, paths_ko 196 | 197 | def __call__(self, tree, matching, other=frozenset()): 198 | """ 199 | Given a list of paths that are known to match, 200 | return all paths in the tree that are matching. 201 | 202 | .. note:: we do not descend into nodes that are positive. 203 | Normally matching just provides nodes at the right levels 204 | for propagation to be effective. 205 | Descending would risk giving inconsistent information. 206 | 207 | :param list matching: list of paths of matching nodes (each path is a tuple) 208 | :param list other: list of other paths that had a name, but were not reported as matching 209 | 210 | :return tuple: ( 211 | set of matching paths after propagation, 212 | set of non matching paths after propagation) 213 | """ 214 | tree_ok, paths_ok, paths_ko = self._propagate(tree, matching, other, ()) 215 | return paths_ok, paths_ko 216 | 217 | 218 | class ExpressionMarker(PathTrackingTransformer): 219 | """A visitor to mark a tree based on elements belonging to a path or not 220 | 221 | One intended usage is to add markers around nodes matching a request, 222 | by altering the tail and head of elements 223 | """ 224 | 225 | def mark_node(self, node, path, *info): 226 | """implement this in your own code, maybe altering the head / tail arguments 227 | """ 228 | return node 229 | 230 | def generic_visit(self, node, context): 231 | # we simply generate new_node and mark it 232 | new_node, = super().generic_visit(node, context) 233 | yield self.mark_node(new_node, context["path"], *context["info"]) 234 | 235 | def __call__(self, tree, *info): 236 | return self.visit(tree, context={"info": info}) 237 | 238 | 239 | class HTMLMarker(ExpressionMarker): 240 | """From paths that are ok or ko, add HTML elements with the right class around elements 241 | 242 | :param str ok_class: class for elements in paths_ok 243 | :param str ko_class: class for elements in paths_ko 244 | :param str element: html element used to surround sub expressions 245 | """ 246 | 247 | def __init__(self, ok_class="ok", ko_class="ko", element="span"): 248 | super().__init__() 249 | self.ok_class = ok_class 250 | self.ko_class = ko_class 251 | self.element = element 252 | 253 | def css_class(self, path, paths_ok, paths_ko): 254 | return self.ok_class if path in paths_ok else self.ko_class if path in paths_ko else None 255 | 256 | def mark_node(self, node, path, paths_ok, paths_ko, parcimonious): 257 | node_class = self.css_class(path, paths_ok, paths_ko) 258 | add_class = node_class is not None 259 | if add_class and parcimonious: 260 | # find the nearest parent with a class 261 | parent_class = None 262 | parent_path = path 263 | while parent_class is None and parent_path: 264 | parent_path = parent_path[:-1] 265 | parent_class = self.css_class(parent_path, paths_ok, paths_ko) 266 | # only add a class if different from the parent's 267 | add_class = node_class != parent_class 268 | if add_class: 269 | node.head = f'<{self.element} class="{node_class}">{node.head}' 270 | node.tail = f'{node.tail}</{self.element}>' 271 | return node 272 | 273 | def __call__(self, tree, paths_ok, 
paths_ko, parcimonious=True): 274 | """representation of tree, adding html elements with right class around subexpressions 275 | according to their presence in paths_ok or paths_ko 276 | 277 | :param tree: a luqum tree 278 | :param paths_ok: set of path to nodes (express as tuple of int) that should get ok_class 279 | :param paths_ko: set of path to nodes that should get ko_class 280 | :param parcimonious: only add class when parent node does not have same class 281 | 282 | :return str: expression with html elements surrounding part of expression 283 | with right class attribute according to paths_ok and paths_ko 284 | """ 285 | new_tree = super().__call__(tree, paths_ok, paths_ko, parcimonious) 286 | return new_tree.__str__(head_tail=True) 287 | -------------------------------------------------------------------------------- /luqum/parsetab.py: -------------------------------------------------------------------------------- 1 | 2 | # parsetab.py 3 | # This file is automatically generated. Do not edit. 4 | # pylint: disable=W,C,R 5 | _tabversion = '3.10' 6 | 7 | _lr_method = 'LALR' 8 | 9 | _lr_signature = 'leftIMPLICIT_OPleftOR_OPleftAND_OPnonassocPLUSMINUSnonassocBOOSTnonassocTOrightUMINUSAND_OP APPROX BOOST COLUMN GREATERTHAN LBRACKET LESSTHAN LPAREN MINUS NOT OR_OP PHRASE PLUS RBRACKET REGEX RPAREN TERM TOexpression : expression OR_OP expressionexpression : expression AND_OP expressionexpression : expression expression %prec IMPLICIT_OPunary_expression : PLUS unary_expressionunary_expression : MINUS unary_expressionunary_expression : NOT unary_expressionexpression : unary_expressionunary_expression : LPAREN expression RPARENunary_expression : LBRACKET phrase_or_possibly_negative_term TO phrase_or_possibly_negative_term RBRACKETpossibly_negative_term : MINUS phrase_or_term %prec UMINUS\n | phrase_or_termphrase_or_possibly_negative_term : possibly_negative_term\n | PHRASEunary_expression : LESSTHAN phrase_or_termunary_expression : GREATERTHAN phrase_or_termunary_expression : TERM COLUMN unary_expressionunary_expression : PHRASEunary_expression : PHRASE APPROXunary_expression : unary_expression BOOSTunary_expression : TERMunary_expression : TERM APPROXunary_expression : REGEXunary_expression : TOphrase_or_term : TERM\n | PHRASE' 10 | 11 | _lr_action_items = 
{'PLUS':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[3,3,-7,3,3,3,3,-23,-20,-17,-22,3,3,3,-19,-4,-5,-6,3,-24,-14,-25,-15,3,-21,-18,3,3,-8,-16,-9,]),'MINUS':([0,1,2,3,4,5,6,7,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,37,39,41,],[4,4,-7,4,4,4,4,25,-23,-20,-17,-22,4,4,4,-19,-4,-5,-6,4,-24,-14,-25,-15,4,-21,-18,4,4,-8,25,-16,-9,]),'NOT':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[5,5,-7,5,5,5,5,-23,-20,-17,-22,-3,5,5,-19,-4,-5,-6,5,-24,-14,-25,-15,5,-21,-18,-1,-2,-8,-16,-9,]),'LPAREN':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[6,6,-7,6,6,6,6,-23,-20,-17,-22,-3,6,6,-19,-4,-5,-6,6,-24,-14,-25,-15,6,-21,-18,-1,-2,-8,-16,-9,]),'LBRACKET':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[7,7,-7,7,7,7,7,-23,-20,-17,-22,-3,7,7,-19,-4,-5,-6,7,-24,-14,-25,-15,7,-21,-18,-1,-2,-8,-16,-9,]),'LESSTHAN':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[9,9,-7,9,9,9,9,-23,-20,-17,-22,-3,9,9,-19,-4,-5,-6,9,-24,-14,-25,-15,9,-21,-18,-1,-2,-8,-16,-9,]),'GREATERTHAN':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[10,10,-7,10,10,10,10,-23,-20,-17,-22,-3,10,10,-19,-4,-5,-6,10,-24,-14,-25,-15,10,-21,-18,-1,-2,-8,-16,-9,]),'TERM':([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,25,27,28,29,30,31,32,33,34,35,36,37,39,41,],[11,11,-7,11,11,11,11,27,-23,27,27,-20,-17,-22,-3,11,11,-19,-4,-5,-6,11,27,-24,-14,-25,-15,11,-21,-18,-1,-2,-8,27,-16,-9,]),'PHRASE':([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,25,27,28,29,30,31,32,33,34,35,36,37,39,41,],[12,12,-7,12,12,12,12,24,-23,29,29,-20,-17,-22,-3,12,12,-19,-4,-5,-6,12,29,-24,-14,-25,-15,12,-21,-18,-1,-2,-8,24,-16,-9,]),'REGEX':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[13,13,-7,13,13,13,13,-23,-20,-17,-22,-3,13,13,-19,-4,-5,-6,13,-24,-14,-25,-15,13,-21,-18,-1,-2,-8,-16,-9,]),'TO':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,38,39,41,],[8,8,-7,8,8,8,8,-23,-20,-17,-22,8,8,8,-19,-4,-5,-6,8,37,-12,-13,-11,-24,-14,-25,-15,8,-21,-18,8,8,-8,-10,-16,-9,]),'$end':([1,2,8,11,12,13,14,17,18,19,20,27,28,29,30,32,33,34,35,36,39,41,],[0,-7,-23,-20,-17,-22,-3,-19,-4,-5,-6,-24,-14,-25,-15,-21,-18,-1,-2,-8,-16,-9,]),'OR_OP':([1,2,8,11,12,13,14,17,18,19,20,21,27,28,29,30,32,33,34,35,36,39,41,],[15,-7,-23,-20,-17,-22,15,-19,-4,-5,-6,15,-24,-14,-25,-15,-21,-18,-1,-2,-8,-16,-9,]),'AND_OP':([1,2,8,11,12,13,14,17,18,19,20,21,27,28,29,30,32,33,34,35,36,39,41,],[16,-7,-23,-20,-17,-22,16,-19,-4,-5,-6,16,-24,-14,-25,-15,-21,-18,16,-2,-8,-16,-9,]),'RPAREN':([2,8,11,12,13,14,17,18,19,20,21,27,28,29,30,32,33,34,35,36,39,41,],[-7,-23,-20,-17,-22,-3,-19,-4,-5,-6,36,-24,-14,-25,-15,-21,-18,-1,-2,-8,-16,-9,]),'BOOST':([2,8,11,12,13,17,18,19,20,27,28,29,30,32,33,36,39,41,],[17,-23,-20,-17,-22,-19,17,17,17,-24,-14,-25,-15,-21,-18,-8,17,-9,]),'COLUMN':([11,],[31,]),'APPROX':([11,12,],[32,33,]),'RBRACKET':([23,24,26,27,29,38,40,],[-12,-13,-11,-24,-25,-10,41,]),} 12 | 13 | _lr_action = {} 14 | for _k, _v in _lr_action_items.items(): 15 | for _x,_y in zip(_v[0],_v[1]): 16 | if not _x in _lr_action: _lr_action[_x] = {} 17 | _lr_action[_x][_k] = _y 18 | del _lr_action_items 19 | 20 | _lr_goto_items = 
{'expression':([0,1,6,14,15,16,21,34,35,],[1,14,21,14,34,35,14,14,14,]),'unary_expression':([0,1,3,4,5,6,14,15,16,21,31,34,35,],[2,2,18,19,20,2,2,2,2,2,39,2,2,]),'phrase_or_possibly_negative_term':([7,37,],[22,40,]),'possibly_negative_term':([7,37,],[23,23,]),'phrase_or_term':([7,9,10,25,37,],[26,28,30,38,26,]),} 21 | 22 | _lr_goto = {} 23 | for _k, _v in _lr_goto_items.items(): 24 | for _x, _y in zip(_v[0], _v[1]): 25 | if not _x in _lr_goto: _lr_goto[_x] = {} 26 | _lr_goto[_x][_k] = _y 27 | del _lr_goto_items 28 | _lr_productions = [ 29 | ("S' -> expression","S'",1,None,None,None), 30 | ('expression -> expression OR_OP expression','expression',3,'p_expression_or','parser.py',254), 31 | ('expression -> expression AND_OP expression','expression',3,'p_expression_and','parser.py',260), 32 | ('expression -> expression expression','expression',2,'p_expression_implicit','parser.py',266), 33 | ('unary_expression -> PLUS unary_expression','unary_expression',2,'p_expression_plus','parser.py',272), 34 | ('unary_expression -> MINUS unary_expression','unary_expression',2,'p_expression_minus','parser.py',278), 35 | ('unary_expression -> NOT unary_expression','unary_expression',2,'p_expression_not','parser.py',284), 36 | ('expression -> unary_expression','expression',1,'p_expression_unary','parser.py',290), 37 | ('unary_expression -> LPAREN expression RPAREN','unary_expression',3,'p_grouping','parser.py',295), 38 | ('unary_expression -> LBRACKET phrase_or_possibly_negative_term TO phrase_or_possibly_negative_term RBRACKET','unary_expression',5,'p_range','parser.py',301), 39 | ('possibly_negative_term -> MINUS phrase_or_term','possibly_negative_term',2,'p_possibly_negative_term','parser.py',313), 40 | ('possibly_negative_term -> phrase_or_term','possibly_negative_term',1,'p_possibly_negative_term','parser.py',314), 41 | ('phrase_or_possibly_negative_term -> possibly_negative_term','phrase_or_possibly_negative_term',1,'p_phrase_or_possibly_negative_term','parser.py',323), 42 | ('phrase_or_possibly_negative_term -> PHRASE','phrase_or_possibly_negative_term',1,'p_phrase_or_possibly_negative_term','parser.py',324), 43 | ('unary_expression -> LESSTHAN phrase_or_term','unary_expression',2,'p_lessthan','parser.py',329), 44 | ('unary_expression -> GREATERTHAN phrase_or_term','unary_expression',2,'p_greaterthan','parser.py',336), 45 | ('unary_expression -> TERM COLUMN unary_expression','unary_expression',3,'p_field_search','parser.py',343), 46 | ('unary_expression -> PHRASE','unary_expression',1,'p_quoting','parser.py',352), 47 | ('unary_expression -> PHRASE APPROX','unary_expression',2,'p_proximity','parser.py',357), 48 | ('unary_expression -> unary_expression BOOST','unary_expression',2,'p_boosting','parser.py',363), 49 | ('unary_expression -> TERM','unary_expression',1,'p_terms','parser.py',369), 50 | ('unary_expression -> TERM APPROX','unary_expression',2,'p_fuzzy','parser.py',374), 51 | ('unary_expression -> REGEX','unary_expression',1,'p_regex','parser.py',380), 52 | ('unary_expression -> TO','unary_expression',1,'p_to_as_term','parser.py',386), 53 | ('phrase_or_term -> TERM','phrase_or_term',1,'p_phrase_or_term','parser.py',392), 54 | ('phrase_or_term -> PHRASE','phrase_or_term',1,'p_phrase_or_term','parser.py',393), 55 | ] 56 | -------------------------------------------------------------------------------- /luqum/pretty.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """This module provides a pretty printer for lucene query tree. 
3 | """ 4 | from .tree import BaseOperation, BaseGroup, SearchField 5 | 6 | 7 | class _StickMarker: 8 | """Use in list between two elements that must stick together 9 | """ 10 | 11 | def __len__(self): 12 | return 0 13 | 14 | 15 | # a marker to avoid a new line between two elements 16 | _STICK_MARKER = _StickMarker() 17 | 18 | 19 | class Prettifier(object): 20 | """Class to generate a pretty printer. 21 | """ 22 | 23 | def __init__(self, indent=4, max_len=80, inline_ops=False): 24 | """ 25 | The pretty printer factory. 26 | 27 | :param int indent: number of space for indentation 28 | :param int max_len: maximum line length in number of characters. 29 | Prettyfier will do its best to keep inside those margin, 30 | but as it can only split on operators, it may not be possible. 31 | :param bool inline_ops: if False (default) operators are printed on a new line 32 | if True, operators are printed at the end of the line. 33 | """ 34 | self.indent = indent 35 | self.prefix = " " * self.indent 36 | self.max_len = max_len 37 | self.inline_ops = inline_ops 38 | 39 | def _get_chains(self, element, parent=None): 40 | """return a list of string and list, and recursively 41 | 42 | An inner list represent a level of indentation 43 | A string is information from the level 44 | """ 45 | if isinstance(element, BaseOperation): 46 | if not isinstance(parent, BaseOperation) or element.op == parent.op: 47 | # same level, this is just associativity 48 | num_children = len(element.children) 49 | for n, child in enumerate(element.children): 50 | yield from self._get_chains(child, element) 51 | if n < num_children - 1: 52 | if self.inline_ops: 53 | yield _STICK_MARKER 54 | if element.op: 55 | yield element.op 56 | else: 57 | # another operation, raise level 58 | new_level = [] 59 | num_children = len(element.children) 60 | for n, child in enumerate(element.children): 61 | new_level.extend(self._get_chains(child, element)) 62 | if n < num_children - 1: 63 | if self.inline_ops: 64 | new_level.append(_STICK_MARKER) 65 | if element.op: 66 | new_level.append(element.op) 67 | yield new_level 68 | elif isinstance(element, BaseGroup): 69 | # raise level 70 | yield "(" 71 | yield list(self._get_chains(element.expr, element)) 72 | if self.inline_ops: 73 | yield _STICK_MARKER 74 | yield ")" 75 | elif isinstance(element, SearchField): 76 | # use recursion on sub expression 77 | yield element.name + ":" 78 | yield _STICK_MARKER 79 | yield from self._get_chains(element.expr, element) 80 | else: 81 | # simple element 82 | yield str(element) 83 | 84 | def _count_chars(self, element): 85 | """Replace each element by the element and a count of chars in it (and recursively) 86 | 87 | This will help, compute if elements can stand on a line or not 88 | """ 89 | if isinstance(element, list): 90 | with_counts = [self._count_chars(c)for c in element] 91 | # when counting we add a space for joining 92 | return with_counts, sum(n + 1 for c, n in with_counts) - 1 93 | else: 94 | return element, len(element) 95 | 96 | def _apply_stick(self, elements): 97 | last = None 98 | sticking = False 99 | for current in elements: 100 | if current == _STICK_MARKER: 101 | assert last is not None, "_STICK_MARKER should never be first !" 
-------------------------------------------------------------------------------- /luqum/thread.py: --------------------------------------------------------------------------------
1 | import threading
2 | 
3 | from . import parser
4 | 
5 | thread_local = threading.local()
6 | 
7 | 
8 | def parse(input=None, lexer=None, debug=False, tracking=False):
9 |     """A (hopefully) thread safe version of :py:meth:`luqum.parser.parse`
10 | 
11 |     PLY is not thread safe because of its lexer state, but by cloning the lexer we can be
12 |     thread safe. see: https://github.com/jurismarches/luqum/issues/72
13 | 
14 |     Warning: The parameters ``lexer``, ``debug`` and ``tracking`` are not used.
15 |     They are still present for signature compatibility.
16 |     """
17 |     if not hasattr(thread_local, "lexer"):
18 |         thread_local.lexer = parser.lexer.clone()
19 |     return parser.parser.parse(input, lexer=thread_local.lexer)
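A sketch of the intended use of ``luqum.thread.parse`` (the worker function, thread names and query strings are illustrative assumptions):

    import threading

    from luqum.thread import parse

    results = {}

    def worker(name, query):
        # each thread lazily gets its own cloned lexer on its first call
        results[name] = parse(query)

    threads = [
        threading.Thread(target=worker, args=("t1", "foo OR bar")),
        threading.Thread(target=worker, args=("t2", 'title:"spam" AND ham')),
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # results now maps each name to a luqum.tree expression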
-------------------------------------------------------------------------------- /luqum/visitor.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Base classes to implement a visitor pattern.
3 | """
4 | 
5 | 
6 | def camel_to_lower(name):
7 |     return "".join(
8 |         "_" + w.lower() if w.isupper() else w.lower()
9 |         for w in name).lstrip("_")
10 | 
11 | 
12 | class TreeVisitor:
13 |     """
14 |     Tree Visitor base class.
15 | 
16 |     This class is meant to be subclassed, with the subclass implementing
17 |     visitor methods for each Node type it is interested in.
18 | 
19 |     By default, those visitor methods should be named ``'visit_'`` + class
20 |     name of the node, converted to lower_case (i.e. visit_search_node for a
21 |     SearchNode class)[#tweakvisit]_.
22 | 
23 |     It's up to the visit method of each node to recursively visit children (or not).
24 |     It may be done simply by calling the generic_visit method.
25 | 
26 |     By default, `generic_visit` simply triggers the visit of subnodes, yielding no information.
27 | 
28 |     If the goal is to modify the initial tree, to get a new modified copy,
29 |     use :py:class:`TreeTransformer` instead.
30 | 
31 |     .. [#tweakvisit]: You can tweak this behaviour
32 |        by overriding the `visitor_method_prefix` & `generic_visitor_method_name` class attributes.
33 | 
34 |     :param bool track_parents: if True, the context will contain the parents of the current node
35 |        as a tuple. It's up to you to maintain it in your own visit methods.
36 |     """
37 |     visitor_method_prefix = 'visit_'
38 |     generic_visitor_method_name = 'generic_visit'
39 | 
40 |     def __init__(self, track_parents=False):
41 |         self.track_parents = track_parents
42 | 
43 |     _get_method_cache = None
44 | 
45 |     def _get_method(self, node):
46 |         if self._get_method_cache is None:
47 |             self._get_method_cache = {}
48 |         try:
49 |             meth = self._get_method_cache[type(node)]
50 |         except KeyError:
51 |             for cls in node.__class__.mro():
52 |                 try:
53 |                     method_name = "{}{}".format(
54 |                         self.visitor_method_prefix,
55 |                         camel_to_lower(cls.__name__)
56 |                     )
57 |                     meth = getattr(self, method_name)
58 |                     break
59 |                 except AttributeError:
60 |                     continue
61 |             else:
62 |                 meth = getattr(self, self.generic_visitor_method_name)
63 |             self._get_method_cache[type(node)] = meth
64 |         return meth
65 | 
66 |     def visit(self, tree, context=None):
67 |         """Traversal of tree
68 | 
69 |         :param luqum.tree.Item tree: a tree representing a lucene expression
70 |         :param dict context: a dict with initial values for context
71 | 
72 |         .. note:: the values in context are not guaranteed to move up the hierarchy,
73 |            because we copy the context for children so that they can have specific values.
74 | 
75 |            A trick you can use if you need values to move up the hierarchy
76 |            is to set a `"global"` key containing a dict, where you can store values.
77 |         """
78 |         if context is None:
79 |             context = {}
80 |         return list(self.visit_iter(tree, context=context))
81 | 
82 |     def visit_iter(self, node, context):
83 |         """
84 |         Basic, recursive traversal of the tree.
85 | 
86 |         :param luqum.tree.Item node: current node
87 |         :param dict context: a dict of contextual variables for free use
88 |            to track states while traversing the tree (e.g. the current field name)
89 |         """
90 |         method = self._get_method(node)
91 |         yield from method(node, context)
92 | 
93 |     def child_context(self, node, child, context, **kwargs):
94 |         """Generate a context for children.
95 | 
96 |         The child context is distinct from the parent context,
97 |         so that visits in one branch do not affect others.
98 | 
99 |         .. note:: If you need global parameters,
100 |            a trick is to put them in a dict under a "global" entry,
101 |            as we do a shallow copy of the context, not a deep one.
102 | 
103 |         :param luqum.tree.Item node: parent node
104 |         :param luqum.tree.Item child: child node
105 |         :param dict context: parent context
106 |         :return dict: child context
107 |         """
108 |         child_context = dict(context)
109 |         if self.track_parents:
110 |             child_context["parents"] = context.get("parents", ()) + (node,)
111 |         return child_context
112 | 
113 |     def generic_visit(self, node, context):
114 |         """
115 |         Default visitor function, called if nothing matches the current node.
116 | 
117 |         It simply visits children.
118 | 
119 |         :param luqum.tree.Item node: current node
120 |         :param dict context: context (aka local parameters received from parents)
121 |         """
122 |         for child in node.children:
123 |             child_context = self.child_context(node, child, context)
124 |             yield from self.visit_iter(child, context=child_context)
125 | 
126 | 
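A minimal sketch of a concrete visitor (not part of this module; the ``WordLister`` class and the sample tree are illustrative assumptions relying only on the TreeVisitor API above):

    from luqum.tree import AndOperation, SearchField, Word
    from luqum.visitor import TreeVisitor

    class WordLister(TreeVisitor):
        # called for each luqum.tree.Word node, per the camel_to_lower naming rule
        def visit_word(self, node, context):
            yield node.value

    tree = AndOperation(SearchField("title", Word("spam")), Word("ham"))
    # visit() collects everything the visit_* methods yield, in traversal order
    assert WordLister().visit(tree) == ["spam", "ham"]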
127 | class TreeTransformer(TreeVisitor):
128 |     """A version of TreeVisitor that is aimed at obtaining a transformed copy of the tree.
129 | 
130 |     .. note:: It is far better to build a transformed copy
131 |        than to modify the original tree in place, as it is less error prone.
132 | 
133 |     :param bool track_new_parents: do we want to track new parents in the context?
134 |     """
135 | 
136 |     def __init__(self, track_new_parents=False, **kwargs):
137 |         self.track_new_parents = track_new_parents
138 |         super().__init__(**kwargs)
139 | 
140 |     def _clone_item(self, node):
141 |         """Simply call node.clone_item.
142 | 
143 |         Override this method to add specific tweaks if needed (like copying special attributes)
144 |         """
145 |         return node.clone_item()
146 | 
147 |     def visit(self, tree, context=None):
148 |         """Visit the tree, by default building a copy and returning it.
149 | 
150 |         :param luqum.tree.Item tree: luqum expression tree
151 |         :param context: optional initial context
152 |         """
153 |         if context is None:
154 |             context = {}
155 |         try:
156 |             value, = self.visit_iter(tree, context=context)
157 |             return value
158 |         except ValueError as e:
159 |             if str(e).startswith(("too many values to unpack", "not enough values to unpack")):
160 |                 exc = ValueError(
161 |                     "The visit of the tree should have produced exactly one element "
162 |                     "(the transformed tree)"
163 |                 )
164 |                 raise exc from e
165 |             else:
166 |                 raise
167 | 
168 |     def child_context(self, node, child, context, **kwargs):
169 |         child_context = super().child_context(node, child, context, **kwargs)
170 |         if self.track_new_parents:
171 |             child_context["new_parents"] = context.get("new_parents", ()) + (kwargs["new_node"],)
172 |         return child_context
173 | 
174 |     def generic_visit(self, node, context):
175 |         """
176 |         Default visitor function, called if nothing matches the current node.
177 | 
178 |         It simply clones the node and its children.
179 |         """
180 |         new_node = self._clone_item(node)
181 |         new_node.children = list(self.clone_children(node, new_node, context))
182 |         yield new_node
183 | 
184 |     def clone_children(self, node, new_node, context):
185 |         """Helper to clone children.
186 | 
187 |         .. note:: a child may generate more than one child, or none, for flexibility,
188 |            but it's up to the transformer to ensure everything stays consistent
189 |         """
190 |         for child in node.children:
191 |             child_context = self.child_context(node, child, context, new_node=new_node)
192 |             new_children = self.visit_iter(child, context=child_context)
193 |             for new_child in new_children:
194 |                 yield new_child
195 | 
196 | 
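A matching sketch for TreeTransformer (again not part of the package sources; ``Lowercaser`` is an illustrative assumption, and for simplicity it rebuilds fresh ``Word`` nodes instead of cloning, so any head/tail whitespace carried by the original words is dropped):

    from luqum.tree import AndOperation, Word
    from luqum.visitor import TreeTransformer

    class Lowercaser(TreeTransformer):
        def visit_word(self, node, context):
            # yield exactly one replacement node for each Word
            yield Word(node.value.lower())

    tree = AndOperation(Word("FOO"), Word("Bar"))
    new_tree = Lowercaser().visit(tree)  # the original tree is left untouched
    assert str(new_tree) == str(AndOperation(Word("foo"), Word("bar")))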
197 | class PathTrackingMixin:
198 |     """It can be useful to compute the path of an element (as a tuple of indexes in parent children)
199 | 
200 |     This mixin provides the base components
201 |     """
202 | 
203 |     def child_context(self, node, child, context, **kwargs):
204 |         """Thanks to the "position" entry in kwargs, we add each child's path to its context
205 |         """
206 |         child_context = super().child_context(node, child, context, **kwargs)
207 |         child_context["path"] = context["path"] + (kwargs["position"],)
208 |         return child_context
209 | 
210 |     def visit(self, node, context=None):
211 |         """Visit the tree while tracking each node's path
212 |         """
213 |         if context is None:
214 |             context = {}
215 |         context["path"] = ()
216 |         return super().visit(node, context=context)
217 | 
218 | 
219 | class PathTrackingVisitor(PathTrackingMixin, TreeVisitor):
220 |     """Path tracking version of TreeVisitor
221 |     """
222 | 
223 |     def generic_visit(self, node, context):
224 |         for i, child in enumerate(node.children):
225 |             child_context = self.child_context(node, child, context, position=i)
226 |             yield from self.visit_iter(child, context=child_context)
227 | 
228 | 
229 | class PathTrackingTransformer(PathTrackingMixin, TreeTransformer):
230 |     """Path tracking version of TreeTransformer
231 |     """
232 | 
233 |     def clone_children(self, node, new_node, context):
234 |         for i, child in enumerate(node.children):
235 |             child_context = self.child_context(node, child, context, new_node=new_node, position=i)
236 |             new_children = self.visit_iter(child, context=child_context)
237 |             for new_child in new_children:
238 |                 yield new_child
239 | 
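A last sketch, for path tracking (not part of the package sources; the paths shown in the comment assume the parser groups this expression as ``AndOperation(Word, Group(OrOperation(...)))``):

    from luqum.parser import parser
    from luqum.visitor import PathTrackingVisitor

    class WordPaths(PathTrackingVisitor):
        def visit_word(self, node, context):
            # context["path"] is the tuple of child positions leading to this node
            yield (context["path"], node.value)

    tree = parser.parse("foo AND (bar OR baz)")
    # yields something like [((0,), 'foo'), ((1, 0, 0), 'bar'), ((1, 0, 1), 'baz')]
    print(WordPaths().visit(tree))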
-------------------------------------------------------------------------------- /pyproject.toml: --------------------------------------------------------------------------------
1 | [tool.pytest.ini_options]
2 | addopts = '--doctest-modules --doctest-glob="test_*.rst" --cov=luqum --cov-branch --cov-report html --no-cov-on-fail'
3 | python_files = 'test_*.py tests.py'
4 | 
5 | 
-------------------------------------------------------------------------------- /requirements-dev.txt: --------------------------------------------------------------------------------
1 | -r requirements.txt
2 | 
3 | coverage==7.6.10
4 | elasticsearch-dsl==8.17.1
5 | flake8==7.1.1
6 | pytest==8.3.4
7 | pytest-cov==6.0.0
8 | Sphinx==8.1.3
9 | wheel==0.45.1
10 | build==1.2.2.post1
11 | twine==6.1.0
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | ply==3.11
2 | 
-------------------------------------------------------------------------------- /setup.cfg: --------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length=100
3 | exclude=
4 |     parsetab.py
5 | 
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from setuptools import setup
3 | 
4 | from luqum import __version__
5 | 
6 | 
7 | with open('README.rst', 'r') as f:
8 |     long_description = f.read()
9 | with open('CHANGELOG.rst', 'r') as f:
10 |     long_description += "\n\n" + f.read()
11 | 
12 | 
13 | setup(
14 |     name='luqum',
15 |     version=__version__,
16 |     description="A Lucene query parser generating ElasticSearch queries and more!",
17 |     long_description=long_description,
18 |     author='Jurismarches',
19 |     author_email='contact@jurismarches.com',
20 |     url='https://github.com/jurismarches/luqum',
21 |     packages=[
22 |         'luqum',
23 |         'luqum.elasticsearch'
24 |     ],
25 |     install_requires=[
26 |         'ply>=3.11',
27 |     ],
28 |     classifiers=[
29 |         'Development Status :: 4 - Beta',
30 |         'License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)',
31 |         'License :: OSI Approved :: Apache Software License',
32 |         'Intended Audience :: Developers',
33 |         'Programming Language :: Python',
34 |         'Programming Language :: Python :: 3.10',
35 |         'Programming Language :: Python :: 3.11',
36 |         'Programming Language :: Python :: 3.12',
37 |         'Programming Language :: Python :: 3.13',
38 |     ],
39 | )
-------------------------------------------------------------------------------- /tests/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/jurismarches/luqum/64d3d4a1bcdc56f156185df1f4d146582f294998/tests/__init__.py
-------------------------------------------------------------------------------- /tests/alternative_lexer.py: --------------------------------------------------------------------------------
1 | """
2 | Fake Lexer to test: [Multiple Parsers and
3 | Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37)
4 | """
5 | 
6 | # List of token names. This is always required
7 | tokens = (
8 |     "NUMBER",
9 |     "PLUS",
10 |     "MINUS",
11 |     "TIMES",
12 |     "DIVIDE",
13 |     "LPAREN",
14 |     "RPAREN",
15 | )
16 | 
17 | # Regular expression rules for simple tokens
18 | t_PLUS = r"\+"
19 | t_MINUS = r"-"
20 | t_TIMES = r"\*"
21 | t_DIVIDE = r"/"
22 | t_LPAREN = r"\("
23 | t_RPAREN = r"\)"
24 | 
25 | 
26 | # A regular expression rule with some action code
27 | def t_NUMBER(t):
28 |     r"\d+"
29 |     t.value = int(t.value)
30 |     return t
31 | 
32 | 
33 | # Define a rule so we can track line numbers
34 | def t_newline(t):
35 |     r"\n+"
36 |     t.lexer.lineno += len(t.value)
37 | 
38 | 
39 | # A string containing ignored characters (spaces and tabs)
40 | t_ignore = " \t"
41 | 
42 | 
43 | # Error handling rule
44 | def t_error(t):
45 |     print("Illegal character '%s'" % t.value[0])
46 |     t.lexer.skip(1)
-------------------------------------------------------------------------------- /tests/test_auto_head_tail.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from unittest import TestCase
3 | 
4 | from luqum.tree import (
5 |     SearchField, FieldGroup, Group, Word, Phrase, Proximity, Fuzzy, Range,
6 |     Not, AndOperation, OrOperation, Plus, UnknownOperation)
7 | from luqum.auto_head_tail import auto_head_tail
8 | 
9 | 
10 | class AutoHeadTailTestCase(TestCase):
11 | 
12 |     def test_or_operation(self):
13 |         tree = OrOperation(Word("foo"), Word("bar"), Word("baz"))
14 |         self.assertEqual(str(tree), "fooORbarORbaz")
15 |         self.assertEqual(str(auto_head_tail(tree)), "foo OR bar OR baz")
16 | 
17 |     def test_and_operation(self):
18 |         tree = AndOperation(Word("foo"), Word("bar"), Word("baz"))
19 |         self.assertEqual(str(tree), "fooANDbarANDbaz")
20 |         self.assertEqual(str(auto_head_tail(tree)), "foo AND bar AND baz")
21 | 
22 |     def test_unknown_operation(self):
23 |         tree = UnknownOperation(Word("foo"), Word("bar"), Word("baz"))
UnknownOperation(Word("foo"), Word("bar"), Word("baz")) 24 | self.assertEqual(str(tree), "foobarbaz") 25 | self.assertEqual(str(auto_head_tail(tree)), "foo bar baz") 26 | 27 | def test_range(self): 28 | tree = Range(Word("foo"), Word("bar")) 29 | self.assertEqual(str(tree), "[fooTObar]") 30 | self.assertEqual(str(auto_head_tail(tree)), "[foo TO bar]") 31 | 32 | def test_not(self): 33 | tree = Not(Word("foo")) 34 | self.assertEqual(str(tree), "NOTfoo") 35 | self.assertEqual(str(auto_head_tail(tree)), "NOT foo") 36 | 37 | def test_complex(self): 38 | tree = Group( 39 | OrOperation( 40 | SearchField( 41 | "foo", 42 | FieldGroup(UnknownOperation(Word("bar"), Range(Word("baz"), Word("spam")))), 43 | ), 44 | Not(Proximity(Phrase('"ham ham"'), 2)), 45 | Plus(Fuzzy(Word("hammer"), 3)), 46 | ) 47 | ) 48 | self.assertEqual(str(tree), '(foo:(bar[bazTOspam])ORNOT"ham ham"~2OR+hammer~3)') 49 | self.assertEqual( 50 | str(auto_head_tail(tree)), 51 | '(foo:(bar [baz TO spam]) OR NOT "ham ham"~2 OR +hammer~3)', 52 | ) 53 | # idem potent 54 | self.assertEqual( 55 | str(auto_head_tail(auto_head_tail(tree))), 56 | '(foo:(bar [baz TO spam]) OR NOT "ham ham"~2 OR +hammer~3)', 57 | ) 58 | 59 | def test_auto_head_tail_no_change_to_existing(self): 60 | tree = AndOperation( 61 | Range(Word("foo", tail="\t"), Word("bar", head="\n"), tail="\r"), 62 | Not(Word("baz", head="\t\t"), head="\n\n", tail="\r\r"), 63 | Word("spam", head="\t\n"), 64 | ) 65 | self.assertEqual(str(tree), "[foo\tTO\nbar]\rAND\n\nNOT\t\tbaz\r\rAND\t\nspam") 66 | self.assertEqual( 67 | str(auto_head_tail(tree)), 68 | "[foo\tTO\nbar]\rAND\n\nNOT\t\tbaz\r\rAND\t\nspam" 69 | ) 70 | -------------------------------------------------------------------------------- /tests/test_deprecated_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. todo:: split this file in multiple file: tree, lexer, parser 4 | """ 5 | import collections 6 | import copy 7 | from unittest import TestCase 8 | 9 | from luqum.tree import Group, Word, Phrase, AndOperation, OrOperation 10 | from luqum.deprecated_utils import LuceneTreeVisitor, LuceneTreeTransformer, LuceneTreeVisitorV2 11 | 12 | 13 | class TreeVisitorTestCase(TestCase): 14 | 15 | class BasicVisitor(LuceneTreeVisitor): 16 | """ Dummy visitor, simply yielding a list of nodes. 
""" 17 | def generic_visit(self, node, parents): 18 | yield node 19 | 20 | class MROVisitor(LuceneTreeVisitor): 21 | 22 | def visit_or_operation(self, node, parents=[]): 23 | return ["{} OR {}".format(*node.children)] 24 | 25 | def visit_base_operation(self, node, parents=[]): 26 | return ["{} BASE_OP {}".format(*node.children)] 27 | 28 | def visit_word(self, node, parents=[]): 29 | return [node.value] 30 | 31 | def test_generic_visit(self): 32 | tree = ( 33 | AndOperation( 34 | Word("foo"), 35 | Word("bar"))) 36 | 37 | visitor = LuceneTreeVisitor() 38 | nodes = list(visitor.visit(tree)) 39 | self.assertEqual(nodes, []) 40 | 41 | def test_basic_traversal(self): 42 | tree = ( 43 | AndOperation( 44 | Word("foo"), 45 | Word("bar"))) 46 | 47 | visitor = self.BasicVisitor() 48 | nodes = list(visitor.visit(tree)) 49 | 50 | self.assertListEqual( 51 | [AndOperation(Word('foo'), Word('bar')), Word('foo'), Word('bar')], 52 | nodes) 53 | 54 | def test_mro(self): 55 | visitor = self.MROVisitor() 56 | 57 | tree = OrOperation(Word('a'), Word('b')) 58 | result = visitor.visit(tree) 59 | self.assertEqual(list(result), ['a OR b', 'a', 'b']) 60 | 61 | tree = AndOperation(Word('a'), Word('b')) 62 | result = visitor.visit(tree) 63 | self.assertEqual(list(result), ['a BASE_OP b', 'a', 'b']) 64 | 65 | 66 | class TreeTransformerTestCase(TestCase): 67 | 68 | class BasicTransformer(LuceneTreeTransformer): 69 | """ 70 | Dummy transformer that simply turn any Word node's value into "lol" 71 | """ 72 | def visit_word(self, node, parent): 73 | return Word('lol') 74 | 75 | def visit_phrase(self, node, parent): 76 | return None 77 | 78 | class OrListOperation(OrOperation): 79 | """Dummy operation having list operands instead of tuple 80 | """ 81 | def __init__(self, *args, **kwargs): 82 | super().__init__(*args, **kwargs) 83 | self.operands = list(self.operands) 84 | 85 | def test_basic_traversal(self): 86 | tree = ( 87 | AndOperation( 88 | Word("foo"), 89 | Word("bar"))) 90 | 91 | transformer = self.BasicTransformer() 92 | new_tree = transformer.visit(tree) 93 | 94 | self.assertEqual( 95 | new_tree, 96 | (AndOperation( 97 | Word("lol"), 98 | Word("lol")))) 99 | 100 | def test_no_transform(self): 101 | tree = AndOperation() 102 | transformer = self.BasicTransformer() 103 | new_tree = transformer.visit(tree) 104 | self.assertEqual( 105 | new_tree, 106 | AndOperation()) 107 | 108 | def test_one_word(self): 109 | tree = Word("foo") 110 | transformer = self.BasicTransformer() 111 | new_tree = transformer.visit(tree) 112 | self.assertEqual( 113 | new_tree, 114 | Word("lol")) 115 | 116 | def test_removal(self): 117 | tree = ( 118 | AndOperation( 119 | AndOperation( 120 | Word("foo"), 121 | Phrase('"bar"')), 122 | AndOperation( 123 | Phrase('"baz"'), 124 | Phrase('"biz"')))) 125 | 126 | transformer = self.BasicTransformer() 127 | new_tree = transformer.visit(tree) 128 | 129 | self.assertEqual( 130 | new_tree, 131 | (AndOperation( 132 | AndOperation(Word("lol")), 133 | AndOperation()))) 134 | 135 | def test_operands_list(self): 136 | OrListOperation = self.OrListOperation 137 | tree = ( 138 | OrListOperation( 139 | OrListOperation( 140 | Word("foo"), 141 | Phrase('"bar"')), 142 | OrListOperation( 143 | Phrase('"baz"')))) 144 | 145 | transformer = self.BasicTransformer() 146 | new_tree = transformer.visit(tree) 147 | 148 | self.assertEqual( 149 | new_tree, 150 | (OrListOperation( 151 | OrListOperation(Word("lol")), 152 | OrListOperation()))) 153 | 154 | def test_silent_value_error(self): 155 | # in the case some attribute 
mislead the search for node do not raise 156 | tree = AndOperation(Word("a"), Word("b")) 157 | setattr(tree, "misleading1", ()) 158 | setattr(tree, "misleading2", []) 159 | # hackishly patch __dict__ to be sure we have operands in right order for test 160 | tree.__dict__ = collections.OrderedDict(tree.__dict__) 161 | tree.__dict__['operands'] = tree.__dict__.pop('operands') # operands are now last 162 | 163 | transformer = self.BasicTransformer() 164 | new_tree = transformer.visit(tree) 165 | 166 | self.assertEqual( 167 | new_tree, 168 | AndOperation(Word("lol"), Word("lol"))) 169 | 170 | def test_repeating_expression(self): 171 | # non regression test 172 | tree = AndOperation( 173 | Group(OrOperation(Word('bar'), Word('foo'))), 174 | Group(OrOperation(Word('bar'), Word('foo'), Word('spam'))), 175 | ) 176 | # basic transformer should not change tree 177 | same_tree = LuceneTreeTransformer().visit(copy.deepcopy(tree)) 178 | self.assertEqual(same_tree, tree) 179 | 180 | 181 | class TreeVisitorV2TestCase(TestCase): 182 | 183 | class BasicVisitor(LuceneTreeVisitorV2): 184 | """ Dummy visitor, simply yielding a list of nodes. """ 185 | def generic_visit(self, node, parents, context): 186 | yield node 187 | for c in node.children: 188 | yield from self.visit(c, parents + [node], context) 189 | 190 | class MROVisitor(LuceneTreeVisitorV2): 191 | 192 | def visit_or_operation(self, node, parents=[], context=None): 193 | return "{} OR {}".format(*[self.visit(c) for c in node.children]) 194 | 195 | def visit_base_operation(self, node, parents=[], context=None): 196 | return "{} BASE_OP {}".format(*[self.visit(c) for c in node.children]) 197 | 198 | def visit_word(self, node, parents=[], context=None): 199 | return node.value 200 | 201 | def test_basic_traversal(self): 202 | tree = ( 203 | AndOperation( 204 | Word("foo"), 205 | Word("bar"))) 206 | 207 | visitor = self.BasicVisitor() 208 | nodes = list(visitor.visit(tree)) 209 | 210 | self.assertListEqual( 211 | [AndOperation(Word('foo'), Word('bar')), Word('foo'), Word('bar')], 212 | nodes) 213 | 214 | def test_mro(self): 215 | visitor = self.MROVisitor() 216 | 217 | tree = OrOperation(Word('a'), Word('b')) 218 | result = visitor.visit(tree) 219 | self.assertEqual(result, 'a OR b') 220 | 221 | tree = OrOperation(AndOperation(Word('a'), Word('b')), Word('c')) 222 | result = visitor.visit(tree) 223 | self.assertEqual(result, 'a BASE_OP b OR c') 224 | 225 | def test_generic_visit_fails_by_default(self): 226 | visitor = self.MROVisitor() 227 | with self.assertRaises(AttributeError): 228 | visitor.visit(Phrase('"test"')) 229 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jurismarches/luqum/64d3d4a1bcdc56f156185df1f4d146582f294998/tests/test_elasticsearch/__init__.py -------------------------------------------------------------------------------- /tests/test_elasticsearch/book.json: -------------------------------------------------------------------------------- 1 | { 2 | "books": [ 3 | { 4 | "title": "Harry Potter and the Philosopher's Stone", 5 | "edition": "Bloomsbury", 6 | "author": { 7 | "name": "J. K. 
Rowling", 8 | "birthdate": "1965-07-31" 9 | }, 10 | "illustrators": [ 11 | { 12 | "name": "Thomas Taylor", 13 | "nationality": "UK", 14 | "birthdate": "1973-05-22" 15 | }, 16 | { 17 | "name": "Mary GrandPré", 18 | "nationality":"US", 19 | "birthdate": "1954-02-13" 20 | } 21 | ], 22 | "publication_date": "1997-06-26", 23 | "n_pages": "223", 24 | "ref": "HP1" 25 | }, 26 | { 27 | "title": "Harry Potter and the Chamber of Secrets", 28 | "edition": "Bloomsbury", 29 | "author": { 30 | "name": "J. K. Rowling", 31 | "birthdate": "1965-07-31" 32 | }, 33 | "illustrators": [ 34 | { 35 | "name": "Cliff Wright", 36 | "nationality": "UK", 37 | "birthdate": "1953-10-24" 38 | }, 39 | { 40 | "name": "Mary GrandPré", 41 | "nationality": "US", 42 | "birthdate": "1954-02-13" 43 | } 44 | ], 45 | "publication_date": "1998-07-02", 46 | "n_pages": "251", 47 | "ref": "HP2" 48 | }, 49 | { 50 | "title": "Harry Potter and the Prisoner of Azkaban", 51 | "edition": "Bloomsbury", 52 | "author": { 53 | "name": "J. K. Rowling", 54 | "birthdate": "1965-07-31" 55 | }, 56 | "illustrators": [ 57 | { 58 | "name": "Cliff Wright", 59 | "nationality": "UK", 60 | "birthdate": "1953-10-24" 61 | }, 62 | { 63 | "name": "Mary GrandPré", 64 | "nationality": "US", 65 | "birthdate": "1954-02-13" 66 | } 67 | ], 68 | "publication_date": "1999-07-08", 69 | "n_pages": "317", 70 | "ref": "HP3" 71 | }, 72 | { 73 | "title": "Harry Potter and the Goblet of Fire", 74 | "edition": "Bloomsbury", 75 | "author": { 76 | "name": "J. K. Rowling", 77 | "birthdate": "1965-07-31" 78 | }, 79 | "illustrators": [ 80 | { 81 | "name": "Giles Greenfield", 82 | "nationality": "UK" 83 | }, 84 | { 85 | "name": "Mary GrandPré", 86 | "nationality": "US", 87 | "birthdate": "1954-02-13" 88 | } 89 | ], 90 | "publication_date": "2000-07-08", 91 | "n_pages": "636", 92 | "ref": "HP4" 93 | }, 94 | { 95 | "title": "Harry Potter and the Order of the Phoenix", 96 | "edition": "Bloomsbury", 97 | "author": { 98 | "name": "J. K. Rowling", 99 | "birthdate": "1965-07-31" 100 | }, 101 | "illustrators": [ 102 | { 103 | "name":"Jason Cockcroft", 104 | "nationality":"UK" 105 | }, 106 | { 107 | "name": "Mary GrandPré", 108 | "nationality": "US", 109 | "birthdate": "1954-02-13" 110 | } 111 | ], 112 | "publication_date": "2003-06-21", 113 | "n_pages": "766", 114 | "ref": "HP5" 115 | }, 116 | { 117 | "title": "Harry Potter and the Half-Blood Prince", 118 | "edition": "Bloomsbury", 119 | "author": { 120 | "name": "J. K. Rowling", 121 | "birthdate": "1965-07-31" 122 | }, 123 | "illustrators": [ 124 | { 125 | "name": "Jason Cockcroft", 126 | "nationality": "UK" 127 | }, 128 | { 129 | "name": "Mary GrandPré", 130 | "nationality": "US", 131 | "birthdate": "1954-02-13" 132 | } 133 | ], 134 | "publication_date": "2005-07-16", 135 | "n_pages": "607", 136 | "ref": "HP6" 137 | }, 138 | { 139 | "title": "Harry Potter and the Deathly Hallows", 140 | "edition": "Bloomsbury", 141 | "author": { 142 | "name": "J. K. Rowling", 143 | "birthdate": "1965-07-31" 144 | }, 145 | "illustrators": [ 146 | { 147 | "name": "Jason Cockcroft", 148 | "nationality": "UK" 149 | }, 150 | { 151 | "name": "Mary GrandPré", 152 | "nationality": "US", 153 | "birthdate": "1954-02-13" 154 | } 155 | ], 156 | "publication_date": "2007-07-21", 157 | "n_pages": "607", 158 | "ref": "HP7" 159 | }, 160 | { 161 | "title": "Harry Potter and the Cursed Child", 162 | "edition": "Little, Brown and Company", 163 | "author": { 164 | "name": "J. K. 
Rowling", 165 | "birthdate": "1965-07-31" 166 | }, 167 | "illustrators": [], 168 | "publication_date": "2016-07-30", 169 | "n_pages": "360", 170 | "ref": "HP8" 171 | }, 172 | { 173 | "title": "The Tales of Beedle the Bard", 174 | "edition": "Lumos (charity)", 175 | "author": { 176 | "name": "J. K. Rowling", 177 | "birthdate": "1965-07-31" 178 | }, 179 | "illustrators": [ 180 | { 181 | "name":"J. K. Rowling", 182 | "nationality": "UK", 183 | "birthdate": "1965-07-31" 184 | } 185 | ], 186 | "publication_date": "2008-12-04", 187 | "n_pages": "157", 188 | "ref": "BB1" 189 | } 190 | ] 191 | } 192 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/es_integration_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import elasticsearch_dsl 5 | from elasticsearch.exceptions import ConnectionError 6 | from elasticsearch.helpers import bulk 7 | from elasticsearch_dsl import Date, Index, Integer, Nested, Object, Search, analyzer 8 | from elasticsearch_dsl.connections import connections 9 | 10 | from luqum.elasticsearch import ElasticsearchQueryBuilder, SchemaAnalyzer 11 | 12 | 13 | MAJOR_ES = elasticsearch_dsl.VERSION[0] 14 | if MAJOR_ES > 2: 15 | from elasticsearch_dsl import Keyword 16 | 17 | ES6 = False 18 | if MAJOR_ES >= 6: 19 | from elasticsearch_dsl import Text, Document, InnerDoc 20 | 21 | ES6 = True 22 | else: 23 | from elasticsearch_dsl import ( 24 | String as Text, 25 | DocType as Document, 26 | InnerObjectWrapper as InnerDoc, 27 | ) 28 | 29 | 30 | def get_es(): 31 | """Return an es connection or None if none seems available. 32 | 33 | Also wait for ES to be ready (yellow status) 34 | """ 35 | # you may use ES_HOST environment variable to configure Elasticsearch 36 | # launching something like 37 | # docker run --rm -p "127.0.0.1:9200:9200" -e "discovery.type=single-node" elasticsearch:7.8.0 38 | # is a simple way to get an instance 39 | connections.configure( 40 | default=dict(hosts=os.environ.get("ES_HOST", "http://localhost:9200"), timeout=20) 41 | ) 42 | try: 43 | client = connections.get_connection("default") 44 | # check ES running 45 | client.cluster.health(wait_for_status='yellow') 46 | except ConnectionError: 47 | client = None 48 | return client 49 | 50 | 51 | if MAJOR_ES > 2: 52 | 53 | class Illustrator(InnerDoc): 54 | """Inner object to be nested in Book, details on an illustrator 55 | """ 56 | name = Text() 57 | birthdate = Date() 58 | nationality = Keyword() 59 | 60 | 61 | class Book(Document): 62 | """An objects representing a book in ES 63 | """ 64 | title = Text(fields={ 65 | "no_vowels": Text( 66 | analyzer=analyzer("no_vowels", "pattern", pattern=r"[\Waeiouy]"), # noqa: W605 67 | search_analyzer="standard" 68 | ) 69 | }) 70 | ref = Keyword() if MAJOR_ES > 2 else Text(index="not_analyzed") 71 | edition = Text() 72 | author = Object(properties={"name": Text(), "birthdate": Date()}) 73 | publication_date = Date() 74 | n_pages = Integer() 75 | 76 | if ES6: 77 | illustrators = Nested(Illustrator) 78 | 79 | class Index: 80 | name = "bk" 81 | 82 | else: 83 | illustrators = Nested( 84 | properties={ 85 | "name": Text(), 86 | "birthdate": Date(), 87 | "nationality": Keyword() if MAJOR_ES > 2 else Text(index="not_analyzed"), 88 | } 89 | ) 90 | 91 | class Meta: 92 | index = "bk" 93 | 94 | 95 | def add_book_data(es): 96 | """Create a "bk" index and fill it with data 97 | """ 98 | remove_book_index(es) 99 | Book.init() 100 | with 
open(os.path.join(os.path.dirname(__file__), "book.json")) as f:
101 |         datas = json.load(f)
102 |     actions = (
103 |         {"_op_type": "index", "_id": i, "_source": d}
104 |         for i, d in enumerate(datas["books"])
105 |     )
106 |     if MAJOR_ES >= 7:
107 |         bulk(es, actions, index="bk", refresh=True)
108 |     else:
109 |         if ES6:
110 |             doc_type = "doc"
111 |         else:
112 |             doc_type = "book"
113 |         bulk(es, actions, index="bk", doc_type=doc_type, refresh=True)
114 | 
115 | 
116 | def book_search(es):
117 |     """Return an elasticsearch_dsl search object
118 |     """
119 |     return Search(using=es, index="bk")
120 | 
121 | 
122 | def book_query_builder(es):
123 |     """Return an ElasticsearchQueryBuilder adapted for searching books.
124 | 
125 |     title is adapted to search the title.no_vowels field along with the title
126 |     """
127 |     MESSAGES_SCHEMA = {"mappings": Book._doc_type.mapping.to_dict()}
128 |     schema_analyzer = SchemaAnalyzer(MESSAGES_SCHEMA)
129 |     builder_options = schema_analyzer.query_builder_options()
130 |     builder_options['field_options'] = {
131 |         'title.no_vowels': {
132 |             'match_type': 'multi_match',
133 |             'type': 'most_fields',
134 |             'fields': ('title', 'title.no_vowels')
135 |         }
136 |     }
137 |     return ElasticsearchQueryBuilder(**builder_options)
138 | 
139 | 
140 | def remove_book_index(es):
141 |     """Clean the "bk" index
142 |     """
143 |     if es is None:
144 |         return
145 |     if ES6:
146 |         Book._index.delete(ignore=404)
147 |     else:
148 |         Index("bk").delete(ignore=404)
-------------------------------------------------------------------------------- /tests/test_elasticsearch/test_es_integration.py: --------------------------------------------------------------------------------
1 | from unittest import TestCase, skipIf
2 | 
3 | from luqum.parser import parser
4 | 
5 | from .es_integration_utils import (
6 |     add_book_data, book_query_builder, book_search, get_es, remove_book_index,
7 | )
8 | 
9 | 
10 | @skipIf(get_es() is None, "Skipping ES test as ES seems unreachable")
11 | class LuqumRequestTestCase(TestCase):
12 | 
13 |     @classmethod
14 |     def setUpClass(cls):
15 |         cls.es_client = get_es()
16 |         if cls.es_client is None:
17 |             return
18 |         cls.es_builder = book_query_builder(cls.es_client)
19 |         cls.search = book_search(cls.es_client)
20 |         add_book_data(cls.es_client)
21 | 
22 |     def _ask_luqum(self, req):
23 |         tree = parser.parse(req)
24 |         query = self.es_builder(tree)
25 |         return [x.title for x in self.search.filter(query).execute()]
26 | 
27 |     def test_simple_field_search(self):
28 |         self.assertListEqual(
29 |             self._ask_luqum('title:"Chamber"'),
30 |             ["Harry Potter and the Chamber of Secrets"],
31 |         )
32 | 
33 |     def test_nested_field_search(self):
34 |         self.assertListEqual(
35 |             self._ask_luqum("illustrators:(name:Giles)"),
36 |             ["Harry Potter and the Goblet of Fire"],
37 |         )
38 | 
39 |     def test_or_condition_search(self):
40 |         self.assertCountEqual(
41 |             self._ask_luqum(
42 |                 'illustrators:(name:"Giles Greenfield" OR name:"Cliff Wright")'
43 |             ),
44 |             [
45 |                 "Harry Potter and the Prisoner of Azkaban",
46 |                 "Harry Potter and the Chamber of Secrets",
47 |                 "Harry Potter and the Goblet of Fire",
48 |             ],
49 |         )
50 | 
51 |     def test_and_condition_search(self):
52 |         self.assertCountEqual(
53 |             self._ask_luqum(
54 |                 'illustrators:(name:"Cliff Wright") AND illustrators:(name:"Mary GrandPré")'
55 |             ),
56 |             [
57 |                 "Harry Potter and the Prisoner of Azkaban",
58 |                 "Harry Potter and the Chamber of Secrets",
59 |             ],
60 |         )
61 | 
62 |     def test_date_range_search(self):
63 |         self.assertCountEqual(
64 |             self._ask_luqum("publication_date:[2005-01-01 TO 
2010-12-31]"), 65 | [ 66 | "Harry Potter and the Half-Blood Prince", 67 | "The Tales of Beedle the Bard", 68 | "Harry Potter and the Deathly Hallows", 69 | ], 70 | ) 71 | 72 | def test_int_range_search(self): 73 | self.assertCountEqual( 74 | self._ask_luqum("n_pages:[500 TO *]"), 75 | [ 76 | "Harry Potter and the Half-Blood Prince", 77 | "Harry Potter and the Order of the Phoenix", 78 | "Harry Potter and the Deathly Hallows", 79 | "Harry Potter and the Goblet of Fire", 80 | ], 81 | ) 82 | 83 | def test_int_search(self): 84 | self.assertListEqual( 85 | self._ask_luqum("n_pages:360"), ["Harry Potter and the Cursed Child"] 86 | ) 87 | 88 | def test_proximity_search(self): 89 | self.assertListEqual( 90 | self._ask_luqum('title:"Harry Secrets"~5'), 91 | ["Harry Potter and the Chamber of Secrets"], 92 | ) 93 | 94 | def test_fuzzy_search(self): 95 | self.assertListEqual( 96 | self._ask_luqum("title:Gublet~2"), ["Harry Potter and the Goblet of Fire"] 97 | ) 98 | 99 | def test_object_field_search(self): 100 | self.assertListEqual( 101 | self._ask_luqum('illustrators:(name:"J. K. Rowling")'), 102 | ["The Tales of Beedle the Bard"], 103 | ) 104 | 105 | def test_fail_search(self): 106 | self.assertListEqual(self._ask_luqum("title:secret"), []) 107 | 108 | def test_wildcard_matching(self): 109 | self.assertListEqual( 110 | self._ask_luqum("title:secret*"), 111 | ["Harry Potter and the Chamber of Secrets"], 112 | ) 113 | 114 | def test_wildcard1_search(self): 115 | self.assertListEqual( 116 | self._ask_luqum("title:P*ix"), ["Harry Potter and the Order of the Phoenix"] 117 | ) 118 | 119 | def test_not_search(self): 120 | self.assertListEqual( 121 | self._ask_luqum("-title:Harry"), ["The Tales of Beedle the Bard"] 122 | ) 123 | 124 | def test_not_analysed_field_search(self): 125 | self.assertListEqual(self._ask_luqum("illustrators:nationality:uk"), []) 126 | 127 | def test_complex_search(self): 128 | self.assertListEqual( 129 | self._ask_luqum( 130 | """ 131 | title:phoenux~2 AND 132 | illustrators:name:Grand* AND 133 | illustrators:( 134 | -name:grandpr* AND ( 135 | name:J*on OR birthdate:[1950-01-01 TO 1970-01-01] 136 | ) 137 | ) 138 | """ 139 | ), 140 | ["Harry Potter and the Order of the Phoenix"], 141 | ) 142 | 143 | def test_subfield_multi_match_search(self): 144 | self.assertListEqual( 145 | self._ask_luqum("title.no_vowels:Potter AND title.no_vowels:x"), 146 | ["Harry Potter and the Order of the Phoenix"], 147 | ) 148 | 149 | @classmethod 150 | def tearDownClass(cls): 151 | remove_book_index(cls.es_client) 152 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/test_estree.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.elasticsearch.tree import EShould, EWord 4 | 5 | 6 | class TestItems(TestCase): 7 | 8 | def test_should_operation_options(self): 9 | op = EShould(items=[EWord(q="a"), EWord(q="b"), EWord(q="c")], minimum_should_match=2) 10 | self.assertEqual( 11 | op.json, 12 | {'bool': { 13 | 'should': [ 14 | {'term': {'': {'value': 'a'}}}, 15 | {'term': {'': {'value': 'b'}}}, 16 | {'term': {'': {'value': 'c'}}}, 17 | ], 18 | 'minimum_should_match': 2, 19 | }}, 20 | ) 21 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/test_naming.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.tree import ( 4 | 
AndOperation, Word, Prohibit, OrOperation, Not, Phrase, SearchField, 5 | UnknownOperation, Boost, Fuzzy, Proximity, Range, Group, FieldGroup, 6 | Plus) 7 | from luqum.naming import auto_name, set_name 8 | from luqum.elasticsearch.visitor import ElasticsearchQueryBuilder 9 | 10 | 11 | class ElasticsearchTreeTransformerTestCase(TestCase): 12 | 13 | @classmethod 14 | def setUpClass(cls): 15 | cls.transformer = ElasticsearchQueryBuilder( 16 | default_field="text", 17 | not_analyzed_fields=['not_analyzed_field', 'text', 'author.tag'], 18 | nested_fields={ 19 | 'author': ['name', 'tag'] 20 | }, 21 | object_fields=["book.title", "author.rewards.name"], 22 | sub_fields=["book.title.raw"], 23 | ) 24 | 25 | def test_named_queries_match(self): 26 | tree = SearchField("spam", Word("bar")) 27 | set_name(tree, "a") 28 | result = self.transformer(tree) 29 | self.assertEqual( 30 | result, 31 | { 32 | "match": { 33 | "spam": { 34 | "query": "bar", 35 | "_name": "a", 36 | "zero_terms_query": "none", 37 | }, 38 | }, 39 | }, 40 | ) 41 | 42 | tree = SearchField("spam", Phrase('"foo bar"')) 43 | set_name(tree, "a") 44 | result = self.transformer(tree) 45 | self.assertEqual( 46 | result, 47 | { 48 | "match_phrase": { 49 | "spam": { 50 | "query": "foo bar", 51 | "_name": "a", 52 | }, 53 | }, 54 | }, 55 | ) 56 | 57 | def test_named_queries_term(self): 58 | tree = SearchField("text", Word("bar")) 59 | set_name(tree, "a") 60 | result = self.transformer(tree) 61 | self.assertEqual( 62 | result, 63 | {"term": {"text": {"value": "bar", "_name": "a"}}}, 64 | ) 65 | 66 | tree = SearchField("text", Phrase('"foo bar"')) 67 | set_name(tree, "a") 68 | result = self.transformer(tree) 69 | self.assertEqual( 70 | result, 71 | {"term": {"text": {"value": "foo bar", "_name": "a"}}}, 72 | ) 73 | 74 | def test_named_queries_fuzzy(self): 75 | tree = SearchField("text", Fuzzy(Word('bar'))) 76 | set_name(tree.children[0], "a") 77 | result = self.transformer(tree) 78 | self.assertEqual( 79 | result, 80 | {"fuzzy": {"text": {"value": "bar", "_name": "a", 'fuzziness': 0.5}}}, 81 | ) 82 | 83 | def test_named_queries_proximity(self): 84 | tree = SearchField("spam", Proximity(Phrase('"foo bar"'))) 85 | set_name(tree.children[0], "a") 86 | result = self.transformer(tree) 87 | self.assertEqual( 88 | result, 89 | {"match_phrase": {"spam": {"query": "foo bar", "_name": "a", 'slop': 1.0}}}, 90 | ) 91 | 92 | def test_named_queries_boost(self): 93 | tree = SearchField("text", Boost(Phrase('"foo bar"'), force=2)) 94 | set_name(tree.children[0], "a") 95 | result = self.transformer(tree) 96 | self.assertEqual( 97 | result, 98 | {"term": {"text": {"value": "foo bar", "_name": "a", 'boost': 2.0}}}, 99 | ) 100 | 101 | def test_named_queries_or(self): 102 | tree = OrOperation(SearchField("text", Word("foo")), SearchField("spam", Word("bar"))) 103 | set_name(tree.operands[0], "a") 104 | set_name(tree.operands[1], "b") 105 | result = self.transformer(tree) 106 | self.assertEqual( 107 | result, 108 | {'bool': {'should': [ 109 | {'term': {'text': {'_name': 'a', 'value': 'foo'}}}, 110 | {'match': {'spam': {'_name': 'b', 'query': 'bar', 'zero_terms_query': 'none'}}} 111 | ]}} 112 | ) 113 | 114 | def test_named_queries_and(self): 115 | tree = AndOperation(SearchField("text", Word("foo")), SearchField("spam", Word("bar"))) 116 | set_name(tree.operands[0], "a") 117 | set_name(tree.operands[1], "b") 118 | result = self.transformer(tree) 119 | self.assertEqual( 120 | result, 121 | {'bool': {'must': [ 122 | {'term': {'text': {'_name': 'a', 'value': 'foo'}}}, 123 | 
{'match': {'spam': {'_name': 'b', 'query': 'bar', 'zero_terms_query': 'all'}}} 124 | ]}} 125 | ) 126 | 127 | def test_named_queries_unknown(self): 128 | tree = UnknownOperation(SearchField("text", Word("foo")), SearchField("spam", Word("bar"))) 129 | set_name(tree.operands[0], "a") 130 | set_name(tree.operands[1], "b") 131 | result = self.transformer(tree) 132 | self.assertEqual( 133 | result, 134 | {'bool': {'should': [ 135 | {'term': {'text': {'_name': 'a', 'value': 'foo'}}}, 136 | {'match': {'spam': {'_name': 'b', 'query': 'bar', 'zero_terms_query': 'none'}}} 137 | ]}} 138 | ) 139 | 140 | def test_named_queries_not(self): 141 | tree = Not(SearchField("text", Word("foo"))) 142 | set_name(tree, "a") 143 | result = self.transformer(tree) 144 | self.assertEqual( 145 | result, 146 | {'bool': {'must_not': [{'term': {'text': {'_name': 'a', 'value': 'foo'}}}]}} 147 | ) 148 | 149 | tree = Prohibit(SearchField("text", Word("foo"))) 150 | set_name(tree, "a") 151 | result = self.transformer(tree) 152 | self.assertEqual( 153 | result, 154 | {'bool': {'must_not': [{'term': {'text': {'_name': 'a', 'value': 'foo'}}}]}} 155 | ) 156 | 157 | def test_named_queries_plus(self): 158 | tree = Plus(SearchField("text", Word("foo"))) 159 | set_name(tree, "a") 160 | result = self.transformer(tree) 161 | self.assertEqual( 162 | result, 163 | {'bool': {'must': [{'term': {'text': {'_name': 'a', 'value': 'foo'}}}]}} 164 | ) 165 | 166 | def test_named_queries_range(self): 167 | tree = SearchField("text", Range(Word("x"), Word("z"))) 168 | set_name(tree, "a") 169 | result = self.transformer(tree) 170 | self.assertEqual(result, {'range': {'text': {'_name': 'a', 'gte': 'x', 'lte': 'z'}}}) 171 | 172 | def test_named_queries_nested(self): 173 | tree = SearchField("author.name", Word("Monthy")) 174 | set_name(tree, "a") 175 | result = self.transformer(tree) 176 | # name is repeated on query, but it's not a big deal… 177 | self.assertEqual( 178 | result, 179 | { 180 | 'nested': { 181 | '_name': 'a', 182 | 'path': 'author', 183 | 'query': {'match': {'author.name': { 184 | '_name': 'a', 'query': 'Monthy', 'zero_terms_query': 'none', 185 | }}}, 186 | }, 187 | } 188 | ) 189 | 190 | def test_named_queries_object(self): 191 | tree = SearchField("book.title", Word("Circus")) 192 | set_name(tree, "a") 193 | result = self.transformer(tree) 194 | # name is repeated on query, but it's not a big deal… 195 | self.assertEqual( 196 | result, 197 | { 198 | 'match': {'book.title': { 199 | '_name': 'a', 'query': 'Circus', 'zero_terms_query': 'none' 200 | }} 201 | } 202 | ) 203 | 204 | def test_named_queries_group(self): 205 | tree = SearchField("text", FieldGroup(Word("bar"))) 206 | set_name(tree.children[0], "a") 207 | result = self.transformer(tree) 208 | self.assertEqual(result, {"term": {"text": {"value": "bar", "_name": "a"}}},) 209 | 210 | tree = Group(SearchField("text", Word("bar"))) 211 | set_name(tree, "a") 212 | result = self.transformer(tree) 213 | self.assertEqual(result, {"term": {"text": {"value": "bar", "_name": "a"}}},) 214 | 215 | def test_named_queries_exists(self): 216 | tree = SearchField("text", Word("*")) 217 | set_name(tree.children[0], "a") 218 | result = self.transformer(tree) 219 | self.assertEqual(result, {"exists": {"field": "text", "_name": "a"}},) 220 | 221 | def test_named_queries_complex(self): 222 | tree = ( 223 | AndOperation( 224 | SearchField("text", Phrase('"foo bar"')), 225 | Group( 226 | OrOperation( 227 | Word("bar"), 228 | SearchField("spam", Word("baz")), 229 | ), 230 | ), 231 | ) 232 | ) 233 | 
and_op = tree 234 | search_text = and_op.operands[0] 235 | or_op = and_op.operands[1].children[0] 236 | bar = or_op.operands[0] 237 | search_spam = or_op.operands[1] 238 | set_name(search_text, "foo_bar") 239 | set_name(bar, "bar") 240 | set_name(search_spam, "baz") 241 | 242 | expected = { 243 | 'bool': {'must': [ 244 | {'term': {'text': {'_name': 'foo_bar', 'value': 'foo bar'}}}, 245 | {'bool': {'should': [ 246 | {'term': {'text': {'_name': 'bar', 'value': 'bar'}}}, 247 | {'match': {'spam': { 248 | '_name': 'baz', 249 | 'query': 'baz', 250 | 'zero_terms_query': 'none' 251 | }}} 252 | ]}} 253 | ]} 254 | } 255 | 256 | result = self.transformer(tree) 257 | self.assertEqual(result, expected) 258 | 259 | def test_auto_name_integration(self): 260 | tree = ( 261 | AndOperation( 262 | SearchField("text", Phrase('"foo bar"')), 263 | Group( 264 | OrOperation( 265 | Word("bar"), 266 | SearchField("spam", Word("baz")), 267 | ), 268 | ), 269 | ) 270 | ) 271 | auto_name(tree) 272 | 273 | expected = { 274 | 'bool': {'must': [ 275 | {'term': {'text': {'_name': 'a', 'value': 'foo bar'}}}, 276 | {'bool': {'should': [ 277 | {'term': {'text': {'_name': 'c', 'value': 'bar'}}}, 278 | {'match': {'spam': { 279 | '_name': 'd', 280 | 'query': 'baz', 281 | 'zero_terms_query': 'none' 282 | }}} 283 | ]}} 284 | ]} 285 | } 286 | 287 | result = self.transformer(tree) 288 | self.assertEqual(result, expected) 289 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/test_nested.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.elasticsearch.nested import extract_nested_queries, get_first_name 4 | 5 | 6 | class NestedQueriesTestCase(TestCase): 7 | 8 | def test_no_nested(self): 9 | queries = extract_nested_queries({"term": {"text": {"value": "spam", "_name": "spam"}}}) 10 | self.assertEqual(queries, []) 11 | 12 | queries = extract_nested_queries( 13 | {"bool": {"must": [ 14 | {"term": {"text": {"value": "spam", "_name": "spam"}}}, 15 | {"term": {"text": {"value": "ham", "_name": "ham"}}}, 16 | ]}} 17 | ) 18 | self.assertEqual(queries, []) 19 | 20 | def test_nested_no_bool_inside(self): 21 | queries = extract_nested_queries( 22 | {"nested": { 23 | "path": "my", 24 | "query": {"term": {"text": {"value": "spam", "_name": "spam"}}} 25 | }} 26 | ) 27 | self.assertEqual(queries, []) 28 | 29 | def test_nested_bool_inside(self): 30 | term1 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 31 | term2 = {"term": {"text": {"value": "ham", "_name": "ham"}}} 32 | bool_query = {"bool": {"must": [term1, term2]}} 33 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query}}) 34 | self.assertEqual( 35 | queries, 36 | [ 37 | {"nested": {"path": "my", "query": term1, "_name": "spam"}}, 38 | {"nested": {"path": "my", "query": term2, "_name": "ham"}}, 39 | ], 40 | ) 41 | 42 | def test_nested_in_bool_with_bool_inside(self): 43 | term1 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 44 | term2 = {"term": {"text": {"value": "ham", "_name": "ham"}}} 45 | term3 = {"term": {"text": {"value": "foo", "_name": "foo"}}} 46 | bool_query = {"bool": {"must": [term1, term2]}} 47 | queries = extract_nested_queries( 48 | {"bool": {"should": [term3, {"nested": {"path": "my", "query": bool_query}}]}} 49 | ) 50 | self.assertEqual( 51 | queries, 52 | [ 53 | {"nested": {"path": "my", "query": term1, "_name": "spam"}}, 54 | {"nested": {"path": "my", "query": term2, "_name": "ham"}}, 55 
| ], 56 | ) 57 | 58 | def test_nested_bool_inside_bool(self): 59 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}} 60 | term2 = {"term": {"text": {"value": "baz", "_name": "baz"}}} 61 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 62 | bool_query1 = {"bool": {"should": [term1, term2]}} 63 | bool_query2 = {"bool": {"must": [term3, bool_query1]}} 64 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query2}}) 65 | self.assertEqual(queries, [ 66 | {"nested": {"path": "my", "query": term3, "_name": "spam"}}, 67 | {"nested": {"path": "my", "query": bool_query1}}, 68 | {"nested": {"path": "my", "query": term1, "_name": "bar"}}, 69 | {"nested": {"path": "my", "query": term2, "_name": "baz"}}, 70 | ]) 71 | 72 | def test_nested_inside_nested(self): 73 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}} 74 | term2 = {"term": {"text": {"value": "baz", "_name": "baz"}}} 75 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 76 | bool_query1 = {"bool": {"should": [term1, term2]}} 77 | inner_nested = {"nested": {"path": "my.your", "query": bool_query1}} 78 | bool_query2 = {"bool": {"must": [term3, inner_nested]}} 79 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query2}}) 80 | self.assertEqual(queries, [ 81 | {"nested": {"path": "my", "query": term3, "_name": "spam"}}, 82 | {"nested": {"path": "my", "query": inner_nested}}, 83 | {"nested": {"path": "my", "_name": "bar", "query": {"nested": { 84 | "path": "my.your", "query": term1, 85 | }}}}, 86 | {"nested": {"path": "my", "_name": "baz", "query": {"nested": { 87 | "path": "my.your", "query": term2, 88 | }}}}, 89 | ]) 90 | 91 | def test_nested_inside_nested_with_nested_bool(self): 92 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}} 93 | term2 = {"term": {"text": {"value": "foo", "_name": "foo"}}} 94 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 95 | bool_query1 = {"bool": {"must_not": [term1]}} 96 | bool_query2 = {"bool": {"should": [term2, bool_query1]}} 97 | inner_nested = {"nested": {"path": "my.your", "query": bool_query2}} 98 | bool_query3 = {"bool": {"must_not": [inner_nested]}} 99 | bool_query4 = {"bool": {"must": [term3, bool_query3]}} 100 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query4}}) 101 | self.assertEqual(queries, [ 102 | {"nested": {"path": "my", "query": term3, "_name": "spam"}}, 103 | {"nested": {"path": "my", "query": bool_query3}}, 104 | {"nested": {"path": "my", "query": inner_nested}}, 105 | {"nested": {"path": "my", "_name": "foo", "query": { 106 | "nested": {"path": "my.your", "query": term2} 107 | }}}, 108 | {"nested": { 109 | "path": "my", "query": {"nested": {"path": "my.your", "query": bool_query1}}, 110 | }}, 111 | {"nested": {"path": "my", "_name": "bar", "query": { 112 | "nested": {"path": "my.your", "query": term1} 113 | }}}, 114 | ]) 115 | 116 | def test_multiple_parallel_nested(self): 117 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}} 118 | term2 = {"term": {"text": {"value": "foo", "_name": "foo"}}} 119 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 120 | bool_query1 = {"bool": {"should": [term1]}} 121 | bool_query2 = {"bool": {"must_not": [term2]}} 122 | nested1 = {"nested": {"path": "my.your", "query": bool_query1}} 123 | nested2 = {"nested": {"path": "my.his", "query": bool_query2}} 124 | bool_query3 = {"bool": {"should": [nested2, nested1]}} 125 | bool_query4 = {"bool": {"must": [term3, bool_query3]}} 126 | 
queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query4}}) 127 | self.assertEqual(queries, [ 128 | {"nested": {"path": "my", "query": term3, "_name": "spam"}}, 129 | {"nested": {"path": "my", "query": bool_query3}}, 130 | {"nested": {"path": "my", "query": nested2}}, 131 | {"nested": {"path": "my", "query": nested1}}, 132 | {"nested": {"path": "my", "_name": "foo", "query": { 133 | "nested": {"path": "my.his", "query": term2} 134 | }}}, 135 | {"nested": {"path": "my", "_name": "bar", "query": { 136 | "nested": {"path": "my.your", "query": term1} 137 | }}}, 138 | ]) 139 | 140 | def test_get_first_name(self): 141 | term = {"term": {"text": {"value": "bar", "_name": "bar"}}} 142 | query = [{"query": term, "_name": "spam"}, {"query": term, "_name": "beurre"}] 143 | name = get_first_name(query) 144 | self.assertEqual(name, "spam") 145 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/test_schema.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.elasticsearch.schema import SchemaAnalyzer 4 | 5 | 6 | try: 7 | import elasticsearch_dsl 8 | ES_6 = elasticsearch_dsl.VERSION[0] >= 6 9 | except ImportError: 10 | ES_6 = True 11 | 12 | 13 | class SchemaAnalyzerTestCase(TestCase): 14 | 15 | MAPPING = { 16 | "properties": { 17 | "text": {"type": "text"}, 18 | "author": { 19 | "type": "nested", 20 | "properties": { 21 | "firstname": { 22 | "type": "text", 23 | "fields": { 24 | # sub fields 25 | "english": {"analyzer": "english"}, 26 | "raw": {"type": "keyword"}, 27 | } 28 | }, 29 | "lastname": {"type": "text"}, 30 | "book": { 31 | "type": "nested", 32 | "properties": { 33 | "title": {"type": "text"}, 34 | "isbn": { # an object field in deep nested field 35 | "type": "object", 36 | "properties": { 37 | "ref": { 38 | "type": "keyword", 39 | }, 40 | }, 41 | }, 42 | "format": { 43 | "type": "nested", 44 | "properties": { 45 | "ftype": {"type": "keyword"}, 46 | }, 47 | }, 48 | }, 49 | }, 50 | }, 51 | }, 52 | "publish": { 53 | "type": "nested", 54 | "properties": { 55 | "site": {"type": "keyword"}, 56 | "idnum": {"type": "long"}, 57 | }, 58 | }, 59 | "manager": { 60 | "type": "object", 61 | "properties": { 62 | "firstname": {"type": "text"}, 63 | "address": { # an object field in an object field 64 | "type": "object", 65 | "properties": { 66 | "zipcode": {"type": "keyword"}, 67 | }, 68 | }, 69 | "subteams": { # a nested in an object field 70 | "type": "nested", 71 | "properties": { 72 | "supervisor": { # with an object field inside 73 | "type": "object", 74 | "properties": { 75 | "name": { 76 | "type": "text", 77 | # sub field 78 | "fields": {"raw": {"type": "keyword"}}, 79 | }, 80 | }, 81 | }, 82 | }, 83 | }, 84 | }, 85 | }, 86 | }, 87 | } 88 | 89 | INDEX_SETTINGS = { 90 | "settings": { 91 | "query": {"default_field": "text"}, 92 | }, 93 | "mappings": {}, 94 | } 95 | 96 | def setUp(self): 97 | super().setUp() 98 | if ES_6: 99 | self.INDEX_SETTINGS["mappings"] = self.MAPPING 100 | else: 101 | self.INDEX_SETTINGS["mappings"]["type1"] = self.MAPPING 102 | 103 | def test_default_field(self): 104 | s = SchemaAnalyzer(self.INDEX_SETTINGS) 105 | self.assertEqual(s.default_field(), "text") 106 | 107 | def test_not_analyzed_fields(self): 108 | s = SchemaAnalyzer(self.INDEX_SETTINGS) 109 | self.assertEqual( 110 | sorted(s.not_analyzed_fields()), 111 | [ 112 | 'author.book.format.ftype', 113 | 'author.book.isbn.ref', 114 | 'author.firstname.raw', 115 | 
'manager.address.zipcode', 116 | 'manager.subteams.supervisor.name.raw', 117 | 'publish.idnum', 118 | 'publish.site', 119 | ], 120 | ) 121 | 122 | def test_nested_fields(self): 123 | s = SchemaAnalyzer(self.INDEX_SETTINGS) 124 | self.assertEqual( 125 | s.nested_fields(), 126 | { 127 | 'author': { 128 | 'firstname': {}, 129 | 'lastname': {}, 130 | 'book': { 131 | 'format': { 132 | 'ftype': {} 133 | }, 134 | 'title': {}, 135 | 'isbn': {}, 136 | }, 137 | }, 138 | 'publish': { 139 | 'site': {}, 140 | 'idnum': {}, 141 | }, 142 | 'manager.subteams': { # FIXME !!!! 143 | 'supervisor': {}, 144 | }, 145 | } 146 | ) 147 | 148 | def test_object_fields(self): 149 | s = SchemaAnalyzer(self.INDEX_SETTINGS) 150 | self.assertEqual( 151 | sorted(s.object_fields()), 152 | [ 153 | 'author.book.isbn.ref', 154 | 'manager.address.zipcode', 155 | 'manager.firstname', 156 | 'manager.subteams.supervisor.name', 157 | ] 158 | ) 159 | 160 | def test_sub_fields(self): 161 | s = SchemaAnalyzer(self.INDEX_SETTINGS) 162 | self.assertEqual( 163 | sorted(s.sub_fields()), 164 | [ 165 | 'author.firstname.english', 166 | 'author.firstname.raw', 167 | 'manager.subteams.supervisor.name.raw', 168 | ] 169 | ) 170 | 171 | def test_empty(self): 172 | s = SchemaAnalyzer({}) 173 | self.assertEqual(s.default_field(), "*") 174 | self.assertEqual(list(s.not_analyzed_fields()), []) 175 | self.assertEqual(s.nested_fields(), {}) 176 | self.assertEqual(list(s.object_fields()), []) 177 | -------------------------------------------------------------------------------- /tests/test_pretty.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.pretty import Prettifier, prettify 4 | from luqum.tree import ( 5 | SearchField, FieldGroup, Group, Word, AndOperation, OrOperation, UnknownOperation) 6 | 7 | 8 | class TestPrettify(TestCase): 9 | 10 | big_tree = AndOperation( 11 | Group(OrOperation(Word("baaaaaaaaaar"), Word("baaaaaaaaaaaaaz"))), Word("fooooooooooo")) 12 | fat_tree = AndOperation( 13 | SearchField( 14 | "subject", 15 | FieldGroup( 16 | OrOperation( 17 | Word("fiiiiiiiiiiz"), 18 | AndOperation(Word("baaaaaaaaaar"), Word("baaaaaaaaaaaaaz"))))), 19 | AndOperation(Word("fooooooooooo"), Word("wiiiiiiiiiz"))) 20 | 21 | def test_one_liner(self): 22 | tree = AndOperation(Group(OrOperation(Word("bar"), Word("baz"))), Word("foo")) 23 | self.assertEqual(prettify(tree), "( bar OR baz ) AND foo") 24 | 25 | def test_with_unknown_op(self): 26 | prettify = Prettifier(indent=8, max_len=20) 27 | tree = UnknownOperation( 28 | Group( 29 | UnknownOperation( 30 | Word("baaaaaaaaaar"), 31 | Word("baaaaaaaaaaaaaz"))), 32 | Word("fooooooooooo")) 33 | self.assertEqual( 34 | "\n" + prettify(tree), """ 35 | ( 36 | baaaaaaaaaar 37 | baaaaaaaaaaaaaz 38 | ) 39 | fooooooooooo""") 40 | 41 | def test_with_unknown_op_nested(self): 42 | prettify = Prettifier(indent=8, max_len=20) 43 | tree = OrOperation( 44 | UnknownOperation( 45 | Word("baaaaaaaaaar"), 46 | Word("baaaaaaaaaaaaaz")), 47 | Word("fooooooooooo")) 48 | self.assertEqual( 49 | "\n" + prettify(tree), """ 50 | baaaaaaaaaar 51 | baaaaaaaaaaaaaz 52 | OR 53 | fooooooooooo""") 54 | 55 | def test_small(self): 56 | prettify = Prettifier(indent=8, max_len=20) 57 | self.assertEqual( 58 | "\n" + prettify(self.big_tree), """ 59 | ( 60 | baaaaaaaaaar 61 | OR 62 | baaaaaaaaaaaaaz 63 | ) 64 | AND 65 | fooooooooooo""") 66 | self.assertEqual( 67 | "\n" + prettify(self.fat_tree), """ 68 | subject: ( 69 | fiiiiiiiiiiz 70 | OR 71 | baaaaaaaaaar 72 | 
AND 73 | baaaaaaaaaaaaaz 74 | ) 75 | AND 76 | fooooooooooo 77 | AND 78 | wiiiiiiiiiz""") 79 | 80 | def test_small_inline_ops(self): 81 | prettify = Prettifier(indent=8, max_len=20, inline_ops=True) 82 | self.assertEqual("\n" + prettify(self.big_tree), """ 83 | ( 84 | baaaaaaaaaar OR 85 | baaaaaaaaaaaaaz ) AND 86 | fooooooooooo""") 87 | self.assertEqual("\n" + prettify(self.fat_tree), """ 88 | subject: ( 89 | fiiiiiiiiiiz OR 90 | baaaaaaaaaar AND 91 | baaaaaaaaaaaaaz ) AND 92 | fooooooooooo AND 93 | wiiiiiiiiiz""") 94 | 95 | def test_normal(self): 96 | prettify = Prettifier(indent=4, max_len=50) 97 | self.assertEqual("\n" + prettify(self.big_tree), """ 98 | ( 99 | baaaaaaaaaar OR baaaaaaaaaaaaaz 100 | ) 101 | AND 102 | fooooooooooo""") 103 | self.assertEqual("\n" + prettify(self.fat_tree), """ 104 | subject: ( 105 | fiiiiiiiiiiz 106 | OR 107 | baaaaaaaaaar AND baaaaaaaaaaaaaz 108 | ) 109 | AND 110 | fooooooooooo 111 | AND 112 | wiiiiiiiiiz""") 113 | 114 | def test_normal_inline_ops(self): 115 | prettify = Prettifier(indent=4, max_len=50, inline_ops=True) 116 | self.assertEqual("\n" + prettify(self.big_tree), """ 117 | ( 118 | baaaaaaaaaar OR baaaaaaaaaaaaaz ) AND 119 | fooooooooooo""") 120 | self.assertEqual("\n" + prettify(self.fat_tree), """ 121 | subject: ( 122 | fiiiiiiiiiiz OR 123 | baaaaaaaaaar AND baaaaaaaaaaaaaz ) AND 124 | fooooooooooo AND 125 | wiiiiiiiiiz""") 126 | -------------------------------------------------------------------------------- /tests/test_quick_start.rst: -------------------------------------------------------------------------------- 1 | ../docs/source/quick_start.rst -------------------------------------------------------------------------------- /tests/test_thread.py: -------------------------------------------------------------------------------- 1 | import queue 2 | import threading 3 | 4 | import ply.lex as lex 5 | 6 | from luqum.parser import parser 7 | from luqum.thread import parse 8 | from tests import alternative_lexer 9 | 10 | 11 | def test_thread_parse(): 12 | 13 | result_queue = queue.Queue() 14 | qs1 = """ 15 | (title:"foo bar" AND body:"quick fox") OR title:fox AND 16 | (title:"foo bar" AND body:"quick fox") OR 17 | title:fox AND (title:"foo bar" AND body:"quick fox") OR 18 | title:fox AND (title:"foo bar" AND body:"quick fox") OR 19 | title:fox AND (title:"foo bar" AND body:"quick fox") OR title:fox 20 | """ 21 | expected_tree = parser.parse(qs1) 22 | 23 | def run(q): 24 | parse(qs1) 25 | tree = parse(qs1) 26 | q.put(tree) 27 | 28 | # make concurrent calls 29 | threads = [threading.Thread(target=run, args=(result_queue,)) for i in range(100)] 30 | for thread in threads: 31 | thread.start() 32 | for thread in threads: 33 | thread.join() 34 | assert result_queue.qsize() == 100 35 | for i in range(100): 36 | assert result_queue.get() == expected_tree 37 | 38 | 39 | def test_thread_lex_global_state(): 40 | """ 41 | The last lexer created is used globally by default by the parser. If another 42 | library creates another lexer, it should not impact luqum. 43 | 44 | More info: [Multiple Parsers and 45 | Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37) 46 | """ 47 | qs = '(title:"foo bar" AND body:"quick fox")' 48 | 49 | lex.lex(module=alternative_lexer) 50 | # if there is a "luqum.exceptions.ParseSyntaxError", the wrong lexer was 51 | # used.
52 | parse(qs) 53 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.parser import parser 4 | from luqum.tree import (Group, Word, AndOperation, OrOperation, BoolOperation, 5 | UnknownOperation, Prohibit, Plus, From, To, Range, SearchField, 6 | Boost) 7 | from luqum.utils import UnknownOperationResolver, OpenRangeTransformer 8 | 9 | 10 | class UnknownOperationResolverTestCase(TestCase): 11 | 12 | def test_and_resolution(self): 13 | tree = ( 14 | UnknownOperation( 15 | Word("a"), 16 | Word("b"), 17 | OrOperation(Word("c"), Word("d")))) 18 | expected = ( 19 | AndOperation( 20 | Word("a"), 21 | Word("b"), 22 | OrOperation(Word("c"), Word("d")))) 23 | resolver = UnknownOperationResolver(resolve_to=AndOperation) 24 | self.assertEqual(resolver(tree), expected) 25 | 26 | def test_or_resolution(self): 27 | tree = ( 28 | UnknownOperation( 29 | Word("a"), 30 | Word("b"), 31 | AndOperation(Word("c"), Word("d")))) 32 | expected = ( 33 | OrOperation( 34 | Word("a"), 35 | Word("b"), 36 | AndOperation(Word("c"), Word("d")))) 37 | resolver = UnknownOperationResolver(resolve_to=OrOperation) 38 | self.assertEqual(resolver(tree), expected) 39 | 40 | def test_lucene_resolution_simple(self): 41 | tree = ( 42 | UnknownOperation( 43 | Word("a"), 44 | Word("b"), 45 | UnknownOperation(Word("c"), Word("d")))) 46 | expected = ( 47 | AndOperation( 48 | Word("a"), 49 | Word("b"), 50 | AndOperation(Word("c"), Word("d")))) 51 | resolver = UnknownOperationResolver(resolve_to=None) 52 | self.assertEqual(resolver(tree), expected) 53 | 54 | def test_lucene_resolution_bool(self): 55 | tree = parser.parse("a b (+f +g) -(c d) +e") 56 | expected = ( 57 | BoolOperation( 58 | Word("a"), 59 | Word("b"), 60 | Group(BoolOperation(Plus(Word("f")), Plus(Word("g")))), 61 | Prohibit(Group(BoolOperation(Word("c"), Word("d")))), 62 | Plus(Word('e')))) 63 | resolver = UnknownOperationResolver(resolve_to=BoolOperation) 64 | self.assertEqual(resolver(tree), expected) 65 | 66 | def test_lucene_resolution_last_op(self): 67 | tree = ( 68 | OrOperation( 69 | Word("a"), 70 | Word("b"), 71 | UnknownOperation(Word("c"), Word("d")), 72 | AndOperation( 73 | Word("e"), 74 | UnknownOperation(Word("f"), Word("g"))), 75 | UnknownOperation(Word("i"), Word("j")), 76 | OrOperation( 77 | Word("k"), 78 | UnknownOperation(Word("l"), Word("m"))), 79 | UnknownOperation(Word("n"), Word("o")))) 80 | expected = ( 81 | OrOperation( 82 | Word("a"), 83 | Word("b"), 84 | OrOperation(Word("c"), Word("d")), 85 | AndOperation( 86 | Word("e"), 87 | AndOperation(Word("f"), Word("g"))), 88 | AndOperation(Word("i"), Word("j")), 89 | OrOperation( 90 | Word("k"), 91 | OrOperation(Word("l"), Word("m"))), 92 | OrOperation(Word("n"), Word("o")))) 93 | resolver = UnknownOperationResolver(resolve_to=None) 94 | self.assertEqual(resolver(tree), expected) 95 | 96 | def test_lucene_resolution_last_op_with_group(self): 97 | tree = ( 98 | OrOperation( 99 | Word("a"), 100 | Word("b"), 101 | Group( 102 | AndOperation( 103 | Word("c"), 104 | UnknownOperation(Word("d"), Word("e")))), 105 | UnknownOperation(Word("f"), Word("g")), 106 | Group( 107 | UnknownOperation(Word("h"), Word("i"))))) 108 | expected = ( 109 | OrOperation( 110 | Word("a"), 111 | Word("b"), 112 | Group( 113 | AndOperation( 114 | Word("c"), 115 | AndOperation(Word("d"), Word("e")))), 116 | OrOperation(Word("f"), Word("g")), 117 | Group( 118 
| AndOperation(Word("h"), Word("i"))))) 119 | resolver = UnknownOperationResolver(resolve_to=None) 120 | self.assertEqual(resolver(tree), expected) 121 | 122 | def test_resolve_to_verification(self): 123 | with self.assertRaises(ValueError): 124 | UnknownOperationResolver(resolve_to=object()) 125 | 126 | def test_head_tail_pos(self): 127 | tree = parser.parse("\ra\nb (c\t (d e f)) ") 128 | resolver = UnknownOperationResolver(resolve_to=None) 129 | transformed = resolver(tree) 130 | self.assertEqual(str(transformed), "\ra\nAND b AND (c\t AND (d AND e AND f)) ") 131 | self.assertEqual(transformed.pos, tree.pos) 132 | self.assertEqual(transformed.size, tree.size) 133 | and_op, orig_op = transformed.children[2].children[0], tree.children[2].children[0] 134 | self.assertEqual(type(and_op), AndOperation) 135 | self.assertEqual(and_op.pos, orig_op.pos) 136 | self.assertEqual(and_op.size, orig_op.size) 137 | and_op, orig_op = and_op.children[1].children[0], orig_op.children[1].children[0] 138 | self.assertEqual(type(and_op), AndOperation) 139 | self.assertEqual(and_op.pos, orig_op.pos) 140 | self.assertEqual(and_op.size, orig_op.size) 141 | 142 | resolver = UnknownOperationResolver(resolve_to=OrOperation) 143 | transformed = resolver(tree) 144 | self.assertEqual(str(transformed), "\ra\nOR b OR (c\t OR (d OR e OR f)) ") 145 | 146 | 147 | class OpenRangeTransformerTestCase(TestCase): 148 | def test_simple_resolution_from(self): 149 | tree = ( 150 | From(Word("1"), True) 151 | ) 152 | expected = ( 153 | Range(Word("1", tail=" "), Word("*", head=" "), True, True) 154 | ) 155 | for merge_ranges in (True, False): 156 | with self.subTest(merge_ranges=merge_ranges): 157 | resolver = OpenRangeTransformer(merge_ranges=merge_ranges) 158 | output = resolver(tree) 159 | self.assertEqual(output, expected) 160 | self.assertEqual(str(output), str(expected)) 161 | 162 | def test_simple_resolution_to(self): 163 | tree = ( 164 | To(Word("1"), False) 165 | ) 166 | expected = ( 167 | Range(Word("*", tail=" "), Word("1", head=" "), True, False) 168 | ) 169 | for merge_ranges in (True, False): 170 | with self.subTest(merge_ranges=merge_ranges): 171 | resolver = OpenRangeTransformer(merge_ranges=merge_ranges) 172 | output = resolver(tree) 173 | self.assertEqual(output, expected) 174 | self.assertEqual(str(output), str(expected)) 175 | 176 | def test_and_resolution(self): 177 | tree = ( 178 | AndOperation( 179 | From(Word("1"), True), 180 | To(Word("2"), True), 181 | ) 182 | ) 183 | expected = ( 184 | AndOperation( 185 | Range(Word("1", tail=" "), Word("2", head=" "), True, True) 186 | ) 187 | ) 188 | resolver = OpenRangeTransformer(merge_ranges=True) 189 | output = resolver(tree) 190 | self.assertEqual(output, expected) 191 | self.assertEqual(str(output), str(expected)) 192 | 193 | def test_and_resolution_without_merge(self): 194 | tree = ( 195 | AndOperation( 196 | From(Word("1"), True), 197 | To(Word("2"), True), 198 | ) 199 | ) 200 | expected = ( 201 | AndOperation( 202 | Range(Word("1", tail=" "), Word("*", head=" "), True), 203 | Range(Word("*", tail=" "), Word("2", head=" "), True), 204 | ) 205 | ) 206 | resolver = OpenRangeTransformer(merge_ranges=False) 207 | output = resolver(tree) 208 | self.assertEqual(output, expected) 209 | self.assertEqual(str(output), str(expected)) 210 | 211 | def test_unjoined_resolution(self): 212 | tree = ( 213 | AndOperation( 214 | From(Word("1"), False), 215 | From(Word("2"), True), 216 | ) 217 | ) 218 | expected = ( 219 | AndOperation( 220 | Range(Word("1", tail=" "), Word("*", 
head=" "), False, True), 221 | Range(Word("2", tail=" "), Word("*", head=" "), True, True) 222 | ) 223 | ) 224 | resolver = OpenRangeTransformer(merge_ranges=True) 225 | output = resolver(tree) 226 | self.assertEqual(output, expected) 227 | self.assertEqual(str(output), str(expected)) 228 | 229 | def test_normal_ranges_are_untouched(self): 230 | tree = ( 231 | AndOperation( 232 | Range(Word("1"), Word("2"), True, True), 233 | Range(Word("*"), Word("*"), True, True), 234 | Range(Word("1"), Word("*"), True, True), 235 | ) 236 | ) 237 | for merge_ranges in (True, False): 238 | with self.subTest(merge_ranges=merge_ranges): 239 | resolver = OpenRangeTransformer(merge_ranges=merge_ranges) 240 | output = resolver(tree) 241 | self.assertEqual(output, tree) 242 | 243 | def test_first_range_is_merged(self): 244 | tree = ( 245 | AndOperation( 246 | Range(Word("*"), Word("2"), True, True), 247 | Range(Word("*"), Word("*"), True, True), 248 | Range(Word("*"), Word("3"), True, True), 249 | Range(Word("1"), Word("*"), True, True), 250 | Range(Word("4"), Word("*"), True, True), 251 | ) 252 | ) 253 | expected = ( 254 | AndOperation( 255 | Range(Word("1"), Word("2"), True, True), 256 | Range(Word("*"), Word("*"), True, True), 257 | Range(Word("4"), Word("3"), True, True), 258 | ) 259 | ) 260 | resolver = OpenRangeTransformer(merge_ranges=True) 261 | output = resolver(tree) 262 | self.assertEqual(output, expected) 263 | self.assertEqual(str(output), str(expected)) 264 | 265 | def test_do_not_merge_unknown(self): 266 | tree = ( 267 | UnknownOperation( 268 | Range(Word("1"), Word("*"), True, True), 269 | Range(Word("*"), Word("2"), True, True), 270 | ) 271 | ) 272 | resolver = OpenRangeTransformer(merge_ranges=True) 273 | output = resolver(tree) 274 | self.assertEqual(output, tree) 275 | 276 | def test_do_not_merge_searchfield(self): 277 | tree = ( 278 | AndOperation( 279 | Range(Word("1"), Word("*"), True, True), 280 | SearchField("foo", Range(Word("*"), Word("2"), True, True)) 281 | ) 282 | ) 283 | resolver = OpenRangeTransformer(merge_ranges=True) 284 | output = resolver(tree) 285 | self.assertEqual(output, tree) 286 | 287 | def test_do_not_merge_boosted(self): 288 | tree = ( 289 | AndOperation( 290 | Boost(Range(Word("1"), Word("*"), True, True), 2), 291 | Boost(Range(Word("*"), Word("2"), True, True), 2), 292 | ) 293 | ) 294 | resolver = OpenRangeTransformer(merge_ranges=True) 295 | output = resolver(tree) 296 | self.assertEqual(output, tree) 297 | -------------------------------------------------------------------------------- /tests/test_visitor.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import copy 3 | from unittest import TestCase 4 | 5 | from luqum.tree import ( 6 | NONE_ITEM, Group, Word, Phrase, AndOperation, OrOperation, Proximity, SearchField, 7 | Boost, Fuzzy, Regex, 8 | ) 9 | from luqum.visitor import ( 10 | PathTrackingTransformer, PathTrackingVisitor, TreeTransformer, TreeVisitor, 11 | ) 12 | 13 | 14 | class TreeVisitorTestCase(TestCase): 15 | 16 | class BasicVisitor(TreeVisitor): 17 | """Dummy visitor, simply yielding a list of nodes. 
""" 18 | 19 | def generic_visit(self, node, context): 20 | yield node 21 | yield from super().generic_visit(node, context) 22 | 23 | class TrackingParentsVisitor(TreeVisitor): 24 | """Visitor, yielding nodes and parents.""" 25 | 26 | def generic_visit(self, node, context): 27 | yield node, context.get("parents") 28 | yield from super().generic_visit(node, context) 29 | 30 | class MROVisitor(TreeVisitor): 31 | 32 | def visit_or_operation(self, node, context): 33 | yield "{} OR {}".format(*node.children) 34 | yield from super().generic_visit(node, context) 35 | 36 | def visit_base_operation(self, node, context): 37 | yield "{} BASE_OP {}".format(*node.children) 38 | yield from super().generic_visit(node, context) 39 | 40 | def visit_word(self, node, parents=[]): 41 | yield node.value 42 | 43 | def test_generic_visit(self): 44 | tree = AndOperation(Word("foo"), Word("bar")) 45 | visitor = TreeVisitor() 46 | nodes = visitor.visit(tree) 47 | self.assertEqual(nodes, []) 48 | # with a context for coverage… 49 | nodes = visitor.visit(tree, context={}) 50 | self.assertEqual(nodes, []) 51 | 52 | def test_basic_traversal(self): 53 | tree = AndOperation(Word("foo"), Word("bar")) 54 | visitor = self.BasicVisitor() 55 | nodes = visitor.visit(tree) 56 | self.assertListEqual([tree, Word("foo"), Word("bar")], nodes) 57 | 58 | def test_parents_tracking(self): 59 | tree = AndOperation(Word("foo"), Proximity(Phrase('"bar"'), 2)) 60 | visitor = self.TrackingParentsVisitor(track_parents=True) 61 | nodes = visitor.visit(tree) 62 | self.assertListEqual( 63 | [ 64 | (tree, None), 65 | (Word("foo"), (tree,)), 66 | (Proximity(Phrase('"bar"'), degree=2), (tree,)), 67 | (Phrase('"bar"'), (tree, Proximity(Phrase('"bar"'), 2))), 68 | ], 69 | nodes, 70 | ) 71 | 72 | def test_parents_tracking_no_tracking(self): 73 | tree = AndOperation(Word("foo"), Phrase('"bar"')) 74 | # no parents tracking ! 
75 | visitor = self.TrackingParentsVisitor() 76 | nodes = visitor.visit(tree) 77 | self.assertListEqual([(tree, None), (Word("foo"), None), (Phrase('"bar"'), None)], nodes) 78 | 79 | def test_mro(self): 80 | visitor = self.MROVisitor() 81 | 82 | tree = OrOperation(Word('a'), Word('b')) 83 | result = visitor.visit(tree) 84 | self.assertEqual(list(result), ['a OR b', 'a', 'b']) 85 | 86 | # AndOperation has no specific method, 87 | # but inherits BaseOperation, hence uses visit_base_operation 88 | tree = AndOperation(Word('a'), Word('b')) 89 | result = visitor.visit(tree) 90 | self.assertEqual(list(result), ['a BASE_OP b', 'a', 'b']) 91 | 92 | 93 | class TreeTransformerTestCase(TestCase): 94 | 95 | class BasicTransformer(TreeTransformer): 96 | """ 97 | Dummy transformer that simply turns any Word node's value into "lol" 98 | """ 99 | def visit_word(self, node, context): 100 | yield Word(context.get("replacement", 'lol')) 101 | 102 | def visit_phrase(self, node, context): 103 | yield from [] 104 | 105 | def visit_base_operation(self, node, context): 106 | new_node, = super().generic_visit(node, context) 107 | # if new_node has no operands, it's like a removal 108 | if len(new_node.children) == 0: 109 | return 110 | # if we have only one operand, return it 111 | elif len(new_node.children) == 1: 112 | yield new_node.children[0] 113 | else: 114 | # normal return 115 | yield new_node 116 | 117 | class TrackingParentsTransformer(TreeTransformer): 118 | 119 | def visit_word(self, node, context): 120 | new_node, = self.generic_visit(node, context) 121 | if any(isinstance(p, SearchField) for p in context["new_parents"]): 122 | new_node.value = "lol" 123 | yield new_node 124 | 125 | class RaisingTreeTransformer(TreeTransformer): 126 | 127 | def generic_visit(self, node, context): 128 | yield node 129 | yield node 130 | 131 | class RaisingTreeTransformer2(TreeTransformer): 132 | 133 | def generic_visit(self, node, context): 134 | raise ValueError("Random error") 135 | 136 | def test_basic_traversal(self): 137 | tree = AndOperation(Word("foo"), Word("bar")) 138 | 139 | transformer = self.BasicTransformer() 140 | new_tree = transformer.visit(tree) 141 | self.assertEqual(new_tree, AndOperation(Word("lol"), Word("lol"))) 142 | 143 | def test_context_value(self): 144 | tree = AndOperation(Word("foo"), Word("bar")) 145 | 146 | transformer = self.BasicTransformer() 147 | new_tree = transformer.visit(tree, context={"replacement": "rotfl"}) 148 | self.assertEqual(new_tree, AndOperation(Word("rotfl"), Word("rotfl"))) 149 | 150 | def test_no_transform(self): 151 | tree = AndOperation(NONE_ITEM, NONE_ITEM) 152 | transformer = self.BasicTransformer() 153 | new_tree = transformer.visit(tree) 154 | self.assertEqual(new_tree, tree) 155 | 156 | def test_one_word(self): 157 | tree = Word("foo") 158 | transformer = self.BasicTransformer() 159 | new_tree = transformer.visit(tree) 160 | self.assertEqual(new_tree, Word("lol")) 161 | 162 | def test_tracking_parents(self): 163 | tree = OrOperation(Word("foo"), SearchField("test", Word("bar"))) 164 | expected = OrOperation(Word("foo"), SearchField("test", Word("lol"))) 165 | transformer = self.TrackingParentsTransformer(track_new_parents=True) 166 | new_tree = transformer.visit(tree) 167 | self.assertEqual(new_tree, expected) 168 | 169 | def test_removal(self): 170 | tree = AndOperation( 171 | OrOperation(Word("spam"), Word("ham")), 172 | AndOperation(Word("foo"), Phrase('"bar"')), 173 | AndOperation(Phrase('"baz"'), Phrase('"biz"')), 174 | ) 175 | 176 | transformer = 
self.BasicTransformer() 177 | new_tree = transformer.visit(tree) 178 | 179 | self.assertEqual( 180 | new_tree, 181 | AndOperation(OrOperation(Word("lol"), Word("lol")), Word("lol")), 182 | ) 183 | 184 | def test_silent_value_error(self): 185 | # in case some attribute misleads the search for child nodes, do not raise 186 | tree = AndOperation(Word("a"), Word("b")) 187 | setattr(tree, "misleading1", ()) 188 | setattr(tree, "misleading2", []) 189 | # hackishly patch __dict__ to be sure operands come in the right order for the test 190 | tree.__dict__ = collections.OrderedDict(tree.__dict__) 191 | tree.__dict__['operands'] = tree.__dict__.pop('operands') # operands are now last 192 | 193 | transformer = self.BasicTransformer() 194 | new_tree = transformer.visit(tree) 195 | 196 | self.assertEqual(new_tree, AndOperation(Word("lol"), Word("lol"))) 197 | 198 | def test_repeating_expression(self): 199 | # non-regression test 200 | tree = AndOperation( 201 | Group(OrOperation(Word('bar'), Word('foo'))), 202 | Group(OrOperation(Word('bar'), Word('foo'), Word('spam'))), 203 | ) 204 | # the base transformer should not change the tree 205 | same_tree = TreeTransformer().visit(copy.deepcopy(tree)) 206 | self.assertEqual(same_tree, tree) 207 | 208 | def test_more_than_one_element_raises(self): 209 | tree = Word("foo") 210 | with self.assertRaises(ValueError) as raised: 211 | self.RaisingTreeTransformer().visit(tree) 212 | self.assertIn( 213 | "The visit of the tree should have produced exactly one element", 214 | str(raised.exception), 215 | ) 216 | 217 | def test_value_error_pass_through(self): 218 | # a ValueError that is not related to unpacking is passed through as-is 219 | tree = Word("foo") 220 | with self.assertRaises(ValueError) as raised: 221 | self.RaisingTreeTransformer2().visit(tree) 222 | self.assertEqual("Random error", str(raised.exception)) 223 | 224 | 225 | class PathTrackingVisitorTestCase(TestCase): 226 | 227 | class TermPathVisitor(PathTrackingVisitor): 228 | 229 | def visit_term(self, node, context): 230 | yield (context["path"], node.value) 231 | 232 | @classmethod 233 | def setUpClass(cls): 234 | cls.visit = cls.TermPathVisitor().visit 235 | 236 | def test_visit_simple_term(self): 237 | paths = self.visit(Word("foo")) 238 | self.assertEqual(paths, [((), "foo")]) 239 | 240 | def test_visit_complex(self): 241 | tree = AndOperation( 242 | Group(OrOperation(Word("foo"), Word("bar"), Boost(Fuzzy(Word("baz")), force=2))), 243 | Proximity(Phrase('"spam ham"')), 244 | SearchField("fizz", Regex("/fuzz/")), 245 | ) 246 | paths = self.visit(tree) 247 | self.assertEqual( 248 | sorted(paths), 249 | [ 250 | ((0, 0, 0), "foo"), 251 | ((0, 0, 1), "bar"), 252 | ((0, 0, 2, 0, 0), "baz"), 253 | ((1, 0), '"spam ham"'), 254 | ((2, 0), '/fuzz/'), 255 | ] 256 | ) 257 | 258 | 259 | class PathTrackingTransformerTestCase(TestCase): 260 | 261 | class TermPathTransformer(PathTrackingTransformer): 262 | 263 | def visit_term(self, node, context): 264 | path = '-'.join(str(i) for i in context['path']) 265 | quote = '"' if isinstance(node, Phrase) else "/" if isinstance(node, Regex) else "" 266 | value = node.value.strip(quote) 267 | new_node = node.clone_item(value=f"{quote}{value}@{path}{quote}") 268 | yield new_node 269 | 270 | @classmethod 271 | def setUpClass(cls): 272 | cls.transform = cls.TermPathTransformer().visit 273 | 274 | def test_visit_simple_term(self): 275 | tree = self.transform(Word("foo")) 276 | self.assertEqual(tree, Word("foo@")) 277 | 278 | def test_visit_complex(self): 279 | tree = AndOperation( 280 | 
Group(OrOperation(Word("foo"), Word("bar"), Boost(Fuzzy(Word("baz")), force=2))), 281 | Proximity(Phrase('"spam ham"')), 282 | SearchField("fizz", Regex("/fuzz/")), 283 | ) 284 | transformed = self.transform(tree) 285 | expected = AndOperation( 286 | Group(OrOperation( 287 | Word("foo@0-0-0"), 288 | Word("bar@0-0-1"), 289 | Boost(Fuzzy(Word("baz@0-0-2-0-0")), force=2), 290 | )), 291 | Proximity(Phrase('"spam ham@1-0"')), 292 | SearchField("fizz", Regex("/fuzz@2-0/")), 293 | ) 294 | self.assertEqual(transformed, expected) 295 | --------------------------------------------------------------------------------
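The naming tests near the top of this section only exercise set_name and auto_name indirectly, through the expected Elasticsearch output. A minimal sketch of the same API on a parsed tree, assuming only the behaviour those tests pin down; the query strings and the name "foo_bar" are illustrative only:

from luqum.naming import auto_name, set_name
from luqum.parser import parser

tree = parser.parse('text:"foo bar"')
# name a node explicitly: the name surfaces as the clause's "_name" entry
# once the tree is compiled to an Elasticsearch query, as in the tests above
set_name(tree, "foo_bar")

# or let luqum assign short names ("a", "b", ...) to the nodes of a tree
auto_name(parser.parse('text:"foo bar" AND (bar OR spam:baz)'))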
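extract_nested_queries, as pinned down by tests/test_elasticsearch/test_nested.py above, re-emits every named clause of a bool query found under a nested query as a standalone nested query. A minimal usage sketch, reusing the query shape of test_nested_bool_inside:

from luqum.elasticsearch.nested import extract_nested_queries

query = {
    "nested": {
        "path": "my",
        "query": {"bool": {"must": [
            {"term": {"text": {"value": "spam", "_name": "spam"}}},
            {"term": {"text": {"value": "ham", "_name": "ham"}}},
        ]}},
    }
}

# one standalone nested query per named clause, reusing the clause's _name;
# each can then be run on its own, e.g. to find out which named clauses
# matched a given document
for sub_query in extract_nested_queries(query):
    print(sub_query)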
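SchemaAnalyzer only needs the index settings dict, as the schema tests above show. A small sketch with a reduced mapping; the field names here are illustrative, not part of the library:

from luqum.elasticsearch.schema import SchemaAnalyzer

index_settings = {
    "settings": {"query": {"default_field": "text"}},
    "mappings": {"properties": {
        "text": {"type": "text"},
        "publish": {
            "type": "nested",
            "properties": {"site": {"type": "keyword"}},
        },
    }},
}

analyzer = SchemaAnalyzer(index_settings)
print(analyzer.default_field())                # text
print(sorted(analyzer.not_analyzed_fields()))  # ['publish.site']
print(analyzer.nested_fields())                # {'publish': {'site': {}}}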
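The prettifier tests fix the rendering contract: output stays on one line while it fits under max_len, and is otherwise broken up and indented (with operators kept inline when inline_ops=True). A sketch reusing the one-liner tree from test_one_liner; the narrow Prettifier parameters mirror those of the tests:

from luqum.pretty import Prettifier, prettify
from luqum.tree import AndOperation, Group, OrOperation, Word

tree = AndOperation(Group(OrOperation(Word("bar"), Word("baz"))), Word("foo"))
print(prettify(tree))  # ( bar OR baz ) AND foo

# a deliberately narrow prettifier forces the multi-line, indented layout
narrow = Prettifier(indent=8, max_len=20)
print(narrow(tree))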
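luqum.thread.parse is the concurrency-safe entry point exercised by test_thread_parse above. A condensed sketch of the same pattern; the thread count and query string are arbitrary:

import queue
import threading

from luqum.thread import parse

results = queue.Queue()
query = '(title:"foo bar" AND body:"quick fox") OR title:fox'

def worker():
    # concurrent calls should all return the same tree
    results.put(parse(query))

threads = [threading.Thread(target=worker) for _ in range(10)]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()

expected = parse(query)
assert all(results.get() == expected for _ in range(10))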
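UnknownOperationResolver and OpenRangeTransformer are tested above mostly on hand-built trees; the same transformations apply to parsed queries. A sketch, assuming resolve_to=None picks AND by default as in test_lucene_resolution_simple, and that complementary open ranges merge as in test_and_resolution:

from luqum.parser import parser
from luqum.utils import OpenRangeTransformer, UnknownOperationResolver

# implicit operators become UnknownOperation nodes at parse time
tree = parser.parse("foo bar")
resolver = UnknownOperationResolver(resolve_to=None)
print(str(resolver(tree)))  # foo AND bar

# a lower and an upper open bound can be merged into one closed range
range_tree = parser.parse("[1 TO *] AND [* TO 2]")
merger = OpenRangeTransformer(merge_ranges=True)
print(str(merger(range_tree)))  # a single [1 TO 2] range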
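Finally, the visitor tests document the dispatch rule: a node of class Word is handled by visit_word, lookup follows the node class's MRO, and transformer methods yield replacement nodes. A hedged sketch of a custom transformer; the class name and behaviour are illustrative only:

from luqum.tree import AndOperation, Word
from luqum.visitor import TreeTransformer

class UppercaseWords(TreeTransformer):
    # Word -> visit_word, following the naming rule shown in the tests above
    def visit_word(self, node, context):
        yield Word(node.value.upper())

tree = AndOperation(Word("foo"), Word("bar"))
assert UppercaseWords().visit(tree) == AndOperation(Word("FOO"), Word("BAR"))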