├── .github └── workflows │ └── test.yml ├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── CHANGELOG.rst ├── LICENSE ├── LICENSE.Apache2 ├── LICENSE.LGPLv3 ├── MANIFEST.in ├── Makefile ├── README.rst ├── docs ├── Makefile ├── make.bat └── source │ ├── _static │ └── luqum-logo.png │ ├── about.rst │ ├── api.rst │ ├── conf.py │ ├── index.rst │ ├── install.rst │ └── quick_start.rst ├── luqum-logo.png ├── luqum ├── __init__.py ├── auto_head_tail.py ├── check.py ├── deprecated_utils.py ├── elasticsearch │ ├── __init__.py │ ├── nested.py │ ├── schema.py │ ├── tree.py │ └── visitor.py ├── exceptions.py ├── head_tail.py ├── naming.py ├── parser.py ├── parsetab.py ├── pretty.py ├── thread.py ├── tree.py ├── utils.py └── visitor.py ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── alternative_lexer.py ├── test_auto_head_tail.py ├── test_check.py ├── test_deprecated_utils.py ├── test_elasticsearch ├── __init__.py ├── book.json ├── es_integration_utils.py ├── test_es_integration.py ├── test_es_naming.py ├── test_estree.py ├── test_naming.py ├── test_nested.py ├── test_schema.py └── tests.py ├── test_headtail.py ├── test_naming.py ├── test_parser.py ├── test_pretty.py ├── test_quick_start.rst ├── test_thread.py ├── test_tree.py ├── test_utils.py └── test_visitor.py /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: code quality checks and unit tests 2 | 3 | on: 4 | pull_request: 5 | 6 | 7 | jobs: 8 | # quality 9 | quality_checks: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: 14 | - "3.10" 15 | - "3.11" 16 | - "3.12" 17 | - "3.13" 18 | es-version: 19 | - "8.17.1" 20 | es-dsl-version: 21 | - "8.17.1" 22 | include: 23 | # only test older ES version with python 3.10 24 | - python-version: "3.10" 25 | es-version: "6.8.23" 26 | es-dsl-version: "6.4.0" 27 | - python-version: "3.10" 28 | es-version: "7.17.6" 29 | es-dsl-version: "7.4.0" 30 | # but also runs with newer versions of ES 31 | - python-version: "3.10" 32 | es-version: "8.17.1" 33 | es-dsl-version: "8.17.1" 34 | 35 | env: 36 | ES_VERSION: "${{ matrix.es-version }}" 37 | steps: 38 | #---------------------------------------------- 39 | # check-out repo and set-up python 40 | #---------------------------------------------- 41 | - name: Check out repository 42 | uses: actions/checkout@v3 43 | - name: Setup python 44 | uses: actions/setup-python@v4 45 | with: 46 | python-version: "${{ matrix.python-version }}" 47 | - name: install project 48 | run: | 49 | pip install -r requirements-dev.txt 50 | pip install . 
51 | # coveralls not yet compatible with python3.13 52 | [[ ${{ matrix.python-version }} != 3.13 ]] && pip install coveralls 53 | pip install elasticsearch-dsl==${{ matrix.es-dsl-version }} 54 | - name: run tests 55 | run: | 56 | make quality && \ 57 | make es_tests 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # PLY 7 | parser.out 8 | parsetab.py 9 | 10 | # coverage 11 | cover/ 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | .venv 51 | nosetests.xml 52 | coverage.xml 53 | *,cover 54 | .hypothesis/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | 63 | # Sphinx documentation 64 | docs/build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # nose 70 | .noseids 71 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-20.04 10 | tools: 11 | python: "3.9" 12 | 13 | # Build documentation in the docs/ directory with Sphinx 14 | sphinx: 15 | configuration: docs/source/conf.py 16 | 17 | # If using Sphinx, optionally build your docs in additional formats such as PDF 18 | formats: 19 | - pdf 20 | - epub 21 | 22 | # Optionally declare the Python requirements required to build your docs 23 | python: 24 | install: 25 | - requirements: requirements-dev.txt -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | matrix: 4 | include: 5 | - env: ES_APT_URL=https://packages.elastic.co/elasticsearch/2.x/debian ES_DSL_VERS=2.2.0 ES_VERS=2.2.1 6 | python: 3.7 7 | sudo: true 8 | dist: bionic 9 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0 10 | python: 3.7 11 | dist: bionic 12 | sudo: true 13 | - env: ES_APT_URL=https://packages.elastic.co/elasticsearch/2.x/debian ES_DSL_VERS=2.2.0 ES_VERS=2.2.1 14 | python: 3.8 15 | sudo: true 16 | dist: bionic 17 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0 18 | python: 3.8 19 | dist: bionic 20 | sudo: true 21 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0 22 | python: 3.9 23 | dist: bionic 24 | sudo: true 25 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/6.x/apt ES_DSL_VERS=6.3.1 ES_VERS=6.4.3 26 | python: 3.10
27 | dist: bionic 28 | sudo: true 29 | - env: ES_APT_URL=https://artifacts.elastic.co/packages/7.x/apt ES_DSL_VERS=7.2.1 ES_VERS=7.8.0 30 | python: 3.10 31 | dist: bionic 32 | sudo: true 33 | 34 | 35 | before_install: 36 | - sudo rm /etc/apt/sources.list; sudo touch /etc/apt/sources.list 37 | - wget -qO - https://packages.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add - 38 | - echo "deb $ES_APT_URL stable main" | sudo tee -a /etc/apt/sources.list.d/elastic.list 39 | - sudo apt-get update && sudo apt-get install elasticsearch=$ES_VERS -y --allow-downgrades 40 | - sudo chown -R elasticsearch:elasticsearch /etc/default/elasticsearch 41 | - sudo systemctl start elasticsearch.service 42 | - while ! curl -XGET "localhost:9200";do sleep 1; done 43 | - curl localhost:9200 44 | 45 | install: 46 | - pip install . 47 | - pip install -r requirements-dev.txt 48 | - pip install coveralls 49 | - pip install elasticsearch-dsl==$ES_DSL_VERS 50 | 51 | script: 52 | - make tests 53 | - make quality 54 | 55 | after_success: 56 | # coveralls only for python3.8 and ES 7 57 | - python --version |grep 3.8 && [ $ES_DSL_VERS == "7.2.1" ] && coveralls 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This project is dual-licensed. 2 | 3 | See LICENSE.LGPLv3 and LICENSE.Apache2 4 | -------------------------------------------------------------------------------- /LICENSE.Apache2: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below).
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /LICENSE.LGPLv3: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed.
7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 
80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 
150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include *.rst 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ES_VERSION ?= 8.17.1 2 | 3 | tests: 4 | pytest 5 | 6 | # integration test with ES using docker 7 | es_tests: 8 | ( docker ps |grep luqum_test_es ) || \ 9 | docker run --name luqum_test_es --rm -d -ti -p 127.0.0.1:9200:9200 \ 10 | -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ 11 | -e "xpack.security.enabled=false" \ 12 | elasticsearch:${ES_VERSION} 13 | # wait for ES to be ready 14 | @echo "waiting for ES to be ready" 15 | @while ! curl -XGET "localhost:9200" >/dev/null 2>&1;do sleep 1; echo -n "."; done 16 | pytest 17 | docker stop luqum_test_es 18 | 19 | quality: 20 | flake8 luqum tests 21 | 22 | # To upload files, you need to have a ~/.pypirc file locally. 23 | # This file should contain all the necessary passwords and API tokens. 24 | distribute: 25 | rm -r build 26 | rm dist/* 27 | python -m build --wheel 28 | python -m build --sdist 29 | python -m twine upload --verbose --repository luqum dist/* 30 | 31 | .PHONY: tests quality distribute 32 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | luqum - A Lucene query parser in Python, using PLY 2 | ######################################################### 3 | 4 | |pypi-version| |readthedocs| |travis| |coveralls| 5 | 6 | |logo| 7 | 8 | "luqum" (as in LUcene QUery Manipulator) is a tool to parse queries 9 | written in the `Lucene Query DSL`_ and build an abstract syntax tree 10 | to inspect, analyze or otherwise manipulate search queries. 11 | 12 | It enables enriching the Lucene Query DSL semantics 13 | (for example to support nested object searches or to apply special treatment to some fields), 14 | and transforming Lucene DSL queries into native `ElasticSearch JSON DSL`_ queries. 15 | 16 | Thanks to luqum, your users may continue to write queries like: 17 | `author.last_name:Smith OR author:(age:[25 TO 34] AND first_name:John)` 18 | and you will be able to leverage the ElasticSearch query DSL 19 | and control the precise meaning of each search term. 20 | 21 | Luqum is dual-licensed under Apache2.0 and LGPLv3.
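A minimal sketch of this workflow (the query is illustrative, and the builder is used here with its default options; real setups usually pass options derived from the index mapping, as described in the documentation):

.. code-block:: python

    from luqum.parser import parser
    from luqum.elasticsearch.visitor import ElasticsearchQueryBuilder

    # parse a Lucene query into an abstract syntax tree
    tree = parser.parse('title:"quick start" AND published:[2020 TO 2023]')

    # the tree can be inspected and manipulated, and it serializes
    # back to the original query string
    assert str(tree) == 'title:"quick start" AND published:[2020 TO 2023]'

    # translate the tree into an Elasticsearch JSON DSL query (a dict)
    es_query = ElasticsearchQueryBuilder()(tree)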
22 | 23 | Compatible with Python 3.10+ 24 | 25 | Installation 26 | ============ 27 | 28 | ``pip install luqum`` 29 | 30 | 31 | Dependencies 32 | ============ 33 | 34 | `PLY`_ >= 3.11 35 | 36 | 37 | Full documentation 38 | ================== 39 | 40 | http://luqum.readthedocs.org/en/latest/ 41 | 42 | 43 | .. _`Lucene Query DSL` : https://lucene.apache.org/core/3_6_0/queryparsersyntax.html 44 | .. _`ElasticSearch JSON DSL`: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html 45 | 46 | .. _`PLY`: http://www.dabeaz.com/ply/ 47 | 48 | .. |logo| image:: https://raw.githubusercontent.com/jurismarches/luqum/master/luqum-logo.png 49 | 50 | .. |pypi-version| image:: https://img.shields.io/pypi/v/luqum.svg 51 | :target: https://pypi.python.org/pypi/luqum 52 | :alt: Latest PyPI version 53 | .. |travis| image:: http://img.shields.io/travis/jurismarches/luqum/master.svg?style=flat 54 | :target: https://travis-ci.org/jurismarches/luqum 55 | .. |coveralls| image:: http://img.shields.io/coveralls/jurismarches/luqum/master.svg?style=flat 56 | :target: https://coveralls.io/r/jurismarches/luqum 57 | .. |readthedocs| image:: https://readthedocs.org/projects/luqum/badge/?version=latest 58 | :target: http://luqum.readthedocs.org/en/latest/?badge=latest 59 | :alt: Documentation Status 60 | 61 | 62 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 
16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make <target>' where <target> is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " epub3 to make an epub3" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | @echo " dummy to check syntax errors of document sources" 51 | 52 | .PHONY: clean 53 | clean: 54 | rm -rf $(BUILDDIR)/* 55 | 56 | .PHONY: html 57 | html: 58 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 61 | 62 | .PHONY: dirhtml 63 | dirhtml: 64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 65 | @echo 66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 67 | 68 | .PHONY: singlehtml 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | .PHONY: pickle 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | .PHONY: json 81 | json: 82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 83 | @echo 84 | @echo "Build finished; now you can process the JSON files." 85 | 86 | .PHONY: htmlhelp 87 | htmlhelp: 88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 89 | @echo 90 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 91 | ".hhp project file in $(BUILDDIR)/htmlhelp."
92 | 93 | .PHONY: qthelp 94 | qthelp: 95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 96 | @echo 97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/luqum.qhcp" 100 | @echo "To view the help file:" 101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/luqum.qhc" 102 | 103 | .PHONY: applehelp 104 | applehelp: 105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 106 | @echo 107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 108 | @echo "N.B. You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/luqum" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/luqum" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. 
The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 201 | 202 | .PHONY: doctest 203 | doctest: 204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 205 | @echo "Testing of doctests in the sources finished, look at the " \ 206 | "results in $(BUILDDIR)/doctest/output.txt." 207 | 208 | .PHONY: coverage 209 | coverage: 210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 211 | @echo "Testing of coverage in the sources finished, look at the " \ 212 | "results in $(BUILDDIR)/coverage/python.txt." 213 | 214 | .PHONY: xml 215 | xml: 216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 217 | @echo 218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 219 | 220 | .PHONY: pseudoxml 221 | pseudoxml: 222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 223 | @echo 224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 225 | 226 | .PHONY: dummy 227 | dummy: 228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 229 | @echo 230 | @echo "Build finished. Dummy builder generates no files." 231 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. epub3 to make an epub3 31 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 32 | echo. text to make text files 33 | echo. man to make manual pages 34 | echo. texinfo to make Texinfo files 35 | echo. gettext to make PO message catalogs 36 | echo. changes to make an overview over all changed/added/deprecated items 37 | echo. xml to make Docutils-native XML files 38 | echo. pseudoxml to make pseudoxml-XML files for display purposes 39 | echo. linkcheck to check all external links for integrity 40 | echo. doctest to run all doctests embedded in the documentation if enabled 41 | echo. coverage to run coverage check of the documentation if enabled 42 | echo.
dummy to check syntax errors of document sources 43 | goto end 44 | ) 45 | 46 | if "%1" == "clean" ( 47 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 48 | del /q /s %BUILDDIR%\* 49 | goto end 50 | ) 51 | 52 | 53 | REM Check if sphinx-build is available and fallback to Python version if any 54 | %SPHINXBUILD% 1>NUL 2>NUL 55 | if errorlevel 9009 goto sphinx_python 56 | goto sphinx_ok 57 | 58 | :sphinx_python 59 | 60 | set SPHINXBUILD=python -m sphinx.__init__ 61 | %SPHINXBUILD% 2> nul 62 | if errorlevel 9009 ( 63 | echo. 64 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 65 | echo.installed, then set the SPHINXBUILD environment variable to point 66 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 67 | echo.may add the Sphinx directory to PATH. 68 | echo. 69 | echo.If you don't have Sphinx installed, grab it from 70 | echo.http://sphinx-doc.org/ 71 | exit /b 1 72 | ) 73 | 74 | :sphinx_ok 75 | 76 | 77 | if "%1" == "html" ( 78 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 79 | if errorlevel 1 exit /b 1 80 | echo. 81 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 82 | goto end 83 | ) 84 | 85 | if "%1" == "dirhtml" ( 86 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 87 | if errorlevel 1 exit /b 1 88 | echo. 89 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 90 | goto end 91 | ) 92 | 93 | if "%1" == "singlehtml" ( 94 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 95 | if errorlevel 1 exit /b 1 96 | echo. 97 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 98 | goto end 99 | ) 100 | 101 | if "%1" == "pickle" ( 102 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 103 | if errorlevel 1 exit /b 1 104 | echo. 105 | echo.Build finished; now you can process the pickle files. 106 | goto end 107 | ) 108 | 109 | if "%1" == "json" ( 110 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 111 | if errorlevel 1 exit /b 1 112 | echo. 113 | echo.Build finished; now you can process the JSON files. 114 | goto end 115 | ) 116 | 117 | if "%1" == "htmlhelp" ( 118 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 119 | if errorlevel 1 exit /b 1 120 | echo. 121 | echo.Build finished; now you can run HTML Help Workshop with the ^ 122 | .hhp project file in %BUILDDIR%/htmlhelp. 123 | goto end 124 | ) 125 | 126 | if "%1" == "qthelp" ( 127 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 128 | if errorlevel 1 exit /b 1 129 | echo. 130 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 131 | .qhcp project file in %BUILDDIR%/qthelp, like this: 132 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\luqum.qhcp 133 | echo.To view the help file: 134 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\luqum.qhc 135 | goto end 136 | ) 137 | 138 | if "%1" == "devhelp" ( 139 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 140 | if errorlevel 1 exit /b 1 141 | echo. 142 | echo.Build finished. 143 | goto end 144 | ) 145 | 146 | if "%1" == "epub" ( 147 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 148 | if errorlevel 1 exit /b 1 149 | echo. 150 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 151 | goto end 152 | ) 153 | 154 | if "%1" == "epub3" ( 155 | %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 156 | if errorlevel 1 exit /b 1 157 | echo. 158 | echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
159 | goto end 160 | ) 161 | 162 | if "%1" == "latex" ( 163 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 164 | if errorlevel 1 exit /b 1 165 | echo. 166 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdf" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "latexpdfja" ( 181 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 182 | cd %BUILDDIR%/latex 183 | make all-pdf-ja 184 | cd %~dp0 185 | echo. 186 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 187 | goto end 188 | ) 189 | 190 | if "%1" == "text" ( 191 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 192 | if errorlevel 1 exit /b 1 193 | echo. 194 | echo.Build finished. The text files are in %BUILDDIR%/text. 195 | goto end 196 | ) 197 | 198 | if "%1" == "man" ( 199 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 200 | if errorlevel 1 exit /b 1 201 | echo. 202 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 203 | goto end 204 | ) 205 | 206 | if "%1" == "texinfo" ( 207 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 208 | if errorlevel 1 exit /b 1 209 | echo. 210 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 211 | goto end 212 | ) 213 | 214 | if "%1" == "gettext" ( 215 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 216 | if errorlevel 1 exit /b 1 217 | echo. 218 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 219 | goto end 220 | ) 221 | 222 | if "%1" == "changes" ( 223 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 224 | if errorlevel 1 exit /b 1 225 | echo. 226 | echo.The overview file is in %BUILDDIR%/changes. 227 | goto end 228 | ) 229 | 230 | if "%1" == "linkcheck" ( 231 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 232 | if errorlevel 1 exit /b 1 233 | echo. 234 | echo.Link check complete; look for any errors in the above output ^ 235 | or in %BUILDDIR%/linkcheck/output.txt. 236 | goto end 237 | ) 238 | 239 | if "%1" == "doctest" ( 240 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 241 | if errorlevel 1 exit /b 1 242 | echo. 243 | echo.Testing of doctests in the sources finished, look at the ^ 244 | results in %BUILDDIR%/doctest/output.txt. 245 | goto end 246 | ) 247 | 248 | if "%1" == "coverage" ( 249 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 250 | if errorlevel 1 exit /b 1 251 | echo. 252 | echo.Testing of coverage in the sources finished, look at the ^ 253 | results in %BUILDDIR%/coverage/python.txt. 254 | goto end 255 | ) 256 | 257 | if "%1" == "xml" ( 258 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 259 | if errorlevel 1 exit /b 1 260 | echo. 261 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 262 | goto end 263 | ) 264 | 265 | if "%1" == "pseudoxml" ( 266 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 267 | if errorlevel 1 exit /b 1 268 | echo. 269 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 270 | goto end 271 | ) 272 | 273 | if "%1" == "dummy" ( 274 | %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy 275 | if errorlevel 1 exit /b 1 276 | echo. 277 | echo.Build finished. Dummy builder generates no files. 
278 | goto end 279 | ) 280 | 281 | :end 282 | -------------------------------------------------------------------------------- /docs/source/_static/luqum-logo.png: -------------------------------------------------------------------------------- 1 | ../../../luqum-logo.png -------------------------------------------------------------------------------- /docs/source/about.rst: -------------------------------------------------------------------------------- 1 | What is Luqum 2 | ############### 3 | 4 | Luqum stands for LUcene QUery Manipulator. 5 | 6 | It features a Python library with a parser for the `Lucene Query DSL`_ as found in 7 | `Solr`_ `query syntax`_ or 8 | `ElasticSearch`_ `query string`_. 9 | 10 | From the parser it builds a tree (see :ref:`tutorial-parsing`). 11 | 12 | This tree can eventually be manipulated 13 | and then transformed back into a query string, 14 | or used to generate other forms of queries. 15 | 16 | In particular, luqum ships with 17 | a utility to transform a Lucene query 18 | into an Elasticsearch query expressed in the JSON query DSL 19 | (see :ref:`tutorial-elastic`). 20 | 21 | You may use this to: 22 | 23 | * run sanity checks on a query 24 | * run your own checks on a query (e.g. forbid certain fields) 25 | * replace some expressions in a query 26 | * pretty print a query 27 | * inject queries into queries 28 | * extend the Lucene query language semantics 29 | 30 | The parser is built using `PLY`_. 31 | 32 | Luqum is dual-licensed under Apache2.0 and LGPLv3. 33 | 34 | .. warning:: 35 | 36 | While it has been used in production by our team for some time, 37 | this library is still a work in progress and lacks some features. 38 | 39 | Contributions are welcome. 40 | 41 | .. _`Lucene Query DSL`: https://lucene.apache.org/core/3_6_0/queryparsersyntax.html 42 | .. _`Solr`: http://lucene.apache.org/solr/ 43 | .. _`query syntax`: https://wiki.apache.org/solr/SolrQuerySyntax 44 | .. _`ElasticSearch`: https://www.elastic.co/products/elasticsearch 45 | .. _`query string`: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html 46 | .. _`PLY`: http://www.dabeaz.com/ply/ply.html 47 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | ##### 3 | 4 | Parsing and constructing queries 5 | ================================== 6 | 7 | This is the core of the library: a parser and the syntax tree definition. 8 | 9 | luqum.parser 10 | --------------- 11 | 12 | .. automodule:: luqum.parser 13 | :members: parser, parse 14 | 15 | luqum.thread 16 | --------------- 17 | 18 | .. automodule:: luqum.thread 19 | :members: parse 20 | 21 | luqum.tree 22 | --------------- 23 | 24 | .. automodule:: luqum.tree 25 | :members: 26 | :member-order: bysource 27 | 28 | .. _elasticsearch-api: 29 | 30 | Transforming to Elasticsearch queries 31 | ====================================== 32 | 33 | luqum.schema 34 | ------------ 35 | 36 | .. autoclass:: luqum.elasticsearch.schema.SchemaAnalyzer 37 | :members: 38 | :member-order: bysource 39 | 40 | 41 | luqum.elasticsearch 42 | -------------------- 43 | 44 | .. autoclass:: luqum.elasticsearch.visitor.ElasticsearchQueryBuilder 45 | :members: __init__, __call__ 46 | :member-order: bysource 47 | 48 | 49 | Naming and explaining matches 50 | ============================== 51 | 52 | 53 | luqum.naming 54 | ------------ 55 | 56 | .. 
automodule:: luqum.naming 57 | :members: 58 | :member-order: bysource 59 | 60 | 61 | Utilities 62 | ========== 63 | 64 | 65 | luqum.visitor: Manipulating trees 66 | ---------------------------------- 67 | 68 | .. automodule:: luqum.visitor 69 | :members: 70 | :member-order: bysource 71 | 72 | 73 | luqum.auto_head_tail: Automatic addition of spaces 74 | -------------------------------------------------- 75 | 76 | .. automodule:: luqum.auto_head_tail 77 | :members: 78 | 79 | luqum.pretty: Pretty printing 80 | ------------------------------ 81 | 82 | .. automodule:: luqum.pretty 83 | :members: 84 | 85 | luqum.check: Checking for validity 86 | ----------------------------------- 87 | 88 | .. automodule:: luqum.check 89 | :members: 90 | 91 | luqum.utils: Misc 92 | ----------------- 93 | 94 | .. automodule:: luqum.utils 95 | :members: 96 | :member-order: bysource 97 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # luqum documentation build configuration file, created by 5 | # sphinx-quickstart on Wed Apr 13 10:25:52 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | #sys.path.insert(0, os.path.abspath('.')) 23 | sys.path.insert(0, os.path.abspath('../..')) 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | #needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.intersphinx', 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.graphviz', 37 | 'alabaster', 38 | ] 39 | 40 | graphviz_dot_args = ["-Nshape=box"] 41 | graphviz_output_format = "svg" 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix(es) of source filenames. 47 | # You can specify multiple suffix as a list of string: 48 | # source_suffix = ['.rst', '.md'] 49 | source_suffix = '.rst' 50 | 51 | # The encoding of source files. 52 | #source_encoding = 'utf-8-sig' 53 | 54 | # The master toctree document. 55 | master_doc = 'index' 56 | 57 | # General information about the project. 58 | project = 'luqum' 59 | copyright = '2016, jurismarches' 60 | author = 'jurismarches' 61 | 62 | # The version info for the project you're documenting, acts as replacement for 63 | # |version| and |release|, also used in various other places throughout the 64 | # built documents. 65 | # 66 | # The short X.Y version. 67 | version = '0.7' 68 | # The full version, including alpha/beta/rc tags. 69 | release = '0.7.1' 70 | 71 | # The language for content autogenerated by Sphinx. 
Refer to documentation 72 | # for a list of supported languages. 73 | # 74 | # This is also used if you do content translation via gettext catalogs. 75 | # Usually you set "language" from the command line for these cases. 76 | language = "en" 77 | 78 | # There are two options for replacing |today|: either, you set today to some 79 | # non-false value, then it is used: 80 | #today = '' 81 | # Else, today_fmt is used as the format for a strftime call. 82 | #today_fmt = '%B %d, %Y' 83 | 84 | # List of patterns, relative to source directory, that match files and 85 | # directories to ignore when looking for source files. 86 | # These patterns also affect html_static_path and html_extra_path 87 | exclude_patterns = [] 88 | 89 | # The reST default role (used for this markup: `text`) to use for all 90 | # documents. 91 | #default_role = None 92 | 93 | # If true, '()' will be appended to :func: etc. cross-reference text. 94 | #add_function_parentheses = True 95 | 96 | # If true, the current module name will be prepended to all description 97 | # unit titles (such as .. function::). 98 | #add_module_names = True 99 | 100 | # If true, sectionauthor and moduleauthor directives will be shown in the 101 | # output. They are ignored by default. 102 | #show_authors = False 103 | 104 | # The name of the Pygments (syntax highlighting) style to use. 105 | pygments_style = 'sphinx' 106 | 107 | # A list of ignored prefixes for module index sorting. 108 | #modindex_common_prefix = [] 109 | 110 | # If true, keep warnings as "system message" paragraphs in the built documents. 111 | #keep_warnings = False 112 | 113 | # If true, `todo` and `todoList` produce output, else they produce nothing. 114 | todo_include_todos = False 115 | 116 | 117 | # -- Options for HTML output ---------------------------------------------- 118 | 119 | # The theme to use for HTML and HTML Help pages. See the documentation for 120 | # a list of builtin themes. 121 | html_theme = 'alabaster' 122 | 123 | # Theme options are theme-specific and customize the look and feel of a theme 124 | # further. For a list of options available for each theme, see the 125 | # documentation. 126 | #html_theme_options = {} 127 | html_theme_options = { 128 | 'logo': 'luqum-logo.png', 129 | 'description': 'LUcene QUery Manipulator in python', 130 | 'github_user': 'jurismarches', 131 | 'github_repo': 'luqum', 132 | 'github_banner': True} 133 | 134 | # Add any paths that contain custom themes here, relative to this directory. 135 | #html_theme_path = [] 136 | 137 | # The name for this set of Sphinx documents. 138 | # "<project> v<release> documentation" by default. 139 | #html_title = 'luqum v1.0' 140 | 141 | # A shorter title for the navigation bar. Default is the same as html_title. 142 | #html_short_title = None 143 | 144 | # The name of an image file (relative to this directory) to place at the top 145 | # of the sidebar. 146 | #html_logo = None 147 | 148 | # The name of an image file (relative to this directory) to use as a favicon of 149 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 150 | # pixels large. 151 | #html_favicon = None 152 | 153 | # Add any paths that contain custom static files (such as style sheets) here, 154 | # relative to this directory. They are copied after the builtin static files, 155 | # so a file named "default.css" will overwrite the builtin "default.css".
156 | html_static_path = ['_static'] 157 | 158 | # Add any extra paths that contain custom files (such as robots.txt or 159 | # .htaccess) here, relative to this directory. These files are copied 160 | # directly to the root of the documentation. 161 | #html_extra_path = [] 162 | 163 | # If not None, a 'Last updated on:' timestamp is inserted at every page 164 | # bottom, using the given strftime format. 165 | # The empty string is equivalent to '%b %d, %Y'. 166 | #html_last_updated_fmt = None 167 | 168 | # If true, SmartyPants will be used to convert quotes and dashes to 169 | # typographically correct entities. 170 | #html_use_smartypants = True 171 | 172 | # Custom sidebar templates, maps document names to template names. 173 | #html_sidebars = {} 174 | html_sidebars = {'**': [ 175 | 'about.html', 176 | 'navigation.html', 177 | 'relations.html', 178 | 'searchbox.html', 179 | 'donate.html']} 180 | 181 | 182 | # Additional templates that should be rendered to pages, maps page names to 183 | # template names. 184 | #html_additional_pages = {} 185 | 186 | # If false, no module index is generated. 187 | #html_domain_indices = True 188 | 189 | # If false, no index is generated. 190 | #html_use_index = True 191 | 192 | # If true, the index is split into individual pages for each letter. 193 | #html_split_index = False 194 | 195 | # If true, links to the reST sources are added to the pages. 196 | #html_show_sourcelink = True 197 | 198 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 199 | #html_show_sphinx = True 200 | 201 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 202 | #html_show_copyright = True 203 | 204 | # If true, an OpenSearch description file will be output, and all pages will 205 | # contain a tag referring to it. The value of this option must be the 206 | # base URL from which the finished HTML is served. 207 | #html_use_opensearch = '' 208 | 209 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 210 | #html_file_suffix = None 211 | 212 | # Language to be used for generating the HTML full-text search index. 213 | # Sphinx supports the following languages: 214 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 215 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' 216 | #html_search_language = 'en' 217 | 218 | # A dictionary with options for the search language support, empty by default. 219 | # 'ja' uses this config value. 220 | # 'zh' user can custom change `jieba` dictionary path. 221 | #html_search_options = {'type': 'default'} 222 | 223 | # The name of a javascript file (relative to the configuration directory) that 224 | # implements a search results scorer. If empty, the default will be used. 225 | #html_search_scorer = 'scorer.js' 226 | 227 | # Output file base name for HTML help builder. 228 | htmlhelp_basename = 'luqumdoc' 229 | 230 | # -- Options for LaTeX output --------------------------------------------- 231 | 232 | latex_elements = { 233 | # The paper size ('letterpaper' or 'a4paper'). 234 | #'papersize': 'letterpaper', 235 | 236 | # The font size ('10pt', '11pt' or '12pt'). 237 | #'pointsize': '10pt', 238 | 239 | # Additional stuff for the LaTeX preamble. 240 | #'preamble': '', 241 | 242 | # Latex figure (float) alignment 243 | #'figure_align': 'htbp', 244 | } 245 | 246 | # Grouping the document tree into LaTeX files. List of tuples 247 | # (source start file, target name, title, 248 | # author, documentclass [howto, manual, or own class]). 
249 | latex_documents = [ 250 | (master_doc, 'luqum.tex', 'luqum Documentation', 251 | 'jurismarches', 'manual'), 252 | ] 253 | 254 | # The name of an image file (relative to this directory) to place at the top of 255 | # the title page. 256 | #latex_logo = None 257 | 258 | # For "manual" documents, if this is true, then toplevel headings are parts, 259 | # not chapters. 260 | #latex_use_parts = False 261 | 262 | # If true, show page references after internal links. 263 | #latex_show_pagerefs = False 264 | 265 | # If true, show URL addresses after external links. 266 | #latex_show_urls = False 267 | 268 | # Documents to append as an appendix to all manuals. 269 | #latex_appendices = [] 270 | 271 | # If false, no module index is generated. 272 | #latex_domain_indices = True 273 | 274 | 275 | # -- Options for manual page output --------------------------------------- 276 | 277 | # One entry per manual page. List of tuples 278 | # (source start file, name, description, authors, manual section). 279 | man_pages = [ 280 | (master_doc, 'luqum', 'luqum Documentation', 281 | [author], 1) 282 | ] 283 | 284 | # If true, show URL addresses after external links. 285 | #man_show_urls = False 286 | 287 | 288 | # -- Options for Texinfo output ------------------------------------------- 289 | 290 | # Grouping the document tree into Texinfo files. List of tuples 291 | # (source start file, target name, title, author, 292 | # dir menu entry, description, category) 293 | texinfo_documents = [ 294 | (master_doc, 'luqum', 'luqum Documentation', 295 | author, 'luqum', 'One line description of project.', 296 | 'Miscellaneous'), 297 | ] 298 | 299 | # Documents to append as an appendix to all manuals. 300 | #texinfo_appendices = [] 301 | 302 | # If false, no module index is generated. 303 | #texinfo_domain_indices = True 304 | 305 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 306 | #texinfo_show_urls = 'footnote' 307 | 308 | # If true, do not generate a @detailmenu in the "Top" node's menu. 309 | #texinfo_no_detailmenu = False 310 | 311 | 312 | # Example configuration for intersphinx: refer to the Python standard library. 313 | intersphinx_mapping = {'https://docs.python.org/': None} 314 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. luqum documentation master file, created by 2 | sphinx-quickstart on Wed Apr 13 10:25:52 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to luqum's documentation! 7 | ================================= 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | about 15 | install 16 | quick_start 17 | api 18 | 19 | 20 | 21 | 22 | Indices and tables 23 | ================== 24 | 25 | * :ref:`genindex` 26 | * :ref:`modindex` 27 | * :ref:`search` 28 | 29 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | At the command line: 6 | 7 | .. 
code-block:: bash 7 | 8 | 9 | $ [sudo] pip install luqum 10 | 11 | -------------------------------------------------------------------------------- /luqum-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jurismarches/luqum/64d3d4a1bcdc56f156185df1f4d146582f294998/luqum-logo.png -------------------------------------------------------------------------------- /luqum/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __version__ = '1.0.0' 4 | __version_info__ = tuple(__version__.split('.')) 5 |
-------------------------------------------------------------------------------- /luqum/auto_head_tail.py: -------------------------------------------------------------------------------- 1 | """It can be tedious to add spaces in a tree you generate programmatically. 2 | 3 | This module provides a utility to transform a tree so that it contains the necessary head/tail 4 | for the expression to be printable. 5 | """ 6 | 7 | from . import visitor 8 | 9 | 10 | class AutoHeadTail(visitor.TreeTransformer): 11 | """This class implements a transformer so that a hand-built tree 12 | can have reasonable values for `head` and `tail` on their items, 13 | in order for the expression to be printable. 14 | """ 15 | 16 | SPACER = " " 17 | 18 | def add_head(self, node): 19 | if not node.head: 20 | node.head = self.SPACER 21 | 22 | def add_tail(self, node): 23 | if not node.tail: 24 | node.tail = self.SPACER 25 | 26 | def visit_base_operation(self, node, context): 27 | new_node = node.clone_item() 28 | children = list(self.clone_children(node, new_node, context)) 29 | # add tail to first node 30 | self.add_tail(children[0]) 31 | # add head and tail to inner nodes 32 | for child in children[1:-1]: 33 | self.add_head(child) 34 | self.add_tail(child) 35 | # add head to last 36 | self.add_head(children[-1]) 37 | new_node.children = children 38 | yield new_node 39 | 40 | def visit_unknown_operation(self, node, context): 41 | new_node = node.clone_item() 42 | children = list(self.clone_children(node, new_node, context)) 43 | # add tail to each node, but the last 44 | for child in children[:-1]: 45 | self.add_tail(child) 46 | new_node.children = children 47 | yield new_node 48 | 49 | def visit_not(self, node, context): 50 | new_node = node.clone_item() 51 | children = list(self.clone_children(node, new_node, context)) 52 | # add head to children, to have a space between NOT and the sub expression 53 | self.add_head(children[0]) 54 | new_node.children = children 55 | yield new_node 56 | 57 | def visit_range(self, node, context): 58 | new_node = node.clone_item() 59 | children = list(self.clone_children(node, new_node, context)) 60 | # add tail to lower bound, and head to upper bound 61 | self.add_tail(children[0]) 62 | self.add_head(children[-1]) 63 | new_node.children = children 64 | yield new_node 65 | 66 | def __call__(self, tree): 67 | new_tree = self.visit(tree) 68 | return new_tree 69 | 70 | 71 | auto_head_tail = AutoHeadTail() 72 | """callable to automatically add head and tail to items of a lucene tree so that it is printable 73 | """ 74 |
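# --- editor's note: usage sketch, not part of the original module ---
# A minimal example of the problem auto_head_tail solves, assuming the public
# names shown elsewhere in this repository (luqum.tree, luqum.auto_head_tail):
# >>> from luqum.tree import AndOperation, Word
# >>> from luqum.auto_head_tail import auto_head_tail
# >>> str(AndOperation(Word("foo"), Word("bar")))  # no spaces yet
# 'fooANDbar'
# >>> str(auto_head_tail(AndOperation(Word("foo"), Word("bar"))))
# 'foo AND bar'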
-------------------------------------------------------------------------------- /luqum/check.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import functools 3 | import math 4 | import re 5 | 6 | from . import tree 7 | from . import visitor 8 | from .exceptions import NestedSearchFieldException, ObjectSearchFieldException 9 | from .utils import flatten_nested_fields_specs, normalize_object_fields_specs 10 | 11 | 12 | def camel_to_lower(name): 13 | return "".join( 14 | "_" + w.lower() if w.isupper() else w.lower() 15 | for w in name).lstrip("_") 16 | 17 | 18 | sign = functools.partial(math.copysign, 1) 19 | 20 | 21 | def _check_children(f): 22 | """A decorator to call check on item children 23 | """ 24 | @functools.wraps(f) 25 | def wrapper(self, item, parents): 26 | yield from f(self, item, parents) 27 | for child in item.children: 28 | yield from self.check(child, parents + [item]) 29 | return wrapper 30 | 31 | 32 | class LuceneCheck: 33 | """Check if a query is consistent 34 | 35 | This is intended for use with queries constructed as trees, 36 | as well as those parsed by the parser, which is more tolerant. 37 | 38 | :param int zeal: if zeal > 0, do extra checks for some pitfalls, depending on zeal level 39 | """ 40 | field_name_re = re.compile(r"^\w+$") 41 | space_re = re.compile(r"\s") 42 | invalid_term_chars_re = re.compile(r"[+/-]") 43 | 44 | SIMPLE_EXPR_FIELDS = ( 45 | tree.Boost, tree.Proximity, tree.Fuzzy, tree.Word, tree.Phrase) 46 | 47 | FIELD_EXPR_FIELDS = tuple(list(SIMPLE_EXPR_FIELDS) + [tree.FieldGroup]) 48 | 49 | def __init__(self, zeal=0): 50 | self.zeal = zeal 51 | 52 | def _check_field_name(self, fname): 53 | return self.field_name_re.match(fname) is not None 54 | 55 | @_check_children 56 | def check_search_field(self, item, parents): 57 | if not self._check_field_name(item.name): 58 | yield "%s is not a valid field name" % item.name 59 | if not isinstance(item.expr, self.FIELD_EXPR_FIELDS): 60 | yield "field expression is not valid : %s" % item 61 | 62 | @_check_children 63 | def check_group(self, item, parents): 64 | if parents and isinstance(parents[-1], tree.SearchField): 65 | yield "Group misuse, after SearchField you should use FieldGroup : %s" % parents[-1] 66 | 67 | @_check_children 68 | def check_field_group(self, item, parents): 69 | if not parents or not isinstance(parents[-1], tree.SearchField): 70 | yield ("FieldGroup misuse, it must be used after SearchField : %s" % 71 | (parents[-1] if parents else item)) 72 | 73 | def check_range(self, item, parents): 74 | # TODO check lower bound <= higher bound taking into account wildcards and numbers 75 | return iter([]) 76 | 77 | def check_word(self, item, parents): 78 | if self.space_re.search(item.value): 79 | yield "A single term value can't hold a space %s" % item 80 | if self.zeal and self.invalid_term_chars_re.search(item.value): 81 | yield "Invalid characters in term value: %s" % item.value 82 | 83 | def check_fuzzy(self, item, parents): 84 | if sign(item.degree) < 0: 85 | yield "invalid degree %d, it must be positive" % item.degree 86 | if not isinstance(item.term, tree.Word): 87 | yield "Fuzzy should be on a single term in %s" % str(item) 88 | 89 | def check_proximity(self, item, parents): 90 | if not isinstance(item.term, tree.Phrase): 91 | yield "Proximity can only be on a phrase in %s" % str(item) 92 | 93 | @_check_children 94 | def check_boost(self, item, parents): 95 | return iter([]) 96 | 97 | @_check_children 98 | def check_base_operation(self, item, parents): 99 | return iter([]) 100 | 101 | @_check_children 102 | def check_plus(self, item, parents): 103 | return iter([]) 104 | 105 | def _check_not_operator(self, item, parents): 106 | """Common checker for NOT and - operators""" 107 | if self.zeal: 108 | if isinstance(parents[-1], tree.OrOperation): 109 | yield ("Prohibit or Not really means 'AND NOT' " + 110 | "which is inconsistent with OR operation in %s" % parents[-1]) 111 | 112 | @_check_children 113 | def check_not(self, item, parents): 114 | return self._check_not_operator(item, parents) 115 | 116 | @_check_children 117 | def check_prohibit(self, item, parents): 118 | return self._check_not_operator(item, parents) 119 | 120 | def check(self, item, parents=[]): 121 | # dispatching check to another method 122 | for cls in item.__class__.mro(): 123 | meth = getattr(self, "check_" + camel_to_lower(cls.__name__), None) 124 | if meth is not None: 125 | yield from meth(item, parents) 126 | break 127 | else: 128 | yield "Unknown item type %s : %s" % (item.__class__.__name__, str(item)) 129 | 130 | def __call__(self, tree): 131 | """return True only if there are no errors 132 | """ 133 | for error in self.check(tree): 134 | return False 135 | return True 136 | 137 | def errors(self, tree): 138 | """List all errors""" 139 | return list(self.check(tree))
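# --- editor's note: usage sketch, not part of the original module ---
# LuceneCheck as typically used on a parsed query; per the code above, calling
# the checker returns a bool, while errors() lists every problem found:
# >>> from luqum.parser import parser
# >>> from luqum.check import LuceneCheck
# >>> check = LuceneCheck()
# >>> check(parser.parse("foo AND bar"))
# True
# >>> check.errors(parser.parse("foo AND bar"))
# []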
140 | 141 | 142 | class CheckNestedFields(visitor.TreeVisitor): 143 | """ 144 | Visit the lucene tree to make some checks 145 | 146 | In particular to check nested fields. 147 | 148 | :param nested_fields: a dict where keys are names of nested fields, 149 | values are dicts of sub-nested fields or an empty dict for a leaf 150 | :param object_fields: 151 | this is either None, in which case unknown object fields will be accepted, 152 | or a dict of sub-nested fields (like nested_fields) 153 | """ 154 | 155 | def __init__(self, nested_fields, object_fields=None, sub_fields=None): 156 | assert isinstance(nested_fields, dict) 157 | self.object_fields = normalize_object_fields_specs(object_fields) 158 | self.object_prefixes = set(k.rsplit(".", 1)[0] for k in self.object_fields or []) 159 | self.nested_fields = flatten_nested_fields_specs(nested_fields) 160 | self.nested_prefixes = set(k.rsplit(".", 1)[0] for k in self.nested_fields) 161 | self.sub_fields = normalize_object_fields_specs(sub_fields) 162 | super().__init__(track_parents=True) 163 | 164 | def visit_search_field(self, node, context): 165 | """ 166 | On search field nodes, check nested fields logic 167 | """ 168 | child_context = dict(context) # copy 169 | child_context["prefix"] = context["prefix"] + node.name.split(".") 170 | yield from self.generic_visit(node, child_context) 171 | 172 | def _check_final_operation(self, node, context): 173 | prefix = context["prefix"] 174 | if prefix: 175 | fullname = ".".join(prefix) 176 | if fullname in self.nested_prefixes: 177 | raise NestedSearchFieldException( 178 | ('''"{expr}" can't be directly attributed to "{field}"''' + 179 | ''' as it is a nested field''') 180 | .format(expr=str(node), field=fullname)) 181 | elif fullname in self.object_prefixes: 182 | raise NestedSearchFieldException( 183 | ('''"{expr}" can't be directly attributed to "{field}"''' + 184 | ''' as it is an object field''') 185 | .format(expr=str(node), field=fullname)) 186 | # note: the above check does not apply to subfields, 187 | # as their field can have an expression 188 | elif len(prefix) > 1: 189 | unknown_field = ( 190 | self.sub_fields is not None and 191 | self.object_fields is not None and 192 | fullname not in self.sub_fields and 193 | fullname not in self.object_fields and 194 | fullname not in self.nested_fields) 195 | if unknown_field: 196 | raise ObjectSearchFieldException( 197 | '''"{expr}" attributed to unknown nested or object field "{field}"''' 198 | .format(expr=str(node), field=fullname)) 199 | 200 | def visit_phrase(self, node, context): 201 | """ 202 | On phrase nodes, verify the phrase is in a final search field 203 | """ 204 | yield self._check_final_operation(node, context) 205 | 206 | def visit_term(self, node, context): 207 | """ 208 | On term nodes, verify the term is in a final search field 209 | """ 210 | yield self._check_final_operation(node, context) 211 | 212 | def __call__(self, tree): 213 | return list(self.visit_iter(tree, context={"prefix": []})) 214 |
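# --- editor's note: hedged usage sketch, not part of the original module ---
# CheckNestedFields, based on the class above: declare the nested fields, then
# call the checker on a parsed tree; inconsistent queries raise an exception.
# >>> from luqum.parser import parser
# >>> checker = CheckNestedFields(nested_fields={"author": {"firstname": {}}})
# >>> checker(parser.parse("author.firstname:John"))  # consistent: no exception
# >>> checker(parser.parse("author:John"))  # raises NestedSearchFieldException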
-------------------------------------------------------------------------------- /luqum/deprecated_utils.py: -------------------------------------------------------------------------------- 1 | """Deprecated visitor helper classes. 2 | 3 | see :py:mod:`luqum.visitor` for newer implementations 4 | """ 5 | 6 | import warnings 7 | 8 | from .visitor import camel_to_lower 9 | 10 | 11 | class LuceneTreeVisitor: 12 | """ 13 | Tree Visitor base class, inspired by python's :class:`ast.NodeVisitor`. 14 | 15 | This class is meant to be subclassed, with the subclass implementing 16 | visitor methods for each Node type it is interested in. 17 | 18 | By default, those visitor methods should be named ``'visit_'`` + class 19 | name of the node, converted to lower_case (i.e.: visit_search_node for a 20 | SearchNode class). 21 | 22 | You can tweak this behaviour by overriding the `visitor_method_prefix` & 23 | `generic_visitor_method_name` class attributes. 24 | 25 | If the goal is to modify the initial tree, 26 | use :py:class:`LuceneTreeTransformer` instead. 27 | """ 28 | visitor_method_prefix = 'visit_' 29 | generic_visitor_method_name = 'generic_visit' 30 | 31 | _get_method_cache = None 32 | 33 | def _get_method(self, node): 34 | if self._get_method_cache is None: 35 | self._get_method_cache = {} 36 | try: 37 | meth = self._get_method_cache[type(node)] 38 | except KeyError: 39 | for cls in node.__class__.mro(): 40 | try: 41 | method_name = "{}{}".format( 42 | self.visitor_method_prefix, 43 | camel_to_lower(cls.__name__) 44 | ) 45 | meth = getattr(self, method_name) 46 | break 47 | except AttributeError: 48 | continue 49 | else: 50 | meth = getattr(self, self.generic_visitor_method_name) 51 | self._get_method_cache[type(node)] = meth 52 | return meth 53 | 54 | def visit(self, node, parents=None): 55 | """ Basic, recursive traversal of the tree. """ 56 | warnings.warn( 57 | "LuceneTreeVisitor is deprecated in favor of visitor.TreeVisitor", 58 | DeprecationWarning, 59 | ) 60 | parents = parents or [] 61 | method = self._get_method(node) 62 | yield from method(node, parents) 63 | for child in node.children: 64 | yield from self.visit(child, parents + [node]) 65 | 66 | def generic_visit(self, node, parents=None): 67 | """ 68 | Default visitor function, called if nothing matches the current node. 69 | """ 70 | return iter([]) # No-op 71 | 72 | 73 | class LuceneTreeTransformer(LuceneTreeVisitor): 74 | """ 75 | A :class:`LuceneTreeVisitor` subclass that walks the abstract syntax tree 76 | and allows modifications of traversed nodes. 77 | 78 | The `LuceneTreeTransformer` will walk the AST and use the return value of the 79 | visitor methods to replace or remove the old node. If the return value of 80 | the visitor method is ``None``, the node will be removed from its location, 81 | otherwise it is replaced with the return value. The return value may be the 82 | original node, in which case no replacement takes place.
83 | """ 84 | 85 | def replace_node(self, old_node, new_node, parent): 86 | for k, v in parent.__dict__.items(): # pragma: no branch 87 | if v == old_node: 88 | parent.__dict__[k] = new_node 89 | break 90 | elif isinstance(v, list): 91 | try: 92 | i = v.index(old_node) 93 | if new_node is None: 94 | del v[i] 95 | else: 96 | v[i] = new_node 97 | break 98 | except ValueError: 99 | pass # this was not the attribute containing old_node 100 | elif isinstance(v, tuple): 101 | try: 102 | i = v.index(old_node) 103 | v = list(v) 104 | if new_node is None: 105 | del v[i] 106 | else: 107 | v[i] = new_node 108 | parent.__dict__[k] = tuple(v) 109 | break 110 | except ValueError: 111 | pass # this was not the attribute containing old_node 112 | 113 | def generic_visit(self, node, parent=None): 114 | return node 115 | 116 | def visit(self, node, parents=None): 117 | """ 118 | Recursively traverses the tree and replaces nodes with the appropriate 119 | visitor method's return values. 120 | """ 121 | warnings.warn( 122 | "LuceneTreeTransformer is deprecated in favor of visitor.TreeTransformer", 123 | DeprecationWarning, 124 | ) 125 | parents = parents or [] 126 | method = self._get_method(node) 127 | new_node = method(node, parents) 128 | if parents: 129 | self.replace_node(node, new_node, parents[-1]) 130 | node = new_node 131 | if node is not None: 132 | for child in node.children: 133 | self.visit(child, parents + [node]) 134 | return node 135 | 136 | 137 | class LuceneTreeVisitorV2(LuceneTreeVisitor): 138 | """ 139 | V2 of the LuceneTreeVisitor allowing to evaluate the AST 140 | 141 | It differs from :py:class:`LuceneTreeVisitor` 142 | because it's up to the visit method to recursively call children (or not) 143 | 144 | This class is meant to be subclassed, with the subclass implementing 145 | visitor methods for each Node type it is interested in. 146 | 147 | By default, those visitor methods should be named ``'visit_'`` + class 148 | name of the node, converted to lower_case (i.e.: visit_search_node for a 149 | SearchNode class). 150 | 151 | You can tweak this behaviour by overriding the `visitor_method_prefix` & 152 | `generic_visitor_method_name` class attributes. 153 | 154 | If the goal is to modify the initial tree, 155 | use :py:class:`LuceneTreeTransformer` instead. 156 | """ 157 | 158 | def visit(self, node, parents=None, context=None): 159 | """ Traversal of the tree (recursion is up to each visit method). 160 | 161 | :param list parents: the list of parents 162 | :param dict context: a dict of contextual variables for free use, 163 | to track states while traversing the tree 164 | """ 165 | warnings.warn( 166 | "LuceneTreeVisitorV2 is deprecated in favor of visitor.TreeVisitor", 167 | DeprecationWarning, 168 | ) 169 | if parents is None: 170 | parents = [] 171 | 172 | method = self._get_method(node) 173 | return method(node, parents, context) 174 | 175 | def generic_visit(self, node, parents=None, context=None): 176 | """ 177 | Default visitor function, called if nothing matches the current node. 178 | """ 179 | raise AttributeError( 180 | "No visitor found for this type of node: {}".format( 181 | node.__class__ 182 | ) 183 | ) 184 |
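# --- editor's note: hedged migration sketch, not part of the original module ---
# The classes above are deprecated; the modern equivalents live in luqum.visitor.
# A rough port of a visitor (method names follow the same
# visit_<lower_case_class_name> convention):
# >>> from luqum import visitor
# >>> from luqum.parser import parser
# >>> class TermLister(visitor.TreeVisitor):
# ...     def visit_term(self, node, context):
# ...         yield node.value
# >>> TermLister().visit(parser.parse("foo AND bar"))  # should give ['foo', 'bar']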
-------------------------------------------------------------------------------- /luqum/elasticsearch/__init__.py: -------------------------------------------------------------------------------- 1 | from .visitor import ElasticsearchQueryBuilder # noqa: 2 | from .schema import SchemaAnalyzer # noqa: 3 |
-------------------------------------------------------------------------------- /luqum/elasticsearch/nested.py: -------------------------------------------------------------------------------- 1 | """If you have a query with a nested query containing operations, 2 | when using named queries, Elasticsearch won't report inner matching. 3 | 4 | This is a problem if you rely on it extensively. 5 | """ 6 | 7 | 8 | def get_first_name(query): 9 | if isinstance(query, dict): 10 | if "_name" in query: 11 | return query["_name"] 12 | elif "bool" in query: 13 | # do not go down bool 14 | return None 15 | else: 16 | children = query.values() 17 | elif isinstance(query, list): 18 | children = query 19 | else: 20 | return None 21 | iter_candidates = (get_first_name(child) for child in children) 22 | candidates = [candidate for candidate in iter_candidates if candidate is not None] 23 | return candidates[0] if candidates else None 24 | 25 | 26 | def extract_nested_queries(query, query_nester=None): 27 | """Given a query, 28 | extract all queries that sit under a nested query and boolean operations, 29 | returning an atomic nested version of them. 30 | Those nested queries also take care of changing the name to the nearest inner name. 31 | 32 | This is useful because Elasticsearch won't go down explaining why a nested query is matching. 33 | 34 | :param dict query: elasticsearch query to analyze 35 | :param callable query_nester: this is the function called to nest sub queries; leave it to its default 36 | :return list: queries that you should run to get all the matches 37 | 38 | .. note:: because we re-nest part of bool queries, results might not be accurate 39 | for:: 40 | {"bool": "must" : [ 41 | {"nested": {"path": "a", "match": {"x": "y"}}}, 42 | {"nested": {"path": "a", "match": {"x": "z"}}} 43 | ]} 44 | is not the same as:: 45 | {"nested": {"path": "a", "bool": "must": [{"match": {"x": "y"}}, {"match": {"x": "z"}}]}} 46 | 47 | if x is multivalued.
48 | The first would match `{"a": [{"x": "y"}, {"x": "z"}]}`, 49 | while the second would only match if `x` contains `"y z"` or `"z y"`. 50 | """ 51 | queries = [] # this contains our result 52 | in_nested = query_nester is not None 53 | sub_query_nester = query_nester 54 | if isinstance(query, dict): 55 | if "nested" in query: 56 | params = {k: v for k, v in query["nested"].items() if k not in ("query", "name")} 57 | 58 | def sub_query_nester_func(req, name): 59 | nested = {"nested": {"query": req, **params}} 60 | if query_nester is not None: 61 | nested = query_nester(nested, name) 62 | if name is not None: 63 | nested["nested"]["_name"] = name 64 | return nested 65 | 66 | sub_query_nester = sub_query_nester_func 67 | 68 | bool_param = {"must", "should", "must_not"} & set(query.keys()) 69 | if bool_param and in_nested: 70 | # we are in a list of operations in a bool inside a nested, 71 | # make a query with nested on sub arguments 72 | op, = bool_param # must or should or must_not 73 | # normalize to a list 74 | sub_queries = query[op] if isinstance(query[op], list) else [query[op]] 75 | # add nesting 76 | nested_sub_queries = [ 77 | query_nester(sub_query, get_first_name(sub_query)) for sub_query in sub_queries 78 | ] 79 | # those are the queries we want to return 80 | queries.extend(nested_sub_queries) 81 | # continue processing in each sub query 82 | # (before nesting, nesting is contained in query_nester) 83 | children = sub_queries 84 | else: 85 | children = query.values() 86 | elif isinstance(query, list): 87 | children = query 88 | else: 89 | # leaf: end of recursion 90 | children = [] 91 | 92 | # recurse 93 | for child_query in children: 94 | queries.extend( 95 | extract_nested_queries(child_query, query_nester=sub_query_nester) 96 | ) 97 | return queries 98 |
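# --- editor's note: hedged usage sketch, not part of the original module ---
# Given a nested query whose inner bool combines several named clauses,
# extract_nested_queries returns one atomic nested query per clause, so each
# clause can be run (and thus matched) on its own:
# query = {"nested": {"path": "a", "query": {"bool": {"must": [
#     {"match": {"x": {"query": "y", "_name": "n1"}}},
#     {"match": {"x": {"query": "z", "_name": "n2"}}},
# ]}}}}
# extract_nested_queries(query) should return two queries, each wrapping one
# match clause in its own {"nested": {"path": "a", ...}} envelope, named
# "n1" and "n2" respectively.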
-------------------------------------------------------------------------------- /luqum/elasticsearch/schema.py: -------------------------------------------------------------------------------- 1 | """Analyzing an Elasticsearch schema to provide helpers for query transformation 2 | """ 3 | 4 | 5 | class SchemaAnalyzer: 6 | """A helper that analyzes an Elasticsearch schema, to give you suitable options 7 | to use when transforming queries. 8 | 9 | :param dict schema: the index settings as a dict. 10 | """ 11 | 12 | def __init__(self, schema): 13 | self.settings = schema.get("settings", {}) 14 | mappings = schema.get("mappings", {}) 15 | if mappings.get("properties"): 16 | # ES >= 6 : one document type per index 17 | self.mappings = {"_doc": mappings} 18 | else: 19 | # ES < 6 : multiple document types per index allowed 20 | self.mappings = mappings 21 | 22 | def _dot_name(self, fname, parents): 23 | return ".".join([p[0] for p in parents] + [fname]) 24 | 25 | def default_field(self): 26 | try: 27 | return self.settings["query"]["default_field"] 28 | except KeyError: 29 | return "*" 30 | 31 | def _walk_properties(self, properties, parents=None, subfields=False): 32 | if parents is None: 33 | parents = [] 34 | for fname, fdef in properties.items(): 35 | yield fname, fdef, parents 36 | if subfields and "fields" in fdef: 37 | subfield_parents = parents + [(fname, fdef)] 38 | subdef = dict(fdef) # sub-field definitions override their parent's 39 | subfield_defs = subdef.pop("fields") 40 | for fname, fdef in subfield_defs.items(): 41 | fdef = dict(subdef, **fdef) 42 | yield fname, fdef, subfield_parents 43 | inner_properties = fdef.get("properties", {}) 44 | if inner_properties: 45 | new_parents = parents + [(fname, fdef)] 46 | yield from self._walk_properties(inner_properties, new_parents, subfields) 47 | 48 | def iter_fields(self, subfields=False): 49 | for mapping in self.mappings.values(): 50 | yield from self._walk_properties(mapping.get("properties", {}), subfields=subfields) 51 | 52 | def not_analyzed_fields(self): 53 | for fname, fdef, parents in self.iter_fields(subfields=True): 54 | not_analyzed = ( 55 | (fdef.get("type") == "string" and fdef.get("index", "") == "not_analyzed") or 56 | fdef.get("type") not in ("text", "string", "nested", "object") 57 | ) 58 | if not_analyzed: 59 | yield self._dot_name(fname, parents) 60 | 61 | def nested_fields(self): 62 | result = {} 63 | for fname, fdef, parents in self.iter_fields(): 64 | pdef = parents[-1][1] if parents else {} 65 | if pdef.get("type") == "nested": 66 | target = result 67 | cumulated = [] 68 | for n, _ in parents: 69 | cumulated.append(n) 70 | key = ".".join(cumulated) 71 | if key in target: 72 | target = target[key] 73 | cumulated = [] 74 | if cumulated: 75 | key = ".".join(cumulated) 76 | target = target.setdefault(key, {}) 77 | target[fname] = {} 78 | return result 79 | 80 | def object_fields(self): 81 | for fname, fdef, parents in self.iter_fields(): 82 | pdef = parents[-1][1] if parents else {} 83 | if pdef.get("type") == "object" and fdef.get("type") not in ("object", "nested"): 84 | yield self._dot_name(fname, parents) 85 | 86 | def sub_fields(self): 87 | """return all known subfields 88 | """ 89 | # we do not ask iter_fields for subfields: they would be indistinguishable from regular fields 90 | for fname, fdef, parents in self.iter_fields(): 91 | subfields = fdef.get("fields") 92 | if subfields: 93 | subfield_parents = parents + [(fname, fdef)] 94 | for subname in subfields: 95 | yield self._dot_name(subname, subfield_parents) 96 | 97 | def query_builder_options(self): 98 | """return options suitable for 99 | :py:class:`luqum.elasticsearch.visitor.ElasticsearchQueryBuilder` 100 | """ 101 | return { 102 | "default_field": self.default_field(), 103 | "not_analyzed_fields": list(self.not_analyzed_fields()), 104 | "nested_fields": self.nested_fields(), 105 | "object_fields": list(self.object_fields()), 106 | } 107 |
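# --- editor's note: usage sketch, not part of the original module ---
# Typical flow, close to the project documentation: analyze the index
# configuration, then feed the resulting options to the query builder. The
# schema dict here is a placeholder for your real settings/mappings.
# >>> from luqum.parser import parser
# >>> from luqum.elasticsearch import ElasticsearchQueryBuilder, SchemaAnalyzer
# >>> schema = {"settings": {}, "mappings": {"properties": {"title": {"type": "text"}}}}
# >>> options = SchemaAnalyzer(schema).query_builder_options()
# >>> es_query = ElasticsearchQueryBuilder(**options)(parser.parse('title:"foo bar"'))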
-------------------------------------------------------------------------------- /luqum/exceptions.py: -------------------------------------------------------------------------------- 1 | class InconsistentQueryException(Exception): 2 | """Raised when a query has a problem in its structure 3 | """ 4 | 5 | 6 | class OrAndAndOnSameLevel(InconsistentQueryException): 7 | """ 8 | Raised when an OR and an AND are on the same level, as we don't know how to 9 | handle this case 10 | """ 11 | 12 | 13 | class NestedSearchFieldException(InconsistentQueryException): 14 | """ 15 | Raised when a SearchField is nested in another SearchField, as it doesn't 16 | make sense. For instance field1:(spam AND field2:eggs) 17 | """ 18 | 19 | 20 | class ObjectSearchFieldException(InconsistentQueryException): 21 | """ 22 | Raised when a dotted field name is queried which is not an object field 23 | """ 24 | 25 | 26 | class ParseError(ValueError): 27 | """Exception while parsing a lucene statement 28 | """ 29 | 30 | 31 | class ParseSyntaxError(ParseError): 32 | """Raised when the parser encounters an invalid statement 33 | """ 34 | 35 | 36 | class IllegalCharacterError(ParseError): 37 | """ 38 | Raised when the parser encounters an invalid character 39 | """ 40 |
-------------------------------------------------------------------------------- /luqum/head_tail.py: -------------------------------------------------------------------------------- 1 | """Utilities to manage head and tail of elements 2 | 3 | The scope is to avoid losing part of the original text in the final tree. 4 | """ 5 | from .tree import Item 6 | 7 | 8 | class TokenValue: 9 | 10 | def __init__(self, value): 11 | self.value = value 12 | self.pos = None 13 | self.size = None 14 | self.head = "" 15 | self.tail = "" 16 | 17 | def __repr__(self): 18 | return "TokenValue(%s)" % self.value 19 | 20 | def __str__(self): 21 | return str(self.value) if self.value else "" 22 | 23 | 24 | class HeadTailLexer: 25 | """Utility to handle head and tail at lexer time. 26 | """ 27 | 28 | LEXER_ATTR = "_luqum_headtail" 29 | 30 | @classmethod 31 | def handle(cls, token, orig_value): 32 | """Handling a token. 33 | 34 | .. note:: 35 | PLY does not give access to previous tokens, 36 | nor does it provide any infrastructure for handling specific state. 37 | 38 | So we use the strategy 39 | of putting a :py:class:`HeadTailLexer` instance as an attribute of the lexer 40 | each time we start a new tokenization. 41 | """ 42 | # get instance 43 | if token.lexpos == 0: 44 | # first token: create the instance 45 | instance = cls() 46 | setattr(token.lexer, cls.LEXER_ATTR, instance) 47 | else: 48 | instance = getattr(token.lexer, cls.LEXER_ATTR) 49 | # handle 50 | instance.handle_token(token, orig_value) 51 | 52 | def __init__(self): 53 | self.head = None 54 | """This will track the head of the next element, useful only for the first element 55 | """ 56 | self.last_elt = None 57 | """This will track the last token, so we can use it to add the tail to it. 58 | """ 59 | 60 | def handle_token(self, token, orig_value): 61 | """Handle head and tail for tokens 62 | 63 | The scope is to avoid losing part of the original text and keep it in elements.
64 | """ 65 | # handle headtail 66 | if token.type == "SEPARATOR": 67 | if token.lexpos == 0: 68 | # spaces at expression start, head for next token 69 | self.head = token.value 70 | else: 71 | # tail of last processed token 72 | if self.last_elt is not None: 73 | self.last_elt.value.tail += token.value 74 | else: 75 | # if there is a head, apply it 76 | head = self.head 77 | if head is not None: 78 | token.value.head = head 79 | self.head = None 80 | # keep track of the token, to apply its tail later 81 | self.last_elt = token 82 | # also set pos and size 83 | if isinstance(token.value, (Item, TokenValue)): 84 | token.value.pos = token.lexpos 85 | token.value.size = len(orig_value) 86 | 87 | 88 | token_headtail = HeadTailLexer.handle 89 | 90 | 91 | class HeadTailManager: 92 | """Utility to handle head and tail at expression parse time 93 | """ 94 | 95 | def pos(self, p, head_transfer=False, tail_transfer=False): 96 | """Compute pos and size of element 0 based on its parts (p[1:]) 97 | 98 | :param list p: the parser expression as in PLY 99 | :param bool head_transfer: True if head of first child will be transferred to p[0] 100 | :param bool tail_transfer: True if tail of last child will be transferred to p[0] 101 | """ 102 | # pos 103 | if p[1].pos is not None: 104 | p[0].pos = p[1].pos 105 | if not head_transfer: 106 | # head isn't transferred, so we are before it 107 | p[0].pos -= len(p[1].head) 108 | # size 109 | p[0].size = sum( 110 | (elt.size or 0) + len(elt.head or "") + len(elt.tail or "") for elt in p[1:]) 111 | if head_transfer and p[1].head: 112 | # we account head in size, remove it 113 | p[0].size -= len(p[1].head) 114 | last_p = p[len(p) - 1] # negative indexing not supported by PLY 115 | if tail_transfer and last_p.tail: 116 | # we account tail in size, remove it 117 | p[0].size -= len(last_p.tail) 118 | 119 | def binary_operation(self, p, op_tail): 120 | self.pos(p, head_transfer=False, tail_transfer=False) 121 | # correct size 122 | p[0].size -= len(op_tail) 123 | 124 | def simple_term(self, p): 125 | self.pos(p, head_transfer=True, tail_transfer=True) 126 | p[0].head = p[1].head 127 | p[0].tail = p[1].tail 128 | 129 | def unary(self, p): 130 | """OP expr""" 131 | self.pos(p, head_transfer=True, tail_transfer=False) 132 | p[0].head = p[1].head 133 | p[2].head = p[1].tail + p[2].head 134 | 135 | def post_unary(self, p): 136 | """expr OP""" 137 | self.pos(p, head_transfer=False, tail_transfer=True) 138 | p[1].tail += p[2].head 139 | p[0].tail = p[2].tail 140 | 141 | def paren(self, p): 142 | """( expr )""" 143 | self.pos(p, head_transfer=True, tail_transfer=True) 144 | # p[0] is the global element (Group or FieldGroup) 145 | # p[2] is the content 146 | # p[1] is the left parenthesis 147 | p[0].head = p[1].head 148 | p[2].head = p[1].tail + p[2].head 149 | # p[3] is the right parenthesis 150 | p[2].tail += p[3].head 151 | p[0].tail = p[3].tail 152 | 153 | def range(self, p): 154 | """[ expr TO expr ]""" 155 | self.pos(p, head_transfer=True, tail_transfer=True) 156 | # p[0] is the global element (Range) 157 | # p[2] is the lower bound 158 | p[0].head = p[1].head 159 | p[2].head = p[1].tail + p[2].head 160 | # p[3] is TO 161 | # p[4] is the upper bound 162 | p[2].tail += p[3].head 163 | p[4].head = p[3].tail + p[4].head 164 | # p[5] is the upper bracket 165 | p[4].tail += p[5].head 166 | p[0].tail = p[5].tail 167 | 168 | def search_field(self, p): 169 | """name: expr""" 170 | self.pos(p, head_transfer=True, tail_transfer=False) 171 | # p[0] is the global element (SearchField) 172 | # p[1] is the search field name 173 | # p[2] is COLUMN 174 | p[0].head = p[1].head 175 | if p[1].tail or p[2].head: 176 | pass # FIXME: add warning, or handle space between point and name in SearchField ? 177 | # p[3] is the expression 178 | p[3].head = p[2].tail + p[3].head 179 | 180 | 181 | head_tail = HeadTailManager() 182 | """singleton of HeadTailManager 183 | """ 184 |
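# --- editor's note: illustration, not part of the original module ---
# The visible effect of head/tail bookkeeping: parsing then printing an
# expression is lossless with respect to whitespace:
# >>> from luqum.parser import parser
# >>> str(parser.parse("foo   AND  bar"))
# 'foo   AND  bar'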
-------------------------------------------------------------------------------- /luqum/naming.py: -------------------------------------------------------------------------------- 1 | """Support for naming expressions 2 | 3 | In order to use Elasticsearch named queries, we need to be able to assign names to expressions 4 | and retrieve their positions in the query text. 5 | 6 | This module adds support for that. 7 | """ 8 | from . import tree 9 | from .visitor import PathTrackingVisitor, PathTrackingTransformer 10 | 11 | 12 | #: Names are added to tree items via an attribute named `_luqum_name` 13 | NAME_ATTR = "_luqum_name" 14 | 15 | 16 | def set_name(node, value): 17 | setattr(node, NAME_ATTR, value) 18 | 19 | 20 | def get_name(node): 21 | return getattr(node, NAME_ATTR, None) 22 | 23 | 24 | class TreeAutoNamer(PathTrackingVisitor): 25 | """Helper for :py:func:`auto_name` 26 | """ 27 | 28 | LETTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 29 | _pos_letter = {l: i for i, l in enumerate(LETTERS)} 30 | 31 | def next_name(self, name): 32 | """Given a name, return the next name 33 | 34 | :: 35 | >>> tan = TreeAutoNamer() 36 | >>> tan.next_name(None) 37 | 'a' 38 | >>> tan.next_name('aZ') 39 | 'aZa' 40 | >>> tan.next_name('azb') 41 | 'azc' 42 | """ 43 | if name is None: 44 | # bootstrap 45 | return self.LETTERS[0] 46 | else: 47 | actual_pos = self._pos_letter[name[-1]] 48 | try: 49 | # we want to increment the last letter 50 | return name[:-1] + self.LETTERS[actual_pos + 1] 51 | except IndexError: 52 | # we exhausted the letters, add a new one instead 53 | return name + self.LETTERS[0] 54 | 55 | def visit_base_operation(self, node, context): 56 | """name is to be set on children of operations 57 | """ 58 | # put a _name on each child 59 | name = context["global"]["name"] 60 | for i, child in enumerate(node.children): 61 | name = self.next_name(name) 62 | set_name(child, name) 63 | # remember name to path 64 | context["global"]["name_to_path"][name] = context["path"] + (i,) 65 | # put name back in global context 66 | context["global"]["name"] = name 67 | yield from self.generic_visit(node, context) 68 | 69 | def visit(self, node): 70 | """visit the tree and add names to nodes while tracking their path 71 | """ 72 | # trick: we use a "global" dict inside the context dict so that when we copy the context, 73 | # we still track the same objects 74 | context = {"global": {"name": None, "name_to_path": {}}} 75 | super().visit(node, context) 76 | name_to_path = context["global"]["name_to_path"] 77 | # handle special case: if we have no name so far, put one on the root 78 | if not name_to_path: 79 | node_name = self.next_name(context["global"]["name"]) 80 | set_name(node, node_name) 81 | name_to_path[node_name] = () 82 | return name_to_path 83 | 84 | 85 | def auto_name(tree, targets=None, all_names=False): 86 | """Automatically add names to nodes of a parse tree, in order to be able to track matching. 87 | 88 | We add them to top nodes under operations, as this is where it is useful for ES named queries 89 | 90 | :return dict: association of name with the path (as a tuple) to the corresponding child 91 | """ 92 | return TreeAutoNamer().visit(tree) 93 |
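# --- editor's note: hedged usage sketch, not part of the original module ---
# auto_name names the nodes sitting directly under operations and returns a
# name -> path mapping, where a path addresses children positions from the root:
# >>> from luqum.parser import parser
# >>> tree = parser.parse("foo AND (bar OR baz)")
# >>> name_to_path = auto_name(tree)  # roughly {'a': (0,), 'b': (1,), 'c': (1, 0, 0), ...}
# >>> element_from_name(tree, "a", name_to_path)  # the Word 'foo' (helper defined below)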
94 | 95 | def matching_from_names(names, name_to_path): 96 | """Utility to convert a list of names and the result of auto_name 97 | to the matching parameter for :py:class:`MatchingPropagator` 98 | 99 | :param list names: list of names 100 | :param dict name_to_path: association of names with path to children 101 | :return tuple: (set of matching paths, set of other known paths) 102 | """ 103 | matching = {name_to_path[name] for name in names} 104 | return (matching, set(name_to_path.values()) - matching) 105 | 106 | 107 | def element_from_path(tree, path): 108 | """Given a tree, retrieve the element corresponding to path 109 | 110 | :param luqum.tree.Item tree: luqum expression tree 111 | :param tuple path: tuple representing top down access to a child 112 | :return luqum.tree.Item: target item 113 | """ 114 | # Python prefers iteration over recursion 115 | node = tree 116 | path = list(path) 117 | while path: 118 | node = node.children[path.pop(0)] 119 | return node 120 | 121 | 122 | def element_from_name(tree, name, name_to_path): 123 | return element_from_path(tree, name_to_path[name]) 124 |
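# --- editor's note: hedged end-to-end sketch, not part of the original module ---
# Typical flow with Elasticsearch named queries, reusing tree and name_to_path
# from the sketch above: convert the names ES reports as matched into paths,
# then let MatchingPropagator (defined below) compute every node's status:
# >>> matched_names = ["a"]  # e.g. from an ES hit's "matched_queries"
# >>> matching, other = matching_from_names(matched_names, name_to_path)
# >>> paths_ok, paths_ko = MatchingPropagator()(tree, matching, other)
# >>> html = HTMLMarker()(tree, paths_ok, paths_ko)  # expression with ok/ko spans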
125 | 126 | class MatchingPropagator: 127 | """Class propagating matching to upper elements based on known base element matching 128 | 129 | :param luqum.tree.Item default_operation: tells how to treat UnknownOperation. 130 | Choose between :py:class:`luqum.tree.OrOperation` and :py:class:`luqum.tree.AndOperation` 131 | """ 132 | 133 | OR_NODES = (tree.OrOperation,) 134 | """A tuple of node types considered as OR operations 135 | """ 136 | NEGATION_NODES = (tree.Not, tree.Prohibit) 137 | """A tuple of node types considered as NOT operations 138 | """ 139 | NO_CHILDREN_PROPAGATE = (tree.Range, tree.BaseApprox) 140 | """A tuple of nodes for which propagation is of no use 141 | """ 142 | 143 | def __init__(self, default_operation=tree.OrOperation): 144 | if default_operation is tree.OrOperation: 145 | self.OR_NODES = self.OR_NODES + (tree.UnknownOperation,) 146 | 147 | def _status_from_parent(self, path, matching, other): 148 | """Get status from the nearest parent in the hierarchy which had a name 149 | """ 150 | if path in matching: 151 | return True 152 | elif path in other: 153 | return False 154 | elif not path: 155 | return False 156 | else: 157 | return self._status_from_parent(path[:-1], matching, other) 158 | 159 | def _propagate(self, node, matching, other, path): 160 | """recursively propagate matching 161 | 162 | return tuple: ( 163 | node is matching, 164 | set of paths of matching sub nodes, 165 | set of paths of non matching sub nodes) 166 | """ 167 | paths_ok = set() # paths of nodes that are matching 168 | paths_ko = set() # paths of nodes that are not matching 169 | children_status = [] # bool for each child, indicating if it matches or not 170 | # recurse children 171 | if node.children and not isinstance(node, self.NO_CHILDREN_PROPAGATE): 172 | for i, child in enumerate(node.children): 173 | child_ok, sub_ok, sub_ko = self._propagate( 174 | child, matching, other, path + (i,), 175 | ) 176 | paths_ok.update(sub_ok) 177 | paths_ko.update(sub_ko) 178 | children_status.append(child_ok) 179 | # resolve node status 180 | if path in matching: 181 | node_ok = True 182 | elif children_status: # compute from children 183 | # compute parent success from children 184 | operator = any if isinstance(node, self.OR_NODES) else all 185 | node_ok = operator(children_status) 186 | else: 187 | node_ok = self._status_from_parent(path, matching, other) 188 | if isinstance(node, self.NEGATION_NODES): 189 | # negate result 190 | node_ok = not node_ok 191 | # add node to the right set 192 | target_set = paths_ok if node_ok else paths_ko 193 | target_set.add(path) 194 | # return result 195 | return node_ok, paths_ok, paths_ko 196 | 197 | def __call__(self, tree, matching, other=frozenset()): 198 | """ 199 | Given a list of paths that are known to match, 200 | return all paths in the tree that are matching. 201 | 202 | .. note:: we do not descend into nodes that are positive. 203 | Normally matching just provides nodes at the right levels 204 | for propagation to be effective. 205 | Descending would risk giving inconsistent information. 206 | 207 | :param list matching: list of paths of matching nodes (each path is a tuple) 208 | :param list other: list of other paths that had a name, but were not reported as matching 209 | 210 | :return tuple: ( 211 | set of matching paths after propagation, 212 | set of non matching paths after propagation) 213 | """ 214 | tree_ok, paths_ok, paths_ko = self._propagate(tree, matching, other, ()) 215 | return paths_ok, paths_ko 216 | 217 | 218 | class ExpressionMarker(PathTrackingTransformer): 219 | """A visitor to mark a tree based on elements belonging to a path or not 220 | 221 | One intended usage is to add markers around nodes matching a request, 222 | by altering the tail and head of elements 223 | """ 224 | 225 | def mark_node(self, node, path, *info): 226 | """implement this in your own code, maybe altering the head / tail arguments 227 | """ 228 | return node 229 | 230 | def generic_visit(self, node, context): 231 | # we simply generate new_node and mark it 232 | new_node, = super().generic_visit(node, context) 233 | yield self.mark_node(new_node, context["path"], *context["info"]) 234 | 235 | def __call__(self, tree, *info): 236 | return self.visit(tree, context={"info": info}) 237 | 238 | 239 | class HTMLMarker(ExpressionMarker): 240 | """From paths that are ok or ko, add HTML elements with the right class around elements 241 | 242 | :param str ok_class: class for elements in paths_ok 243 | :param str ko_class: class for elements in paths_ko 244 | :param str element: html element used to surround sub expressions 245 | """ 246 | 247 | def __init__(self, ok_class="ok", ko_class="ko", element="span"): 248 | super().__init__() 249 | self.ok_class = ok_class 250 | self.ko_class = ko_class 251 | self.element = element 252 | 253 | def css_class(self, path, paths_ok, paths_ko): 254 | return self.ok_class if path in paths_ok else self.ko_class if path in paths_ko else None 255 | 256 | def mark_node(self, node, path, paths_ok, paths_ko, parcimonious): 257 | node_class = self.css_class(path, paths_ok, paths_ko) 258 | add_class = node_class is not None 259 | if add_class and parcimonious: 260 | # find the nearest parent with a class 261 | parent_class = None 262 | parent_path = path 263 | while parent_class is None and parent_path: 264 | parent_path = parent_path[:-1] 265 | parent_class = self.css_class(parent_path, paths_ok, paths_ko) 266 | # only add a class if different from the parent's 267 | add_class = node_class != parent_class 268 | if add_class: 269 | node.head = f'<{self.element} class="{node_class}">{node.head}' 270 | node.tail = f'{node.tail}</{self.element}>' 271 | return node 272 | 273 | def __call__(self, tree, paths_ok, 
paths_ko, parcimonious=True): 274 | """representation of tree, adding html elements with right class around subexpressions 275 | according to their presence in paths_ok or paths_ko 276 | 277 | :param tree: a luqum tree 278 | :param paths_ok: set of path to nodes (express as tuple of int) that should get ok_class 279 | :param paths_ko: set of path to nodes that should get ko_class 280 | :param parcimonious: only add class when parent node does not have same class 281 | 282 | :return str: expression with html elements surrounding part of expression 283 | with right class attribute according to paths_ok and paths_ko 284 | """ 285 | new_tree = super().__call__(tree, paths_ok, paths_ko, parcimonious) 286 | return new_tree.__str__(head_tail=True) 287 | -------------------------------------------------------------------------------- /luqum/parsetab.py: -------------------------------------------------------------------------------- 1 | 2 | # parsetab.py 3 | # This file is automatically generated. Do not edit. 4 | # pylint: disable=W,C,R 5 | _tabversion = '3.10' 6 | 7 | _lr_method = 'LALR' 8 | 9 | _lr_signature = 'leftIMPLICIT_OPleftOR_OPleftAND_OPnonassocPLUSMINUSnonassocBOOSTnonassocTOrightUMINUSAND_OP APPROX BOOST COLUMN GREATERTHAN LBRACKET LESSTHAN LPAREN MINUS NOT OR_OP PHRASE PLUS RBRACKET REGEX RPAREN TERM TOexpression : expression OR_OP expressionexpression : expression AND_OP expressionexpression : expression expression %prec IMPLICIT_OPunary_expression : PLUS unary_expressionunary_expression : MINUS unary_expressionunary_expression : NOT unary_expressionexpression : unary_expressionunary_expression : LPAREN expression RPARENunary_expression : LBRACKET phrase_or_possibly_negative_term TO phrase_or_possibly_negative_term RBRACKETpossibly_negative_term : MINUS phrase_or_term %prec UMINUS\n | phrase_or_termphrase_or_possibly_negative_term : possibly_negative_term\n | PHRASEunary_expression : LESSTHAN phrase_or_termunary_expression : GREATERTHAN phrase_or_termunary_expression : TERM COLUMN unary_expressionunary_expression : PHRASEunary_expression : PHRASE APPROXunary_expression : unary_expression BOOSTunary_expression : TERMunary_expression : TERM APPROXunary_expression : REGEXunary_expression : TOphrase_or_term : TERM\n | PHRASE' 10 | 11 | _lr_action_items = 
{'PLUS':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[3,3,-7,3,3,3,3,-23,-20,-17,-22,3,3,3,-19,-4,-5,-6,3,-24,-14,-25,-15,3,-21,-18,3,3,-8,-16,-9,]),'MINUS':([0,1,2,3,4,5,6,7,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,37,39,41,],[4,4,-7,4,4,4,4,25,-23,-20,-17,-22,4,4,4,-19,-4,-5,-6,4,-24,-14,-25,-15,4,-21,-18,4,4,-8,25,-16,-9,]),'NOT':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[5,5,-7,5,5,5,5,-23,-20,-17,-22,-3,5,5,-19,-4,-5,-6,5,-24,-14,-25,-15,5,-21,-18,-1,-2,-8,-16,-9,]),'LPAREN':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[6,6,-7,6,6,6,6,-23,-20,-17,-22,-3,6,6,-19,-4,-5,-6,6,-24,-14,-25,-15,6,-21,-18,-1,-2,-8,-16,-9,]),'LBRACKET':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[7,7,-7,7,7,7,7,-23,-20,-17,-22,-3,7,7,-19,-4,-5,-6,7,-24,-14,-25,-15,7,-21,-18,-1,-2,-8,-16,-9,]),'LESSTHAN':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[9,9,-7,9,9,9,9,-23,-20,-17,-22,-3,9,9,-19,-4,-5,-6,9,-24,-14,-25,-15,9,-21,-18,-1,-2,-8,-16,-9,]),'GREATERTHAN':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[10,10,-7,10,10,10,10,-23,-20,-17,-22,-3,10,10,-19,-4,-5,-6,10,-24,-14,-25,-15,10,-21,-18,-1,-2,-8,-16,-9,]),'TERM':([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,25,27,28,29,30,31,32,33,34,35,36,37,39,41,],[11,11,-7,11,11,11,11,27,-23,27,27,-20,-17,-22,-3,11,11,-19,-4,-5,-6,11,27,-24,-14,-25,-15,11,-21,-18,-1,-2,-8,27,-16,-9,]),'PHRASE':([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,25,27,28,29,30,31,32,33,34,35,36,37,39,41,],[12,12,-7,12,12,12,12,24,-23,29,29,-20,-17,-22,-3,12,12,-19,-4,-5,-6,12,29,-24,-14,-25,-15,12,-21,-18,-1,-2,-8,24,-16,-9,]),'REGEX':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,27,28,29,30,31,32,33,34,35,36,39,41,],[13,13,-7,13,13,13,13,-23,-20,-17,-22,-3,13,13,-19,-4,-5,-6,13,-24,-14,-25,-15,13,-21,-18,-1,-2,-8,-16,-9,]),'TO':([0,1,2,3,4,5,6,8,11,12,13,14,15,16,17,18,19,20,21,22,23,24,26,27,28,29,30,31,32,33,34,35,36,38,39,41,],[8,8,-7,8,8,8,8,-23,-20,-17,-22,8,8,8,-19,-4,-5,-6,8,37,-12,-13,-11,-24,-14,-25,-15,8,-21,-18,8,8,-8,-10,-16,-9,]),'$end':([1,2,8,11,12,13,14,17,18,19,20,27,28,29,30,32,33,34,35,36,39,41,],[0,-7,-23,-20,-17,-22,-3,-19,-4,-5,-6,-24,-14,-25,-15,-21,-18,-1,-2,-8,-16,-9,]),'OR_OP':([1,2,8,11,12,13,14,17,18,19,20,21,27,28,29,30,32,33,34,35,36,39,41,],[15,-7,-23,-20,-17,-22,15,-19,-4,-5,-6,15,-24,-14,-25,-15,-21,-18,-1,-2,-8,-16,-9,]),'AND_OP':([1,2,8,11,12,13,14,17,18,19,20,21,27,28,29,30,32,33,34,35,36,39,41,],[16,-7,-23,-20,-17,-22,16,-19,-4,-5,-6,16,-24,-14,-25,-15,-21,-18,16,-2,-8,-16,-9,]),'RPAREN':([2,8,11,12,13,14,17,18,19,20,21,27,28,29,30,32,33,34,35,36,39,41,],[-7,-23,-20,-17,-22,-3,-19,-4,-5,-6,36,-24,-14,-25,-15,-21,-18,-1,-2,-8,-16,-9,]),'BOOST':([2,8,11,12,13,17,18,19,20,27,28,29,30,32,33,36,39,41,],[17,-23,-20,-17,-22,-19,17,17,17,-24,-14,-25,-15,-21,-18,-8,17,-9,]),'COLUMN':([11,],[31,]),'APPROX':([11,12,],[32,33,]),'RBRACKET':([23,24,26,27,29,38,40,],[-12,-13,-11,-24,-25,-10,41,]),} 12 | 13 | _lr_action = {} 14 | for _k, _v in _lr_action_items.items(): 15 | for _x,_y in zip(_v[0],_v[1]): 16 | if not _x in _lr_action: _lr_action[_x] = {} 17 | _lr_action[_x][_k] = _y 18 | del _lr_action_items 19 | 20 | _lr_goto_items = 
{'expression':([0,1,6,14,15,16,21,34,35,],[1,14,21,14,34,35,14,14,14,]),'unary_expression':([0,1,3,4,5,6,14,15,16,21,31,34,35,],[2,2,18,19,20,2,2,2,2,2,39,2,2,]),'phrase_or_possibly_negative_term':([7,37,],[22,40,]),'possibly_negative_term':([7,37,],[23,23,]),'phrase_or_term':([7,9,10,25,37,],[26,28,30,38,26,]),} 21 | 22 | _lr_goto = {} 23 | for _k, _v in _lr_goto_items.items(): 24 | for _x, _y in zip(_v[0], _v[1]): 25 | if not _x in _lr_goto: _lr_goto[_x] = {} 26 | _lr_goto[_x][_k] = _y 27 | del _lr_goto_items 28 | _lr_productions = [ 29 | ("S' -> expression","S'",1,None,None,None), 30 | ('expression -> expression OR_OP expression','expression',3,'p_expression_or','parser.py',254), 31 | ('expression -> expression AND_OP expression','expression',3,'p_expression_and','parser.py',260), 32 | ('expression -> expression expression','expression',2,'p_expression_implicit','parser.py',266), 33 | ('unary_expression -> PLUS unary_expression','unary_expression',2,'p_expression_plus','parser.py',272), 34 | ('unary_expression -> MINUS unary_expression','unary_expression',2,'p_expression_minus','parser.py',278), 35 | ('unary_expression -> NOT unary_expression','unary_expression',2,'p_expression_not','parser.py',284), 36 | ('expression -> unary_expression','expression',1,'p_expression_unary','parser.py',290), 37 | ('unary_expression -> LPAREN expression RPAREN','unary_expression',3,'p_grouping','parser.py',295), 38 | ('unary_expression -> LBRACKET phrase_or_possibly_negative_term TO phrase_or_possibly_negative_term RBRACKET','unary_expression',5,'p_range','parser.py',301), 39 | ('possibly_negative_term -> MINUS phrase_or_term','possibly_negative_term',2,'p_possibly_negative_term','parser.py',313), 40 | ('possibly_negative_term -> phrase_or_term','possibly_negative_term',1,'p_possibly_negative_term','parser.py',314), 41 | ('phrase_or_possibly_negative_term -> possibly_negative_term','phrase_or_possibly_negative_term',1,'p_phrase_or_possibly_negative_term','parser.py',323), 42 | ('phrase_or_possibly_negative_term -> PHRASE','phrase_or_possibly_negative_term',1,'p_phrase_or_possibly_negative_term','parser.py',324), 43 | ('unary_expression -> LESSTHAN phrase_or_term','unary_expression',2,'p_lessthan','parser.py',329), 44 | ('unary_expression -> GREATERTHAN phrase_or_term','unary_expression',2,'p_greaterthan','parser.py',336), 45 | ('unary_expression -> TERM COLUMN unary_expression','unary_expression',3,'p_field_search','parser.py',343), 46 | ('unary_expression -> PHRASE','unary_expression',1,'p_quoting','parser.py',352), 47 | ('unary_expression -> PHRASE APPROX','unary_expression',2,'p_proximity','parser.py',357), 48 | ('unary_expression -> unary_expression BOOST','unary_expression',2,'p_boosting','parser.py',363), 49 | ('unary_expression -> TERM','unary_expression',1,'p_terms','parser.py',369), 50 | ('unary_expression -> TERM APPROX','unary_expression',2,'p_fuzzy','parser.py',374), 51 | ('unary_expression -> REGEX','unary_expression',1,'p_regex','parser.py',380), 52 | ('unary_expression -> TO','unary_expression',1,'p_to_as_term','parser.py',386), 53 | ('phrase_or_term -> TERM','phrase_or_term',1,'p_phrase_or_term','parser.py',392), 54 | ('phrase_or_term -> PHRASE','phrase_or_term',1,'p_phrase_or_term','parser.py',393), 55 | ] 56 | -------------------------------------------------------------------------------- /luqum/pretty.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """This module provides a pretty printer for lucene query tree. 
3 | """ 4 | from .tree import BaseOperation, BaseGroup, SearchField 5 | 6 | 7 | class _StickMarker: 8 | """Use in list between two elements that must stick together 9 | """ 10 | 11 | def __len__(self): 12 | return 0 13 | 14 | 15 | # a marker to avoid a new line between two elements 16 | _STICK_MARKER = _StickMarker() 17 | 18 | 19 | class Prettifier(object): 20 | """Class to generate a pretty printer. 21 | """ 22 | 23 | def __init__(self, indent=4, max_len=80, inline_ops=False): 24 | """ 25 | The pretty printer factory. 26 | 27 | :param int indent: number of space for indentation 28 | :param int max_len: maximum line length in number of characters. 29 | Prettyfier will do its best to keep inside those margin, 30 | but as it can only split on operators, it may not be possible. 31 | :param bool inline_ops: if False (default) operators are printed on a new line 32 | if True, operators are printed at the end of the line. 33 | """ 34 | self.indent = indent 35 | self.prefix = " " * self.indent 36 | self.max_len = max_len 37 | self.inline_ops = inline_ops 38 | 39 | def _get_chains(self, element, parent=None): 40 | """return a list of string and list, and recursively 41 | 42 | An inner list represent a level of indentation 43 | A string is information from the level 44 | """ 45 | if isinstance(element, BaseOperation): 46 | if not isinstance(parent, BaseOperation) or element.op == parent.op: 47 | # same level, this is just associativity 48 | num_children = len(element.children) 49 | for n, child in enumerate(element.children): 50 | yield from self._get_chains(child, element) 51 | if n < num_children - 1: 52 | if self.inline_ops: 53 | yield _STICK_MARKER 54 | if element.op: 55 | yield element.op 56 | else: 57 | # another operation, raise level 58 | new_level = [] 59 | num_children = len(element.children) 60 | for n, child in enumerate(element.children): 61 | new_level.extend(self._get_chains(child, element)) 62 | if n < num_children - 1: 63 | if self.inline_ops: 64 | new_level.append(_STICK_MARKER) 65 | if element.op: 66 | new_level.append(element.op) 67 | yield new_level 68 | elif isinstance(element, BaseGroup): 69 | # raise level 70 | yield "(" 71 | yield list(self._get_chains(element.expr, element)) 72 | if self.inline_ops: 73 | yield _STICK_MARKER 74 | yield ")" 75 | elif isinstance(element, SearchField): 76 | # use recursion on sub expression 77 | yield element.name + ":" 78 | yield _STICK_MARKER 79 | yield from self._get_chains(element.expr, element) 80 | else: 81 | # simple element 82 | yield str(element) 83 | 84 | def _count_chars(self, element): 85 | """Replace each element by the element and a count of chars in it (and recursively) 86 | 87 | This will help, compute if elements can stand on a line or not 88 | """ 89 | if isinstance(element, list): 90 | with_counts = [self._count_chars(c)for c in element] 91 | # when counting we add a space for joining 92 | return with_counts, sum(n + 1 for c, n in with_counts) - 1 93 | else: 94 | return element, len(element) 95 | 96 | def _apply_stick(self, elements): 97 | last = None 98 | sticking = False 99 | for current in elements: 100 | if current == _STICK_MARKER: 101 | assert last is not None, "_STICK_MARKER should never be first !" 
-------------------------------------------------------------------------------- /luqum/thread.py: --------------------------------------------------------------------------------
1 | import threading
2 | 
3 | from . import parser
4 | 
5 | thread_local = threading.local()
6 | 
7 | 
8 | def parse(input=None, lexer=None, debug=False, tracking=False):
9 |     """A (hopefully) thread safe version of :py:meth:`luqum.parser.parse`
10 | 
11 |     PLY is not thread safe because of its lexer state, but by cloning the lexer we can be
12 |     thread safe. see: https://github.com/jurismarches/luqum/issues/72
13 | 
14 |     Warning: The parameters ``lexer``, ``debug`` and ``tracking`` are not used.
15 |     They are still present for signature compatibility.
16 |     """
17 |     if not hasattr(thread_local, "lexer"):
18 |         thread_local.lexer = parser.lexer.clone()
19 |     return parser.parser.parse(input, lexer=thread_local.lexer)
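A sketch of the intended use of ``luqum.thread.parse`` (the worker function, thread names and query strings are illustrative assumptions):

    import threading

    from luqum.thread import parse

    results = {}

    def worker(name, query):
        # each thread lazily gets its own cloned lexer on its first call
        results[name] = parse(query)

    threads = [
        threading.Thread(target=worker, args=("t1", "foo OR bar")),
        threading.Thread(target=worker, args=("t2", 'title:"spam" AND ham')),
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # results now maps each name to a luqum.tree expression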
-------------------------------------------------------------------------------- /luqum/visitor.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Base classes to implement a visitor pattern.
3 | """
4 | 
5 | 
6 | def camel_to_lower(name):
7 |     return "".join(
8 |         "_" + w.lower() if w.isupper() else w.lower()
9 |         for w in name).lstrip("_")
10 | 
11 | 
12 | class TreeVisitor:
13 |     """
14 |     Tree Visitor base class.
15 | 
16 |     This class is meant to be subclassed, with the subclass implementing
17 |     visitor methods for each Node type it is interested in.
18 | 
19 |     By default, those visitor methods should be named ``'visit_'`` + class
20 |     name of the node, converted to lower_case (i.e. visit_search_node for a
21 |     SearchNode class)[#tweakvisit]_.
22 | 
23 |     It's up to the visit method of each node to recursively visit children (or not).
24 |     It may be done simply by calling the generic_visit method.
25 | 
26 |     By default, `generic_visit` simply triggers the visit of subnodes, yielding no information.
27 | 
28 |     If the goal is to modify the initial tree, to get a new modified copy,
29 |     use :py:class:`TreeTransformer` instead.
30 | 
31 |     .. [#tweakvisit]: You can tweak this behaviour
32 |        by overriding the `visitor_method_prefix` & `generic_visitor_method_name` class attributes.
33 | 
34 |     :param bool track_parents: if True, the context will contain the parents of the current node
35 |        as a tuple. It's up to you to maintain it in your own visit methods.
36 |     """
37 |     visitor_method_prefix = 'visit_'
38 |     generic_visitor_method_name = 'generic_visit'
39 | 
40 |     def __init__(self, track_parents=False):
41 |         self.track_parents = track_parents
42 | 
43 |     _get_method_cache = None
44 | 
45 |     def _get_method(self, node):
46 |         if self._get_method_cache is None:
47 |             self._get_method_cache = {}
48 |         try:
49 |             meth = self._get_method_cache[type(node)]
50 |         except KeyError:
51 |             for cls in node.__class__.mro():
52 |                 try:
53 |                     method_name = "{}{}".format(
54 |                         self.visitor_method_prefix,
55 |                         camel_to_lower(cls.__name__)
56 |                     )
57 |                     meth = getattr(self, method_name)
58 |                     break
59 |                 except AttributeError:
60 |                     continue
61 |             else:
62 |                 meth = getattr(self, self.generic_visitor_method_name)
63 |             self._get_method_cache[type(node)] = meth
64 |         return meth
65 | 
66 |     def visit(self, tree, context=None):
67 |         """Traversal of tree
68 | 
69 |         :param luqum.tree.Item tree: a tree representing a lucene expression
70 |         :param dict context: a dict with initial values for context
71 | 
72 |         .. note:: the values in context are not guaranteed to move up the hierarchy,
73 |            because we copy the context for children so that they can have specific values.
74 | 
75 |            A trick you can use if you need values to move up the hierarchy
76 |            is to set a `"global"` key containing a dict, where you can store values.
77 |         """
78 |         if context is None:
79 |             context = {}
80 |         return list(self.visit_iter(tree, context=context))
81 | 
82 |     def visit_iter(self, node, context):
83 |         """
84 |         Basic, recursive traversal of the tree.
85 | 
86 |         :param luqum.tree.Item node: current node
87 |         :param dict context: a dict of contextual variables for free use
88 |            to track states while traversing the tree (e.g. the current field name)
89 |         """
90 |         method = self._get_method(node)
91 |         yield from method(node, context)
92 | 
93 |     def child_context(self, node, child, context, **kwargs):
94 |         """Generate a context for children.
95 | 
96 |         The child context is distinct from the parent context,
97 |         so that visits in one branch do not affect others.
98 | 
99 |         .. note:: If you need global parameters,
100 |            a trick is to put them in a dict under a "global" entry,
101 |            as we do a shallow copy of the context, not a deep one.
102 | 
103 |         :param luqum.tree.Item node: parent node
104 |         :param luqum.tree.Item child: child node
105 |         :param dict context: parent context
106 |         :return dict: child context
107 |         """
108 |         child_context = dict(context)
109 |         if self.track_parents:
110 |             child_context["parents"] = context.get("parents", ()) + (node,)
111 |         return child_context
112 | 
113 |     def generic_visit(self, node, context):
114 |         """
115 |         Default visitor function, called if nothing matches the current node.
116 | 
117 |         It simply visits children.
118 | 
119 |         :param luqum.tree.Item node: current node
120 |         :param dict context: context (aka local parameters received from parents)
121 |         """
122 |         for child in node.children:
123 |             child_context = self.child_context(node, child, context)
124 |             yield from self.visit_iter(child, context=child_context)
125 | 
126 | 
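A minimal sketch of a concrete visitor (not part of this module; the ``WordLister`` class and the sample tree are illustrative assumptions relying only on the TreeVisitor API above):

    from luqum.tree import AndOperation, SearchField, Word
    from luqum.visitor import TreeVisitor

    class WordLister(TreeVisitor):
        # called for each luqum.tree.Word node, per the camel_to_lower naming rule
        def visit_word(self, node, context):
            yield node.value

    tree = AndOperation(SearchField("title", Word("spam")), Word("ham"))
    # visit() collects everything the visit_* methods yield, in traversal order
    assert WordLister().visit(tree) == ["spam", "ham"]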
127 | class TreeTransformer(TreeVisitor):
128 |     """A version of TreeVisitor that is aimed at obtaining a transformed copy of the tree.
129 | 
130 |     .. note:: It is far better to build a transformed copy
131 |        than to modify the original tree in place, as it is less error prone.
132 | 
133 |     :param bool track_new_parents: do we want to track new parents in the context?
134 |     """
135 | 
136 |     def __init__(self, track_new_parents=False, **kwargs):
137 |         self.track_new_parents = track_new_parents
138 |         super().__init__(**kwargs)
139 | 
140 |     def _clone_item(self, node):
141 |         """Simply call node.clone_item.
142 | 
143 |         Override this method to add specific tweaks if needed (like copying special attributes)
144 |         """
145 |         return node.clone_item()
146 | 
147 |     def visit(self, tree, context=None):
148 |         """Visit the tree, by default building a copy and returning it.
149 | 
150 |         :param luqum.tree.Item tree: luqum expression tree
151 |         :param context: optional initial context
152 |         """
153 |         if context is None:
154 |             context = {}
155 |         try:
156 |             value, = self.visit_iter(tree, context=context)
157 |             return value
158 |         except ValueError as e:
159 |             if str(e).startswith(("too many values to unpack", "not enough values to unpack")):
160 |                 exc = ValueError(
161 |                     "The visit of the tree should have produced exactly one element "
162 |                     "(the transformed tree)"
163 |                 )
164 |                 raise exc from e
165 |             else:
166 |                 raise
167 | 
168 |     def child_context(self, node, child, context, **kwargs):
169 |         child_context = super().child_context(node, child, context, **kwargs)
170 |         if self.track_new_parents:
171 |             child_context["new_parents"] = context.get("new_parents", ()) + (kwargs["new_node"],)
172 |         return child_context
173 | 
174 |     def generic_visit(self, node, context):
175 |         """
176 |         Default visitor function, called if nothing matches the current node.
177 | 
178 |         It simply clones the node and its children.
179 |         """
180 |         new_node = self._clone_item(node)
181 |         new_node.children = list(self.clone_children(node, new_node, context))
182 |         yield new_node
183 | 
184 |     def clone_children(self, node, new_node, context):
185 |         """Helper to clone children.
186 | 
187 |         .. note:: a child may generate more than one child, or none, for flexibility,
188 |            but it's up to the transformer to ensure everything stays consistent
189 |         """
190 |         for child in node.children:
191 |             child_context = self.child_context(node, child, context, new_node=new_node)
192 |             new_children = self.visit_iter(child, context=child_context)
193 |             for new_child in new_children:
194 |                 yield new_child
195 | 
196 | 
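A matching sketch for TreeTransformer (again not part of the package sources; ``Lowercaser`` is an illustrative assumption, and for simplicity it rebuilds fresh ``Word`` nodes instead of cloning, so any head/tail whitespace carried by the original words is dropped):

    from luqum.tree import AndOperation, Word
    from luqum.visitor import TreeTransformer

    class Lowercaser(TreeTransformer):
        def visit_word(self, node, context):
            # yield exactly one replacement node for each Word
            yield Word(node.value.lower())

    tree = AndOperation(Word("FOO"), Word("Bar"))
    new_tree = Lowercaser().visit(tree)  # the original tree is left untouched
    assert str(new_tree) == str(AndOperation(Word("foo"), Word("bar")))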
197 | class PathTrackingMixin:
198 |     """It can be useful to compute the path of an element (as a tuple of indexes in parent children)
199 | 
200 |     This mixin provides the base components
201 |     """
202 | 
203 |     def child_context(self, node, child, context, **kwargs):
204 |         """Thanks to the "position" entry in kwargs, we add each child's path to its context
205 |         """
206 |         child_context = super().child_context(node, child, context, **kwargs)
207 |         child_context["path"] = context["path"] + (kwargs["position"],)
208 |         return child_context
209 | 
210 |     def visit(self, node, context=None):
211 |         """Visit the tree while tracking each node's path
212 |         """
213 |         if context is None:
214 |             context = {}
215 |         context["path"] = ()
216 |         return super().visit(node, context=context)
217 | 
218 | 
219 | class PathTrackingVisitor(PathTrackingMixin, TreeVisitor):
220 |     """Path tracking version of TreeVisitor
221 |     """
222 | 
223 |     def generic_visit(self, node, context):
224 |         for i, child in enumerate(node.children):
225 |             child_context = self.child_context(node, child, context, position=i)
226 |             yield from self.visit_iter(child, context=child_context)
227 | 
228 | 
229 | class PathTrackingTransformer(PathTrackingMixin, TreeTransformer):
230 |     """Path tracking version of TreeTransformer
231 |     """
232 | 
233 |     def clone_children(self, node, new_node, context):
234 |         for i, child in enumerate(node.children):
235 |             child_context = self.child_context(node, child, context, new_node=new_node, position=i)
236 |             new_children = self.visit_iter(child, context=child_context)
237 |             for new_child in new_children:
238 |                 yield new_child
239 | 
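A last sketch, for path tracking (not part of the package sources; the paths shown in the comment assume the parser groups this expression as ``AndOperation(Word, Group(OrOperation(...)))``):

    from luqum.parser import parser
    from luqum.visitor import PathTrackingVisitor

    class WordPaths(PathTrackingVisitor):
        def visit_word(self, node, context):
            # context["path"] is the tuple of child positions leading to this node
            yield (context["path"], node.value)

    tree = parser.parse("foo AND (bar OR baz)")
    # yields something like [((0,), 'foo'), ((1, 0, 0), 'bar'), ((1, 0, 1), 'baz')]
    print(WordPaths().visit(tree))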
-------------------------------------------------------------------------------- /pyproject.toml: --------------------------------------------------------------------------------
1 | [tool.pytest.ini_options]
2 | addopts = '--doctest-modules --doctest-glob="test_*.rst" --cov=luqum --cov-branch --cov-report html --no-cov-on-fail'
3 | python_files = 'test_*.py tests.py'
4 | 
5 | 
-------------------------------------------------------------------------------- /requirements-dev.txt: --------------------------------------------------------------------------------
1 | -r requirements.txt
2 | 
3 | coverage==7.6.10
4 | elasticsearch-dsl==8.17.1
5 | flake8==7.1.1
6 | pytest==8.3.4
7 | pytest-cov==6.0.0
8 | Sphinx==8.1.3
9 | wheel==0.45.1
10 | build==1.2.2.post1
11 | twine==6.1.0
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | ply==3.11
2 | 
-------------------------------------------------------------------------------- /setup.cfg: --------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length=100
3 | exclude=
4 |     parsetab.py
5 | 
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from setuptools import setup
3 | 
4 | from luqum import __version__
5 | 
6 | 
7 | with open('README.rst', 'r') as f:
8 |     long_description = f.read()
9 | with open('CHANGELOG.rst', 'r') as f:
10 |     long_description += "\n\n" + f.read()
11 | 
12 | 
13 | setup(
14 |     name='luqum',
15 |     version=__version__,
16 |     description="A Lucene query parser generating ElasticSearch queries and more!",
17 |     long_description=long_description,
18 |     author='Jurismarches',
19 |     author_email='contact@jurismarches.com',
20 |     url='https://github.com/jurismarches/luqum',
21 |     packages=[
22 |         'luqum',
23 |         'luqum.elasticsearch'
24 |     ],
25 |     install_requires=[
26 |         'ply>=3.11',
27 |     ],
28 |     classifiers=[
29 |         'Development Status :: 4 - Beta',
30 |         'License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)',
31 |         'License :: OSI Approved :: Apache Software License',
32 |         'Intended Audience :: Developers',
33 |         'Programming Language :: Python',
34 |         'Programming Language :: Python :: 3.10',
35 |         'Programming Language :: Python :: 3.11',
36 |         'Programming Language :: Python :: 3.12',
37 |         'Programming Language :: Python :: 3.13',
38 |     ],
39 | )
-------------------------------------------------------------------------------- /tests/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/jurismarches/luqum/64d3d4a1bcdc56f156185df1f4d146582f294998/tests/__init__.py
-------------------------------------------------------------------------------- /tests/alternative_lexer.py: --------------------------------------------------------------------------------
1 | """
2 | Fake Lexer to test: [Multiple Parsers and
3 | Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37)
4 | """
5 | 
6 | # List of token names. This is always required
7 | tokens = (
8 |     "NUMBER",
9 |     "PLUS",
10 |     "MINUS",
11 |     "TIMES",
12 |     "DIVIDE",
13 |     "LPAREN",
14 |     "RPAREN",
15 | )
16 | 
17 | # Regular expression rules for simple tokens
18 | t_PLUS = r"\+"
19 | t_MINUS = r"-"
20 | t_TIMES = r"\*"
21 | t_DIVIDE = r"/"
22 | t_LPAREN = r"\("
23 | t_RPAREN = r"\)"
24 | 
25 | 
26 | # A regular expression rule with some action code
27 | def t_NUMBER(t):
28 |     r"\d+"
29 |     t.value = int(t.value)
30 |     return t
31 | 
32 | 
33 | # Define a rule so we can track line numbers
34 | def t_newline(t):
35 |     r"\n+"
36 |     t.lexer.lineno += len(t.value)
37 | 
38 | 
39 | # A string containing ignored characters (spaces and tabs)
40 | t_ignore = " \t"
41 | 
42 | 
43 | # Error handling rule
44 | def t_error(t):
45 |     print("Illegal character '%s'" % t.value[0])
46 |     t.lexer.skip(1)
-------------------------------------------------------------------------------- /tests/test_auto_head_tail.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from unittest import TestCase
3 | 
4 | from luqum.tree import (
5 |     SearchField, FieldGroup, Group, Word, Phrase, Proximity, Fuzzy, Range,
6 |     Not, AndOperation, OrOperation, Plus, UnknownOperation)
7 | from luqum.auto_head_tail import auto_head_tail
8 | 
9 | 
10 | class AutoHeadTailTestCase(TestCase):
11 | 
12 |     def test_or_operation(self):
13 |         tree = OrOperation(Word("foo"), Word("bar"), Word("baz"))
14 |         self.assertEqual(str(tree), "fooORbarORbaz")
15 |         self.assertEqual(str(auto_head_tail(tree)), "foo OR bar OR baz")
16 | 
17 |     def test_and_operation(self):
18 |         tree = AndOperation(Word("foo"), Word("bar"), Word("baz"))
19 |         self.assertEqual(str(tree), "fooANDbarANDbaz")
20 |         self.assertEqual(str(auto_head_tail(tree)), "foo AND bar AND baz")
21 | 
22 |     def test_unknown_operation(self):
23 |         tree = UnknownOperation(Word("foo"), Word("bar"), Word("baz"))
UnknownOperation(Word("foo"), Word("bar"), Word("baz")) 24 | self.assertEqual(str(tree), "foobarbaz") 25 | self.assertEqual(str(auto_head_tail(tree)), "foo bar baz") 26 | 27 | def test_range(self): 28 | tree = Range(Word("foo"), Word("bar")) 29 | self.assertEqual(str(tree), "[fooTObar]") 30 | self.assertEqual(str(auto_head_tail(tree)), "[foo TO bar]") 31 | 32 | def test_not(self): 33 | tree = Not(Word("foo")) 34 | self.assertEqual(str(tree), "NOTfoo") 35 | self.assertEqual(str(auto_head_tail(tree)), "NOT foo") 36 | 37 | def test_complex(self): 38 | tree = Group( 39 | OrOperation( 40 | SearchField( 41 | "foo", 42 | FieldGroup(UnknownOperation(Word("bar"), Range(Word("baz"), Word("spam")))), 43 | ), 44 | Not(Proximity(Phrase('"ham ham"'), 2)), 45 | Plus(Fuzzy(Word("hammer"), 3)), 46 | ) 47 | ) 48 | self.assertEqual(str(tree), '(foo:(bar[bazTOspam])ORNOT"ham ham"~2OR+hammer~3)') 49 | self.assertEqual( 50 | str(auto_head_tail(tree)), 51 | '(foo:(bar [baz TO spam]) OR NOT "ham ham"~2 OR +hammer~3)', 52 | ) 53 | # idem potent 54 | self.assertEqual( 55 | str(auto_head_tail(auto_head_tail(tree))), 56 | '(foo:(bar [baz TO spam]) OR NOT "ham ham"~2 OR +hammer~3)', 57 | ) 58 | 59 | def test_auto_head_tail_no_change_to_existing(self): 60 | tree = AndOperation( 61 | Range(Word("foo", tail="\t"), Word("bar", head="\n"), tail="\r"), 62 | Not(Word("baz", head="\t\t"), head="\n\n", tail="\r\r"), 63 | Word("spam", head="\t\n"), 64 | ) 65 | self.assertEqual(str(tree), "[foo\tTO\nbar]\rAND\n\nNOT\t\tbaz\r\rAND\t\nspam") 66 | self.assertEqual( 67 | str(auto_head_tail(tree)), 68 | "[foo\tTO\nbar]\rAND\n\nNOT\t\tbaz\r\rAND\t\nspam" 69 | ) 70 | -------------------------------------------------------------------------------- /tests/test_deprecated_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | .. todo:: split this file in multiple file: tree, lexer, parser 4 | """ 5 | import collections 6 | import copy 7 | from unittest import TestCase 8 | 9 | from luqum.tree import Group, Word, Phrase, AndOperation, OrOperation 10 | from luqum.deprecated_utils import LuceneTreeVisitor, LuceneTreeTransformer, LuceneTreeVisitorV2 11 | 12 | 13 | class TreeVisitorTestCase(TestCase): 14 | 15 | class BasicVisitor(LuceneTreeVisitor): 16 | """ Dummy visitor, simply yielding a list of nodes. 
""" 17 | def generic_visit(self, node, parents): 18 | yield node 19 | 20 | class MROVisitor(LuceneTreeVisitor): 21 | 22 | def visit_or_operation(self, node, parents=[]): 23 | return ["{} OR {}".format(*node.children)] 24 | 25 | def visit_base_operation(self, node, parents=[]): 26 | return ["{} BASE_OP {}".format(*node.children)] 27 | 28 | def visit_word(self, node, parents=[]): 29 | return [node.value] 30 | 31 | def test_generic_visit(self): 32 | tree = ( 33 | AndOperation( 34 | Word("foo"), 35 | Word("bar"))) 36 | 37 | visitor = LuceneTreeVisitor() 38 | nodes = list(visitor.visit(tree)) 39 | self.assertEqual(nodes, []) 40 | 41 | def test_basic_traversal(self): 42 | tree = ( 43 | AndOperation( 44 | Word("foo"), 45 | Word("bar"))) 46 | 47 | visitor = self.BasicVisitor() 48 | nodes = list(visitor.visit(tree)) 49 | 50 | self.assertListEqual( 51 | [AndOperation(Word('foo'), Word('bar')), Word('foo'), Word('bar')], 52 | nodes) 53 | 54 | def test_mro(self): 55 | visitor = self.MROVisitor() 56 | 57 | tree = OrOperation(Word('a'), Word('b')) 58 | result = visitor.visit(tree) 59 | self.assertEqual(list(result), ['a OR b', 'a', 'b']) 60 | 61 | tree = AndOperation(Word('a'), Word('b')) 62 | result = visitor.visit(tree) 63 | self.assertEqual(list(result), ['a BASE_OP b', 'a', 'b']) 64 | 65 | 66 | class TreeTransformerTestCase(TestCase): 67 | 68 | class BasicTransformer(LuceneTreeTransformer): 69 | """ 70 | Dummy transformer that simply turn any Word node's value into "lol" 71 | """ 72 | def visit_word(self, node, parent): 73 | return Word('lol') 74 | 75 | def visit_phrase(self, node, parent): 76 | return None 77 | 78 | class OrListOperation(OrOperation): 79 | """Dummy operation having list operands instead of tuple 80 | """ 81 | def __init__(self, *args, **kwargs): 82 | super().__init__(*args, **kwargs) 83 | self.operands = list(self.operands) 84 | 85 | def test_basic_traversal(self): 86 | tree = ( 87 | AndOperation( 88 | Word("foo"), 89 | Word("bar"))) 90 | 91 | transformer = self.BasicTransformer() 92 | new_tree = transformer.visit(tree) 93 | 94 | self.assertEqual( 95 | new_tree, 96 | (AndOperation( 97 | Word("lol"), 98 | Word("lol")))) 99 | 100 | def test_no_transform(self): 101 | tree = AndOperation() 102 | transformer = self.BasicTransformer() 103 | new_tree = transformer.visit(tree) 104 | self.assertEqual( 105 | new_tree, 106 | AndOperation()) 107 | 108 | def test_one_word(self): 109 | tree = Word("foo") 110 | transformer = self.BasicTransformer() 111 | new_tree = transformer.visit(tree) 112 | self.assertEqual( 113 | new_tree, 114 | Word("lol")) 115 | 116 | def test_removal(self): 117 | tree = ( 118 | AndOperation( 119 | AndOperation( 120 | Word("foo"), 121 | Phrase('"bar"')), 122 | AndOperation( 123 | Phrase('"baz"'), 124 | Phrase('"biz"')))) 125 | 126 | transformer = self.BasicTransformer() 127 | new_tree = transformer.visit(tree) 128 | 129 | self.assertEqual( 130 | new_tree, 131 | (AndOperation( 132 | AndOperation(Word("lol")), 133 | AndOperation()))) 134 | 135 | def test_operands_list(self): 136 | OrListOperation = self.OrListOperation 137 | tree = ( 138 | OrListOperation( 139 | OrListOperation( 140 | Word("foo"), 141 | Phrase('"bar"')), 142 | OrListOperation( 143 | Phrase('"baz"')))) 144 | 145 | transformer = self.BasicTransformer() 146 | new_tree = transformer.visit(tree) 147 | 148 | self.assertEqual( 149 | new_tree, 150 | (OrListOperation( 151 | OrListOperation(Word("lol")), 152 | OrListOperation()))) 153 | 154 | def test_silent_value_error(self): 155 | # in the case some attribute 
mislead the search for node do not raise 156 | tree = AndOperation(Word("a"), Word("b")) 157 | setattr(tree, "misleading1", ()) 158 | setattr(tree, "misleading2", []) 159 | # hackishly patch __dict__ to be sure we have operands in right order for test 160 | tree.__dict__ = collections.OrderedDict(tree.__dict__) 161 | tree.__dict__['operands'] = tree.__dict__.pop('operands') # operands are now last 162 | 163 | transformer = self.BasicTransformer() 164 | new_tree = transformer.visit(tree) 165 | 166 | self.assertEqual( 167 | new_tree, 168 | AndOperation(Word("lol"), Word("lol"))) 169 | 170 | def test_repeating_expression(self): 171 | # non regression test 172 | tree = AndOperation( 173 | Group(OrOperation(Word('bar'), Word('foo'))), 174 | Group(OrOperation(Word('bar'), Word('foo'), Word('spam'))), 175 | ) 176 | # basic transformer should not change tree 177 | same_tree = LuceneTreeTransformer().visit(copy.deepcopy(tree)) 178 | self.assertEqual(same_tree, tree) 179 | 180 | 181 | class TreeVisitorV2TestCase(TestCase): 182 | 183 | class BasicVisitor(LuceneTreeVisitorV2): 184 | """ Dummy visitor, simply yielding a list of nodes. """ 185 | def generic_visit(self, node, parents, context): 186 | yield node 187 | for c in node.children: 188 | yield from self.visit(c, parents + [node], context) 189 | 190 | class MROVisitor(LuceneTreeVisitorV2): 191 | 192 | def visit_or_operation(self, node, parents=[], context=None): 193 | return "{} OR {}".format(*[self.visit(c) for c in node.children]) 194 | 195 | def visit_base_operation(self, node, parents=[], context=None): 196 | return "{} BASE_OP {}".format(*[self.visit(c) for c in node.children]) 197 | 198 | def visit_word(self, node, parents=[], context=None): 199 | return node.value 200 | 201 | def test_basic_traversal(self): 202 | tree = ( 203 | AndOperation( 204 | Word("foo"), 205 | Word("bar"))) 206 | 207 | visitor = self.BasicVisitor() 208 | nodes = list(visitor.visit(tree)) 209 | 210 | self.assertListEqual( 211 | [AndOperation(Word('foo'), Word('bar')), Word('foo'), Word('bar')], 212 | nodes) 213 | 214 | def test_mro(self): 215 | visitor = self.MROVisitor() 216 | 217 | tree = OrOperation(Word('a'), Word('b')) 218 | result = visitor.visit(tree) 219 | self.assertEqual(result, 'a OR b') 220 | 221 | tree = OrOperation(AndOperation(Word('a'), Word('b')), Word('c')) 222 | result = visitor.visit(tree) 223 | self.assertEqual(result, 'a BASE_OP b OR c') 224 | 225 | def test_generic_visit_fails_by_default(self): 226 | visitor = self.MROVisitor() 227 | with self.assertRaises(AttributeError): 228 | visitor.visit(Phrase('"test"')) 229 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jurismarches/luqum/64d3d4a1bcdc56f156185df1f4d146582f294998/tests/test_elasticsearch/__init__.py -------------------------------------------------------------------------------- /tests/test_elasticsearch/book.json: -------------------------------------------------------------------------------- 1 | { 2 | "books": [ 3 | { 4 | "title": "Harry Potter and the Philosopher's Stone", 5 | "edition": "Bloomsbury", 6 | "author": { 7 | "name": "J. K. 
Rowling", 8 | "birthdate": "1965-07-31" 9 | }, 10 | "illustrators": [ 11 | { 12 | "name": "Thomas Taylor", 13 | "nationality": "UK", 14 | "birthdate": "1973-05-22" 15 | }, 16 | { 17 | "name": "Mary GrandPré", 18 | "nationality":"US", 19 | "birthdate": "1954-02-13" 20 | } 21 | ], 22 | "publication_date": "1997-06-26", 23 | "n_pages": "223", 24 | "ref": "HP1" 25 | }, 26 | { 27 | "title": "Harry Potter and the Chamber of Secrets", 28 | "edition": "Bloomsbury", 29 | "author": { 30 | "name": "J. K. Rowling", 31 | "birthdate": "1965-07-31" 32 | }, 33 | "illustrators": [ 34 | { 35 | "name": "Cliff Wright", 36 | "nationality": "UK", 37 | "birthdate": "1953-10-24" 38 | }, 39 | { 40 | "name": "Mary GrandPré", 41 | "nationality": "US", 42 | "birthdate": "1954-02-13" 43 | } 44 | ], 45 | "publication_date": "1998-07-02", 46 | "n_pages": "251", 47 | "ref": "HP2" 48 | }, 49 | { 50 | "title": "Harry Potter and the Prisoner of Azkaban", 51 | "edition": "Bloomsbury", 52 | "author": { 53 | "name": "J. K. Rowling", 54 | "birthdate": "1965-07-31" 55 | }, 56 | "illustrators": [ 57 | { 58 | "name": "Cliff Wright", 59 | "nationality": "UK", 60 | "birthdate": "1953-10-24" 61 | }, 62 | { 63 | "name": "Mary GrandPré", 64 | "nationality": "US", 65 | "birthdate": "1954-02-13" 66 | } 67 | ], 68 | "publication_date": "1999-07-08", 69 | "n_pages": "317", 70 | "ref": "HP3" 71 | }, 72 | { 73 | "title": "Harry Potter and the Goblet of Fire", 74 | "edition": "Bloomsbury", 75 | "author": { 76 | "name": "J. K. Rowling", 77 | "birthdate": "1965-07-31" 78 | }, 79 | "illustrators": [ 80 | { 81 | "name": "Giles Greenfield", 82 | "nationality": "UK" 83 | }, 84 | { 85 | "name": "Mary GrandPré", 86 | "nationality": "US", 87 | "birthdate": "1954-02-13" 88 | } 89 | ], 90 | "publication_date": "2000-07-08", 91 | "n_pages": "636", 92 | "ref": "HP4" 93 | }, 94 | { 95 | "title": "Harry Potter and the Order of the Phoenix", 96 | "edition": "Bloomsbury", 97 | "author": { 98 | "name": "J. K. Rowling", 99 | "birthdate": "1965-07-31" 100 | }, 101 | "illustrators": [ 102 | { 103 | "name":"Jason Cockcroft", 104 | "nationality":"UK" 105 | }, 106 | { 107 | "name": "Mary GrandPré", 108 | "nationality": "US", 109 | "birthdate": "1954-02-13" 110 | } 111 | ], 112 | "publication_date": "2003-06-21", 113 | "n_pages": "766", 114 | "ref": "HP5" 115 | }, 116 | { 117 | "title": "Harry Potter and the Half-Blood Prince", 118 | "edition": "Bloomsbury", 119 | "author": { 120 | "name": "J. K. Rowling", 121 | "birthdate": "1965-07-31" 122 | }, 123 | "illustrators": [ 124 | { 125 | "name": "Jason Cockcroft", 126 | "nationality": "UK" 127 | }, 128 | { 129 | "name": "Mary GrandPré", 130 | "nationality": "US", 131 | "birthdate": "1954-02-13" 132 | } 133 | ], 134 | "publication_date": "2005-07-16", 135 | "n_pages": "607", 136 | "ref": "HP6" 137 | }, 138 | { 139 | "title": "Harry Potter and the Deathly Hallows", 140 | "edition": "Bloomsbury", 141 | "author": { 142 | "name": "J. K. Rowling", 143 | "birthdate": "1965-07-31" 144 | }, 145 | "illustrators": [ 146 | { 147 | "name": "Jason Cockcroft", 148 | "nationality": "UK" 149 | }, 150 | { 151 | "name": "Mary GrandPré", 152 | "nationality": "US", 153 | "birthdate": "1954-02-13" 154 | } 155 | ], 156 | "publication_date": "2007-07-21", 157 | "n_pages": "607", 158 | "ref": "HP7" 159 | }, 160 | { 161 | "title": "Harry Potter and the Cursed Child", 162 | "edition": "Little, Brown and Company", 163 | "author": { 164 | "name": "J. K. 
Rowling", 165 | "birthdate": "1965-07-31" 166 | }, 167 | "illustrators": [], 168 | "publication_date": "2016-07-30", 169 | "n_pages": "360", 170 | "ref": "HP8" 171 | }, 172 | { 173 | "title": "The Tales of Beedle the Bard", 174 | "edition": "Lumos (charity)", 175 | "author": { 176 | "name": "J. K. Rowling", 177 | "birthdate": "1965-07-31" 178 | }, 179 | "illustrators": [ 180 | { 181 | "name":"J. K. Rowling", 182 | "nationality": "UK", 183 | "birthdate": "1965-07-31" 184 | } 185 | ], 186 | "publication_date": "2008-12-04", 187 | "n_pages": "157", 188 | "ref": "BB1" 189 | } 190 | ] 191 | } 192 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/es_integration_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import elasticsearch_dsl 5 | from elasticsearch.exceptions import ConnectionError 6 | from elasticsearch.helpers import bulk 7 | from elasticsearch_dsl import Date, Index, Integer, Nested, Object, Search, analyzer 8 | from elasticsearch_dsl.connections import connections 9 | 10 | from luqum.elasticsearch import ElasticsearchQueryBuilder, SchemaAnalyzer 11 | 12 | 13 | MAJOR_ES = elasticsearch_dsl.VERSION[0] 14 | if MAJOR_ES > 2: 15 | from elasticsearch_dsl import Keyword 16 | 17 | ES6 = False 18 | if MAJOR_ES >= 6: 19 | from elasticsearch_dsl import Text, Document, InnerDoc 20 | 21 | ES6 = True 22 | else: 23 | from elasticsearch_dsl import ( 24 | String as Text, 25 | DocType as Document, 26 | InnerObjectWrapper as InnerDoc, 27 | ) 28 | 29 | 30 | def get_es(): 31 | """Return an es connection or None if none seems available. 32 | 33 | Also wait for ES to be ready (yellow status) 34 | """ 35 | # you may use ES_HOST environment variable to configure Elasticsearch 36 | # launching something like 37 | # docker run --rm -p "127.0.0.1:9200:9200" -e "discovery.type=single-node" elasticsearch:7.8.0 38 | # is a simple way to get an instance 39 | connections.configure( 40 | default=dict(hosts=os.environ.get("ES_HOST", "http://localhost:9200"), timeout=20) 41 | ) 42 | try: 43 | client = connections.get_connection("default") 44 | # check ES running 45 | client.cluster.health(wait_for_status='yellow') 46 | except ConnectionError: 47 | client = None 48 | return client 49 | 50 | 51 | if MAJOR_ES > 2: 52 | 53 | class Illustrator(InnerDoc): 54 | """Inner object to be nested in Book, details on an illustrator 55 | """ 56 | name = Text() 57 | birthdate = Date() 58 | nationality = Keyword() 59 | 60 | 61 | class Book(Document): 62 | """An objects representing a book in ES 63 | """ 64 | title = Text(fields={ 65 | "no_vowels": Text( 66 | analyzer=analyzer("no_vowels", "pattern", pattern=r"[\Waeiouy]"), # noqa: W605 67 | search_analyzer="standard" 68 | ) 69 | }) 70 | ref = Keyword() if MAJOR_ES > 2 else Text(index="not_analyzed") 71 | edition = Text() 72 | author = Object(properties={"name": Text(), "birthdate": Date()}) 73 | publication_date = Date() 74 | n_pages = Integer() 75 | 76 | if ES6: 77 | illustrators = Nested(Illustrator) 78 | 79 | class Index: 80 | name = "bk" 81 | 82 | else: 83 | illustrators = Nested( 84 | properties={ 85 | "name": Text(), 86 | "birthdate": Date(), 87 | "nationality": Keyword() if MAJOR_ES > 2 else Text(index="not_analyzed"), 88 | } 89 | ) 90 | 91 | class Meta: 92 | index = "bk" 93 | 94 | 95 | def add_book_data(es): 96 | """Create a "bk" index and fill it with data 97 | """ 98 | remove_book_index(es) 99 | Book.init() 100 | with 
open(os.path.join(os.path.dirname(__file__), "book.json")) as f:
101 |         datas = json.load(f)
102 |     actions = (
103 |         {"_op_type": "index", "_id": i, "_source": d}
104 |         for i, d in enumerate(datas["books"])
105 |     )
106 |     if MAJOR_ES >= 7:
107 |         bulk(es, actions, index="bk", refresh=True)
108 |     else:
109 |         if ES6:
110 |             doc_type = "doc"
111 |         else:
112 |             doc_type = "book"
113 |         bulk(es, actions, index="bk", doc_type=doc_type, refresh=True)
114 | 
115 | 
116 | def book_search(es):
117 |     """Return an elasticsearch_dsl search object
118 |     """
119 |     return Search(using=es, index="bk")
120 | 
121 | 
122 | def book_query_builder(es):
123 |     """Return an ElasticsearchQueryBuilder adapted for searching books.
124 | 
125 |     title is adapted to search the title.no_vowels field along with the title
126 |     """
127 |     MESSAGES_SCHEMA = {"mappings": Book._doc_type.mapping.to_dict()}
128 |     schema_analyzer = SchemaAnalyzer(MESSAGES_SCHEMA)
129 |     builder_options = schema_analyzer.query_builder_options()
130 |     builder_options['field_options'] = {
131 |         'title.no_vowels': {
132 |             'match_type': 'multi_match',
133 |             'type': 'most_fields',
134 |             'fields': ('title', 'title.no_vowels')
135 |         }
136 |     }
137 |     return ElasticsearchQueryBuilder(**builder_options)
138 | 
139 | 
140 | def remove_book_index(es):
141 |     """Clean the "bk" index
142 |     """
143 |     if es is None:
144 |         return
145 |     if ES6:
146 |         Book._index.delete(ignore=404)
147 |     else:
148 |         Index("bk").delete(ignore=404)
-------------------------------------------------------------------------------- /tests/test_elasticsearch/test_es_integration.py: --------------------------------------------------------------------------------
1 | from unittest import TestCase, skipIf
2 | 
3 | from luqum.parser import parser
4 | 
5 | from .es_integration_utils import (
6 |     add_book_data, book_query_builder, book_search, get_es, remove_book_index,
7 | )
8 | 
9 | 
10 | @skipIf(get_es() is None, "Skipping ES test as ES seems unreachable")
11 | class LuqumRequestTestCase(TestCase):
12 | 
13 |     @classmethod
14 |     def setUpClass(cls):
15 |         cls.es_client = get_es()
16 |         if cls.es_client is None:
17 |             return
18 |         cls.es_builder = book_query_builder(cls.es_client)
19 |         cls.search = book_search(cls.es_client)
20 |         add_book_data(cls.es_client)
21 | 
22 |     def _ask_luqum(self, req):
23 |         tree = parser.parse(req)
24 |         query = self.es_builder(tree)
25 |         return [x.title for x in self.search.filter(query).execute()]
26 | 
27 |     def test_simple_field_search(self):
28 |         self.assertListEqual(
29 |             self._ask_luqum('title:"Chamber"'),
30 |             ["Harry Potter and the Chamber of Secrets"],
31 |         )
32 | 
33 |     def test_nested_field_search(self):
34 |         self.assertListEqual(
35 |             self._ask_luqum("illustrators:(name:Giles)"),
36 |             ["Harry Potter and the Goblet of Fire"],
37 |         )
38 | 
39 |     def test_or_condition_search(self):
40 |         self.assertCountEqual(
41 |             self._ask_luqum(
42 |                 'illustrators:(name:"Giles Greenfield" OR name:"Cliff Wright")'
43 |             ),
44 |             [
45 |                 "Harry Potter and the Prisoner of Azkaban",
46 |                 "Harry Potter and the Chamber of Secrets",
47 |                 "Harry Potter and the Goblet of Fire",
48 |             ],
49 |         )
50 | 
51 |     def test_and_condition_search(self):
52 |         self.assertCountEqual(
53 |             self._ask_luqum(
54 |                 'illustrators:(name:"Cliff Wright") AND illustrators:(name:"Mary GrandPré")'
55 |             ),
56 |             [
57 |                 "Harry Potter and the Prisoner of Azkaban",
58 |                 "Harry Potter and the Chamber of Secrets",
59 |             ],
60 |         )
61 | 
62 |     def test_date_range_search(self):
63 |         self.assertCountEqual(
64 |             self._ask_luqum("publication_date:[2005-01-01 TO 
2010-12-31]"), 65 | [ 66 | "Harry Potter and the Half-Blood Prince", 67 | "The Tales of Beedle the Bard", 68 | "Harry Potter and the Deathly Hallows", 69 | ], 70 | ) 71 | 72 | def test_int_range_search(self): 73 | self.assertCountEqual( 74 | self._ask_luqum("n_pages:[500 TO *]"), 75 | [ 76 | "Harry Potter and the Half-Blood Prince", 77 | "Harry Potter and the Order of the Phoenix", 78 | "Harry Potter and the Deathly Hallows", 79 | "Harry Potter and the Goblet of Fire", 80 | ], 81 | ) 82 | 83 | def test_int_search(self): 84 | self.assertListEqual( 85 | self._ask_luqum("n_pages:360"), ["Harry Potter and the Cursed Child"] 86 | ) 87 | 88 | def test_proximity_search(self): 89 | self.assertListEqual( 90 | self._ask_luqum('title:"Harry Secrets"~5'), 91 | ["Harry Potter and the Chamber of Secrets"], 92 | ) 93 | 94 | def test_fuzzy_search(self): 95 | self.assertListEqual( 96 | self._ask_luqum("title:Gublet~2"), ["Harry Potter and the Goblet of Fire"] 97 | ) 98 | 99 | def test_object_field_search(self): 100 | self.assertListEqual( 101 | self._ask_luqum('illustrators:(name:"J. K. Rowling")'), 102 | ["The Tales of Beedle the Bard"], 103 | ) 104 | 105 | def test_fail_search(self): 106 | self.assertListEqual(self._ask_luqum("title:secret"), []) 107 | 108 | def test_wildcard_matching(self): 109 | self.assertListEqual( 110 | self._ask_luqum("title:secret*"), 111 | ["Harry Potter and the Chamber of Secrets"], 112 | ) 113 | 114 | def test_wildcard1_search(self): 115 | self.assertListEqual( 116 | self._ask_luqum("title:P*ix"), ["Harry Potter and the Order of the Phoenix"] 117 | ) 118 | 119 | def test_not_search(self): 120 | self.assertListEqual( 121 | self._ask_luqum("-title:Harry"), ["The Tales of Beedle the Bard"] 122 | ) 123 | 124 | def test_not_analysed_field_search(self): 125 | self.assertListEqual(self._ask_luqum("illustrators:nationality:uk"), []) 126 | 127 | def test_complex_search(self): 128 | self.assertListEqual( 129 | self._ask_luqum( 130 | """ 131 | title:phoenux~2 AND 132 | illustrators:name:Grand* AND 133 | illustrators:( 134 | -name:grandpr* AND ( 135 | name:J*on OR birthdate:[1950-01-01 TO 1970-01-01] 136 | ) 137 | ) 138 | """ 139 | ), 140 | ["Harry Potter and the Order of the Phoenix"], 141 | ) 142 | 143 | def test_subfield_multi_match_search(self): 144 | self.assertListEqual( 145 | self._ask_luqum("title.no_vowels:Potter AND title.no_vowels:x"), 146 | ["Harry Potter and the Order of the Phoenix"], 147 | ) 148 | 149 | @classmethod 150 | def tearDownClass(cls): 151 | remove_book_index(cls.es_client) 152 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/test_estree.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.elasticsearch.tree import EShould, EWord 4 | 5 | 6 | class TestItems(TestCase): 7 | 8 | def test_should_operation_options(self): 9 | op = EShould(items=[EWord(q="a"), EWord(q="b"), EWord(q="c")], minimum_should_match=2) 10 | self.assertEqual( 11 | op.json, 12 | {'bool': { 13 | 'should': [ 14 | {'term': {'': {'value': 'a'}}}, 15 | {'term': {'': {'value': 'b'}}}, 16 | {'term': {'': {'value': 'c'}}}, 17 | ], 18 | 'minimum_should_match': 2, 19 | }}, 20 | ) 21 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/test_naming.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.tree import ( 4 | 
AndOperation, Word, Prohibit, OrOperation, Not, Phrase, SearchField, 5 | UnknownOperation, Boost, Fuzzy, Proximity, Range, Group, FieldGroup, 6 | Plus) 7 | from luqum.naming import auto_name, set_name 8 | from luqum.elasticsearch.visitor import ElasticsearchQueryBuilder 9 | 10 | 11 | class ElasticsearchTreeTransformerTestCase(TestCase): 12 | 13 | @classmethod 14 | def setUpClass(cls): 15 | cls.transformer = ElasticsearchQueryBuilder( 16 | default_field="text", 17 | not_analyzed_fields=['not_analyzed_field', 'text', 'author.tag'], 18 | nested_fields={ 19 | 'author': ['name', 'tag'] 20 | }, 21 | object_fields=["book.title", "author.rewards.name"], 22 | sub_fields=["book.title.raw"], 23 | ) 24 | 25 | def test_named_queries_match(self): 26 | tree = SearchField("spam", Word("bar")) 27 | set_name(tree, "a") 28 | result = self.transformer(tree) 29 | self.assertEqual( 30 | result, 31 | { 32 | "match": { 33 | "spam": { 34 | "query": "bar", 35 | "_name": "a", 36 | "zero_terms_query": "none", 37 | }, 38 | }, 39 | }, 40 | ) 41 | 42 | tree = SearchField("spam", Phrase('"foo bar"')) 43 | set_name(tree, "a") 44 | result = self.transformer(tree) 45 | self.assertEqual( 46 | result, 47 | { 48 | "match_phrase": { 49 | "spam": { 50 | "query": "foo bar", 51 | "_name": "a", 52 | }, 53 | }, 54 | }, 55 | ) 56 | 57 | def test_named_queries_term(self): 58 | tree = SearchField("text", Word("bar")) 59 | set_name(tree, "a") 60 | result = self.transformer(tree) 61 | self.assertEqual( 62 | result, 63 | {"term": {"text": {"value": "bar", "_name": "a"}}}, 64 | ) 65 | 66 | tree = SearchField("text", Phrase('"foo bar"')) 67 | set_name(tree, "a") 68 | result = self.transformer(tree) 69 | self.assertEqual( 70 | result, 71 | {"term": {"text": {"value": "foo bar", "_name": "a"}}}, 72 | ) 73 | 74 | def test_named_queries_fuzzy(self): 75 | tree = SearchField("text", Fuzzy(Word('bar'))) 76 | set_name(tree.children[0], "a") 77 | result = self.transformer(tree) 78 | self.assertEqual( 79 | result, 80 | {"fuzzy": {"text": {"value": "bar", "_name": "a", 'fuzziness': 0.5}}}, 81 | ) 82 | 83 | def test_named_queries_proximity(self): 84 | tree = SearchField("spam", Proximity(Phrase('"foo bar"'))) 85 | set_name(tree.children[0], "a") 86 | result = self.transformer(tree) 87 | self.assertEqual( 88 | result, 89 | {"match_phrase": {"spam": {"query": "foo bar", "_name": "a", 'slop': 1.0}}}, 90 | ) 91 | 92 | def test_named_queries_boost(self): 93 | tree = SearchField("text", Boost(Phrase('"foo bar"'), force=2)) 94 | set_name(tree.children[0], "a") 95 | result = self.transformer(tree) 96 | self.assertEqual( 97 | result, 98 | {"term": {"text": {"value": "foo bar", "_name": "a", 'boost': 2.0}}}, 99 | ) 100 | 101 | def test_named_queries_or(self): 102 | tree = OrOperation(SearchField("text", Word("foo")), SearchField("spam", Word("bar"))) 103 | set_name(tree.operands[0], "a") 104 | set_name(tree.operands[1], "b") 105 | result = self.transformer(tree) 106 | self.assertEqual( 107 | result, 108 | {'bool': {'should': [ 109 | {'term': {'text': {'_name': 'a', 'value': 'foo'}}}, 110 | {'match': {'spam': {'_name': 'b', 'query': 'bar', 'zero_terms_query': 'none'}}} 111 | ]}} 112 | ) 113 | 114 | def test_named_queries_and(self): 115 | tree = AndOperation(SearchField("text", Word("foo")), SearchField("spam", Word("bar"))) 116 | set_name(tree.operands[0], "a") 117 | set_name(tree.operands[1], "b") 118 | result = self.transformer(tree) 119 | self.assertEqual( 120 | result, 121 | {'bool': {'must': [ 122 | {'term': {'text': {'_name': 'a', 'value': 'foo'}}}, 123 | 
{'match': {'spam': {'_name': 'b', 'query': 'bar', 'zero_terms_query': 'all'}}} 124 | ]}} 125 | ) 126 | 127 | def test_named_queries_unknown(self): 128 | tree = UnknownOperation(SearchField("text", Word("foo")), SearchField("spam", Word("bar"))) 129 | set_name(tree.operands[0], "a") 130 | set_name(tree.operands[1], "b") 131 | result = self.transformer(tree) 132 | self.assertEqual( 133 | result, 134 | {'bool': {'should': [ 135 | {'term': {'text': {'_name': 'a', 'value': 'foo'}}}, 136 | {'match': {'spam': {'_name': 'b', 'query': 'bar', 'zero_terms_query': 'none'}}} 137 | ]}} 138 | ) 139 | 140 | def test_named_queries_not(self): 141 | tree = Not(SearchField("text", Word("foo"))) 142 | set_name(tree, "a") 143 | result = self.transformer(tree) 144 | self.assertEqual( 145 | result, 146 | {'bool': {'must_not': [{'term': {'text': {'_name': 'a', 'value': 'foo'}}}]}} 147 | ) 148 | 149 | tree = Prohibit(SearchField("text", Word("foo"))) 150 | set_name(tree, "a") 151 | result = self.transformer(tree) 152 | self.assertEqual( 153 | result, 154 | {'bool': {'must_not': [{'term': {'text': {'_name': 'a', 'value': 'foo'}}}]}} 155 | ) 156 | 157 | def test_named_queries_plus(self): 158 | tree = Plus(SearchField("text", Word("foo"))) 159 | set_name(tree, "a") 160 | result = self.transformer(tree) 161 | self.assertEqual( 162 | result, 163 | {'bool': {'must': [{'term': {'text': {'_name': 'a', 'value': 'foo'}}}]}} 164 | ) 165 | 166 | def test_named_queries_range(self): 167 | tree = SearchField("text", Range(Word("x"), Word("z"))) 168 | set_name(tree, "a") 169 | result = self.transformer(tree) 170 | self.assertEqual(result, {'range': {'text': {'_name': 'a', 'gte': 'x', 'lte': 'z'}}}) 171 | 172 | def test_named_queries_nested(self): 173 | tree = SearchField("author.name", Word("Monthy")) 174 | set_name(tree, "a") 175 | result = self.transformer(tree) 176 | # name is repeated on query, but it's not a big deal… 177 | self.assertEqual( 178 | result, 179 | { 180 | 'nested': { 181 | '_name': 'a', 182 | 'path': 'author', 183 | 'query': {'match': {'author.name': { 184 | '_name': 'a', 'query': 'Monthy', 'zero_terms_query': 'none', 185 | }}}, 186 | }, 187 | } 188 | ) 189 | 190 | def test_named_queries_object(self): 191 | tree = SearchField("book.title", Word("Circus")) 192 | set_name(tree, "a") 193 | result = self.transformer(tree) 194 | # name is repeated on query, but it's not a big deal… 195 | self.assertEqual( 196 | result, 197 | { 198 | 'match': {'book.title': { 199 | '_name': 'a', 'query': 'Circus', 'zero_terms_query': 'none' 200 | }} 201 | } 202 | ) 203 | 204 | def test_named_queries_group(self): 205 | tree = SearchField("text", FieldGroup(Word("bar"))) 206 | set_name(tree.children[0], "a") 207 | result = self.transformer(tree) 208 | self.assertEqual(result, {"term": {"text": {"value": "bar", "_name": "a"}}},) 209 | 210 | tree = Group(SearchField("text", Word("bar"))) 211 | set_name(tree, "a") 212 | result = self.transformer(tree) 213 | self.assertEqual(result, {"term": {"text": {"value": "bar", "_name": "a"}}},) 214 | 215 | def test_named_queries_exists(self): 216 | tree = SearchField("text", Word("*")) 217 | set_name(tree.children[0], "a") 218 | result = self.transformer(tree) 219 | self.assertEqual(result, {"exists": {"field": "text", "_name": "a"}},) 220 | 221 | def test_named_queries_complex(self): 222 | tree = ( 223 | AndOperation( 224 | SearchField("text", Phrase('"foo bar"')), 225 | Group( 226 | OrOperation( 227 | Word("bar"), 228 | SearchField("spam", Word("baz")), 229 | ), 230 | ), 231 | ) 232 | ) 233 | 
and_op = tree 234 | search_text = and_op.operands[0] 235 | or_op = and_op.operands[1].children[0] 236 | bar = or_op.operands[0] 237 | search_spam = or_op.operands[1] 238 | set_name(search_text, "foo_bar") 239 | set_name(bar, "bar") 240 | set_name(search_spam, "baz") 241 | 242 | expected = { 243 | 'bool': {'must': [ 244 | {'term': {'text': {'_name': 'foo_bar', 'value': 'foo bar'}}}, 245 | {'bool': {'should': [ 246 | {'term': {'text': {'_name': 'bar', 'value': 'bar'}}}, 247 | {'match': {'spam': { 248 | '_name': 'baz', 249 | 'query': 'baz', 250 | 'zero_terms_query': 'none' 251 | }}} 252 | ]}} 253 | ]} 254 | } 255 | 256 | result = self.transformer(tree) 257 | self.assertEqual(result, expected) 258 | 259 | def test_auto_name_integration(self): 260 | tree = ( 261 | AndOperation( 262 | SearchField("text", Phrase('"foo bar"')), 263 | Group( 264 | OrOperation( 265 | Word("bar"), 266 | SearchField("spam", Word("baz")), 267 | ), 268 | ), 269 | ) 270 | ) 271 | auto_name(tree) 272 | 273 | expected = { 274 | 'bool': {'must': [ 275 | {'term': {'text': {'_name': 'a', 'value': 'foo bar'}}}, 276 | {'bool': {'should': [ 277 | {'term': {'text': {'_name': 'c', 'value': 'bar'}}}, 278 | {'match': {'spam': { 279 | '_name': 'd', 280 | 'query': 'baz', 281 | 'zero_terms_query': 'none' 282 | }}} 283 | ]}} 284 | ]} 285 | } 286 | 287 | result = self.transformer(tree) 288 | self.assertEqual(result, expected) 289 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/test_nested.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.elasticsearch.nested import extract_nested_queries, get_first_name 4 | 5 | 6 | class NestedQueriesTestCase(TestCase): 7 | 8 | def test_no_nested(self): 9 | queries = extract_nested_queries({"term": {"text": {"value": "spam", "_name": "spam"}}}) 10 | self.assertEqual(queries, []) 11 | 12 | queries = extract_nested_queries( 13 | {"bool": {"must": [ 14 | {"term": {"text": {"value": "spam", "_name": "spam"}}}, 15 | {"term": {"text": {"value": "ham", "_name": "ham"}}}, 16 | ]}} 17 | ) 18 | self.assertEqual(queries, []) 19 | 20 | def test_nested_no_bool_inside(self): 21 | queries = extract_nested_queries( 22 | {"nested": { 23 | "path": "my", 24 | "query": {"term": {"text": {"value": "spam", "_name": "spam"}}} 25 | }} 26 | ) 27 | self.assertEqual(queries, []) 28 | 29 | def test_nested_bool_inside(self): 30 | term1 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 31 | term2 = {"term": {"text": {"value": "ham", "_name": "ham"}}} 32 | bool_query = {"bool": {"must": [term1, term2]}} 33 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query}}) 34 | self.assertEqual( 35 | queries, 36 | [ 37 | {"nested": {"path": "my", "query": term1, "_name": "spam"}}, 38 | {"nested": {"path": "my", "query": term2, "_name": "ham"}}, 39 | ], 40 | ) 41 | 42 | def test_nested_in_bool_with_bool_inside(self): 43 | term1 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 44 | term2 = {"term": {"text": {"value": "ham", "_name": "ham"}}} 45 | term3 = {"term": {"text": {"value": "foo", "_name": "foo"}}} 46 | bool_query = {"bool": {"must": [term1, term2]}} 47 | queries = extract_nested_queries( 48 | {"bool": {"should": [term3, {"nested": {"path": "my", "query": bool_query}}]}} 49 | ) 50 | self.assertEqual( 51 | queries, 52 | [ 53 | {"nested": {"path": "my", "query": term1, "_name": "spam"}}, 54 | {"nested": {"path": "my", "query": term2, "_name": "ham"}}, 55 
| ], 56 | ) 57 | 58 | def test_nested_bool_inside_bool(self): 59 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}} 60 | term2 = {"term": {"text": {"value": "baz", "_name": "baz"}}} 61 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 62 | bool_query1 = {"bool": {"should": [term1, term2]}} 63 | bool_query2 = {"bool": {"must": [term3, bool_query1]}} 64 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query2}}) 65 | self.assertEqual(queries, [ 66 | {"nested": {"path": "my", "query": term3, "_name": "spam"}}, 67 | {"nested": {"path": "my", "query": bool_query1}}, 68 | {"nested": {"path": "my", "query": term1, "_name": "bar"}}, 69 | {"nested": {"path": "my", "query": term2, "_name": "baz"}}, 70 | ]) 71 | 72 | def test_nested_inside_nested(self): 73 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}} 74 | term2 = {"term": {"text": {"value": "baz", "_name": "baz"}}} 75 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 76 | bool_query1 = {"bool": {"should": [term1, term2]}} 77 | inner_nested = {"nested": {"path": "my.your", "query": bool_query1}} 78 | bool_query2 = {"bool": {"must": [term3, inner_nested]}} 79 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query2}}) 80 | self.assertEqual(queries, [ 81 | {"nested": {"path": "my", "query": term3, "_name": "spam"}}, 82 | {"nested": {"path": "my", "query": inner_nested}}, 83 | {"nested": {"path": "my", "_name": "bar", "query": {"nested": { 84 | "path": "my.your", "query": term1, 85 | }}}}, 86 | {"nested": {"path": "my", "_name": "baz", "query": {"nested": { 87 | "path": "my.your", "query": term2, 88 | }}}}, 89 | ]) 90 | 91 | def test_nested_inside_nested_with_nested_bool(self): 92 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}} 93 | term2 = {"term": {"text": {"value": "foo", "_name": "foo"}}} 94 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 95 | bool_query1 = {"bool": {"must_not": [term1]}} 96 | bool_query2 = {"bool": {"should": [term2, bool_query1]}} 97 | inner_nested = {"nested": {"path": "my.your", "query": bool_query2}} 98 | bool_query3 = {"bool": {"must_not": [inner_nested]}} 99 | bool_query4 = {"bool": {"must": [term3, bool_query3]}} 100 | queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query4}}) 101 | self.assertEqual(queries, [ 102 | {"nested": {"path": "my", "query": term3, "_name": "spam"}}, 103 | {"nested": {"path": "my", "query": bool_query3}}, 104 | {"nested": {"path": "my", "query": inner_nested}}, 105 | {"nested": {"path": "my", "_name": "foo", "query": { 106 | "nested": {"path": "my.your", "query": term2} 107 | }}}, 108 | {"nested": { 109 | "path": "my", "query": {"nested": {"path": "my.your", "query": bool_query1}}, 110 | }}, 111 | {"nested": {"path": "my", "_name": "bar", "query": { 112 | "nested": {"path": "my.your", "query": term1} 113 | }}}, 114 | ]) 115 | 116 | def test_multiple_parallel_nested(self): 117 | term1 = {"term": {"text": {"value": "bar", "_name": "bar"}}} 118 | term2 = {"term": {"text": {"value": "foo", "_name": "foo"}}} 119 | term3 = {"term": {"text": {"value": "spam", "_name": "spam"}}} 120 | bool_query1 = {"bool": {"should": [term1]}} 121 | bool_query2 = {"bool": {"must_not": [term2]}} 122 | nested1 = {"nested": {"path": "my.your", "query": bool_query1}} 123 | nested2 = {"nested": {"path": "my.his", "query": bool_query2}} 124 | bool_query3 = {"bool": {"should": [nested2, nested1]}} 125 | bool_query4 = {"bool": {"must": [term3, bool_query3]}} 126 | 
queries = extract_nested_queries({"nested": {"path": "my", "query": bool_query4}}) 127 | self.assertEqual(queries, [ 128 | {"nested": {"path": "my", "query": term3, "_name": "spam"}}, 129 | {"nested": {"path": "my", "query": bool_query3}}, 130 | {"nested": {"path": "my", "query": nested2}}, 131 | {"nested": {"path": "my", "query": nested1}}, 132 | {"nested": {"path": "my", "_name": "foo", "query": { 133 | "nested": {"path": "my.his", "query": term2} 134 | }}}, 135 | {"nested": {"path": "my", "_name": "bar", "query": { 136 | "nested": {"path": "my.your", "query": term1} 137 | }}}, 138 | ]) 139 | 140 | def test_get_first_name(self): 141 | term = {"term": {"text": {"value": "bar", "_name": "bar"}}} 142 | query = [{"query": term, "_name": "spam"}, {"query": term, "_name": "beurre"}] 143 | name = get_first_name(query) 144 | self.assertEqual(name, "spam") 145 | -------------------------------------------------------------------------------- /tests/test_elasticsearch/test_schema.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.elasticsearch.schema import SchemaAnalyzer 4 | 5 | 6 | try: 7 | import elasticsearch_dsl 8 | ES_6 = elasticsearch_dsl.VERSION[0] >= 6 9 | except ImportError: 10 | ES_6 = True 11 | 12 | 13 | class SchemaAnalyzerTestCase(TestCase): 14 | 15 | MAPPING = { 16 | "properties": { 17 | "text": {"type": "text"}, 18 | "author": { 19 | "type": "nested", 20 | "properties": { 21 | "firstname": { 22 | "type": "text", 23 | "fields": { 24 | # sub fields 25 | "english": {"analyzer": "english"}, 26 | "raw": {"type": "keyword"}, 27 | } 28 | }, 29 | "lastname": {"type": "text"}, 30 | "book": { 31 | "type": "nested", 32 | "properties": { 33 | "title": {"type": "text"}, 34 | "isbn": { # an object field in deep nested field 35 | "type": "object", 36 | "properties": { 37 | "ref": { 38 | "type": "keyword", 39 | }, 40 | }, 41 | }, 42 | "format": { 43 | "type": "nested", 44 | "properties": { 45 | "ftype": {"type": "keyword"}, 46 | }, 47 | }, 48 | }, 49 | }, 50 | }, 51 | }, 52 | "publish": { 53 | "type": "nested", 54 | "properties": { 55 | "site": {"type": "keyword"}, 56 | "idnum": {"type": "long"}, 57 | }, 58 | }, 59 | "manager": { 60 | "type": "object", 61 | "properties": { 62 | "firstname": {"type": "text"}, 63 | "address": { # an object field in an object field 64 | "type": "object", 65 | "properties": { 66 | "zipcode": {"type": "keyword"}, 67 | }, 68 | }, 69 | "subteams": { # a nested in an object field 70 | "type": "nested", 71 | "properties": { 72 | "supervisor": { # with an object field inside 73 | "type": "object", 74 | "properties": { 75 | "name": { 76 | "type": "text", 77 | # sub field 78 | "fields": {"raw": {"type": "keyword"}}, 79 | }, 80 | }, 81 | }, 82 | }, 83 | }, 84 | }, 85 | }, 86 | }, 87 | } 88 | 89 | INDEX_SETTINGS = { 90 | "settings": { 91 | "query": {"default_field": "text"}, 92 | }, 93 | "mappings": {}, 94 | } 95 | 96 | def setUp(self): 97 | super().setUp() 98 | if ES_6: 99 | self.INDEX_SETTINGS["mappings"] = self.MAPPING 100 | else: 101 | self.INDEX_SETTINGS["mappings"]["type1"] = self.MAPPING 102 | 103 | def test_default_field(self): 104 | s = SchemaAnalyzer(self.INDEX_SETTINGS) 105 | self.assertEqual(s.default_field(), "text") 106 | 107 | def test_not_analyzed_fields(self): 108 | s = SchemaAnalyzer(self.INDEX_SETTINGS) 109 | self.assertEqual( 110 | sorted(s.not_analyzed_fields()), 111 | [ 112 | 'author.book.format.ftype', 113 | 'author.book.isbn.ref', 114 | 'author.firstname.raw', 115 | 
'manager.address.zipcode', 116 | 'manager.subteams.supervisor.name.raw', 117 | 'publish.idnum', 118 | 'publish.site', 119 | ], 120 | ) 121 | 122 | def test_nested_fields(self): 123 | s = SchemaAnalyzer(self.INDEX_SETTINGS) 124 | self.assertEqual( 125 | s.nested_fields(), 126 | { 127 | 'author': { 128 | 'firstname': {}, 129 | 'lastname': {}, 130 | 'book': { 131 | 'format': { 132 | 'ftype': {} 133 | }, 134 | 'title': {}, 135 | 'isbn': {}, 136 | }, 137 | }, 138 | 'publish': { 139 | 'site': {}, 140 | 'idnum': {}, 141 | }, 142 | 'manager.subteams': { # FIXME !!!! 143 | 'supervisor': {}, 144 | }, 145 | } 146 | ) 147 | 148 | def test_object_fields(self): 149 | s = SchemaAnalyzer(self.INDEX_SETTINGS) 150 | self.assertEqual( 151 | sorted(s.object_fields()), 152 | [ 153 | 'author.book.isbn.ref', 154 | 'manager.address.zipcode', 155 | 'manager.firstname', 156 | 'manager.subteams.supervisor.name', 157 | ] 158 | ) 159 | 160 | def test_sub_fields(self): 161 | s = SchemaAnalyzer(self.INDEX_SETTINGS) 162 | self.assertEqual( 163 | sorted(s.sub_fields()), 164 | [ 165 | 'author.firstname.english', 166 | 'author.firstname.raw', 167 | 'manager.subteams.supervisor.name.raw', 168 | ] 169 | ) 170 | 171 | def test_empty(self): 172 | s = SchemaAnalyzer({}) 173 | self.assertEqual(s.default_field(), "*") 174 | self.assertEqual(list(s.not_analyzed_fields()), []) 175 | self.assertEqual(s.nested_fields(), {}) 176 | self.assertEqual(list(s.object_fields()), []) 177 | -------------------------------------------------------------------------------- /tests/test_pretty.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.pretty import Prettifier, prettify 4 | from luqum.tree import ( 5 | SearchField, FieldGroup, Group, Word, AndOperation, OrOperation, UnknownOperation) 6 | 7 | 8 | class TestPrettify(TestCase): 9 | 10 | big_tree = AndOperation( 11 | Group(OrOperation(Word("baaaaaaaaaar"), Word("baaaaaaaaaaaaaz"))), Word("fooooooooooo")) 12 | fat_tree = AndOperation( 13 | SearchField( 14 | "subject", 15 | FieldGroup( 16 | OrOperation( 17 | Word("fiiiiiiiiiiz"), 18 | AndOperation(Word("baaaaaaaaaar"), Word("baaaaaaaaaaaaaz"))))), 19 | AndOperation(Word("fooooooooooo"), Word("wiiiiiiiiiz"))) 20 | 21 | def test_one_liner(self): 22 | tree = AndOperation(Group(OrOperation(Word("bar"), Word("baz"))), Word("foo")) 23 | self.assertEqual(prettify(tree), "( bar OR baz ) AND foo") 24 | 25 | def test_with_unknown_op(self): 26 | prettify = Prettifier(indent=8, max_len=20) 27 | tree = UnknownOperation( 28 | Group( 29 | UnknownOperation( 30 | Word("baaaaaaaaaar"), 31 | Word("baaaaaaaaaaaaaz"))), 32 | Word("fooooooooooo")) 33 | self.assertEqual( 34 | "\n" + prettify(tree), """ 35 | ( 36 | baaaaaaaaaar 37 | baaaaaaaaaaaaaz 38 | ) 39 | fooooooooooo""") 40 | 41 | def test_with_unknown_op_nested(self): 42 | prettify = Prettifier(indent=8, max_len=20) 43 | tree = OrOperation( 44 | UnknownOperation( 45 | Word("baaaaaaaaaar"), 46 | Word("baaaaaaaaaaaaaz")), 47 | Word("fooooooooooo")) 48 | self.assertEqual( 49 | "\n" + prettify(tree), """ 50 | baaaaaaaaaar 51 | baaaaaaaaaaaaaz 52 | OR 53 | fooooooooooo""") 54 | 55 | def test_small(self): 56 | prettify = Prettifier(indent=8, max_len=20) 57 | self.assertEqual( 58 | "\n" + prettify(self.big_tree), """ 59 | ( 60 | baaaaaaaaaar 61 | OR 62 | baaaaaaaaaaaaaz 63 | ) 64 | AND 65 | fooooooooooo""") 66 | self.assertEqual( 67 | "\n" + prettify(self.fat_tree), """ 68 | subject: ( 69 | fiiiiiiiiiiz 70 | OR 71 | baaaaaaaaaar 72 | 
AND 73 | baaaaaaaaaaaaaz 74 | ) 75 | AND 76 | fooooooooooo 77 | AND 78 | wiiiiiiiiiz""") 79 | 80 | def test_small_inline_ops(self): 81 | prettify = Prettifier(indent=8, max_len=20, inline_ops=True) 82 | self.assertEqual("\n" + prettify(self.big_tree), """ 83 | ( 84 | baaaaaaaaaar OR 85 | baaaaaaaaaaaaaz ) AND 86 | fooooooooooo""") 87 | self.assertEqual("\n" + prettify(self.fat_tree), """ 88 | subject: ( 89 | fiiiiiiiiiiz OR 90 | baaaaaaaaaar AND 91 | baaaaaaaaaaaaaz ) AND 92 | fooooooooooo AND 93 | wiiiiiiiiiz""") 94 | 95 | def test_normal(self): 96 | prettify = Prettifier(indent=4, max_len=50) 97 | self.assertEqual("\n" + prettify(self.big_tree), """ 98 | ( 99 | baaaaaaaaaar OR baaaaaaaaaaaaaz 100 | ) 101 | AND 102 | fooooooooooo""") 103 | self.assertEqual("\n" + prettify(self.fat_tree), """ 104 | subject: ( 105 | fiiiiiiiiiiz 106 | OR 107 | baaaaaaaaaar AND baaaaaaaaaaaaaz 108 | ) 109 | AND 110 | fooooooooooo 111 | AND 112 | wiiiiiiiiiz""") 113 | 114 | def test_normal_inline_ops(self): 115 | prettify = Prettifier(indent=4, max_len=50, inline_ops=True) 116 | self.assertEqual("\n" + prettify(self.big_tree), """ 117 | ( 118 | baaaaaaaaaar OR baaaaaaaaaaaaaz ) AND 119 | fooooooooooo""") 120 | self.assertEqual("\n" + prettify(self.fat_tree), """ 121 | subject: ( 122 | fiiiiiiiiiiz OR 123 | baaaaaaaaaar AND baaaaaaaaaaaaaz ) AND 124 | fooooooooooo AND 125 | wiiiiiiiiiz""") 126 | -------------------------------------------------------------------------------- /tests/test_quick_start.rst: -------------------------------------------------------------------------------- 1 | ../docs/source/quick_start.rst -------------------------------------------------------------------------------- /tests/test_thread.py: -------------------------------------------------------------------------------- 1 | import queue 2 | import threading 3 | 4 | import ply.lex as lex 5 | 6 | from luqum.parser import parser 7 | from luqum.thread import parse 8 | from tests import alternative_lexer 9 | 10 | 11 | def test_thread_parse(): 12 | 13 | result_queue = queue.Queue() 14 | qs1 = """ 15 | (title:"foo bar" AND body:"quick fox") OR title:fox AND 16 | (title:"foo bar" AND body:"quick fox") OR 17 | title:fox AND (title:"foo bar" AND body:"quick fox") OR 18 | title:fox AND (title:"foo bar" AND body:"quick fox") OR 19 | title:fox AND (title:"foo bar" AND body:"quick fox") OR title:fox 20 | """ 21 | expected_tree = parser.parse(qs1) 22 | 23 | def run(q): 24 | parse(qs1) 25 | tree = parse(qs1) 26 | q.put(tree) 27 | 28 | # make concurrent calls 29 | threads = [threading.Thread(target=run, args=(result_queue,)) for i in range(100)] 30 | for thread in threads: 31 | thread.start() 32 | for thread in threads: 33 | thread.join() 34 | assert result_queue.qsize() == 100 35 | for i in range(100): 36 | assert result_queue.get() == expected_tree 37 | 38 | 39 | def test_thread_lex_global_state(): 40 | """ 41 | The last lexer created is used globally by default by the parser. If another 42 | library creates another lexer, it should not impact luqum. 43 | 44 | More info: [Multiple Parsers and 45 | Lexers](http://www.dabeaz.com/ply/ply.html#ply_nn37) 46 | """ 47 | qs = '(title:"foo bar" AND body:"quick fox")' 48 | 49 | lex.lex(module=alternative_lexer) 50 | # if there is a "luqum.exceptions.ParseSyntaxError", the wrong lexer was 51 | # used.
52 | parse(qs) 53 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from luqum.parser import parser 4 | from luqum.tree import (Group, Word, AndOperation, OrOperation, BoolOperation, 5 | UnknownOperation, Prohibit, Plus, From, To, Range, SearchField, 6 | Boost) 7 | from luqum.utils import UnknownOperationResolver, OpenRangeTransformer 8 | 9 | 10 | class UnknownOperationResolverTestCase(TestCase): 11 | 12 | def test_and_resolution(self): 13 | tree = ( 14 | UnknownOperation( 15 | Word("a"), 16 | Word("b"), 17 | OrOperation(Word("c"), Word("d")))) 18 | expected = ( 19 | AndOperation( 20 | Word("a"), 21 | Word("b"), 22 | OrOperation(Word("c"), Word("d")))) 23 | resolver = UnknownOperationResolver(resolve_to=AndOperation) 24 | self.assertEqual(resolver(tree), expected) 25 | 26 | def test_or_resolution(self): 27 | tree = ( 28 | UnknownOperation( 29 | Word("a"), 30 | Word("b"), 31 | AndOperation(Word("c"), Word("d")))) 32 | expected = ( 33 | OrOperation( 34 | Word("a"), 35 | Word("b"), 36 | AndOperation(Word("c"), Word("d")))) 37 | resolver = UnknownOperationResolver(resolve_to=OrOperation) 38 | self.assertEqual(resolver(tree), expected) 39 | 40 | def test_lucene_resolution_simple(self): 41 | tree = ( 42 | UnknownOperation( 43 | Word("a"), 44 | Word("b"), 45 | UnknownOperation(Word("c"), Word("d")))) 46 | expected = ( 47 | AndOperation( 48 | Word("a"), 49 | Word("b"), 50 | AndOperation(Word("c"), Word("d")))) 51 | resolver = UnknownOperationResolver(resolve_to=None) 52 | self.assertEqual(resolver(tree), expected) 53 | 54 | def test_lucene_resolution_bool(self): 55 | tree = parser.parse("a b (+f +g) -(c d) +e") 56 | expected = ( 57 | BoolOperation( 58 | Word("a"), 59 | Word("b"), 60 | Group(BoolOperation(Plus(Word("f")), Plus(Word("g")))), 61 | Prohibit(Group(BoolOperation(Word("c"), Word("d")))), 62 | Plus(Word('e')))) 63 | resolver = UnknownOperationResolver(resolve_to=BoolOperation) 64 | self.assertEqual(resolver(tree), expected) 65 | 66 | def test_lucene_resolution_last_op(self): 67 | tree = ( 68 | OrOperation( 69 | Word("a"), 70 | Word("b"), 71 | UnknownOperation(Word("c"), Word("d")), 72 | AndOperation( 73 | Word("e"), 74 | UnknownOperation(Word("f"), Word("g"))), 75 | UnknownOperation(Word("i"), Word("j")), 76 | OrOperation( 77 | Word("k"), 78 | UnknownOperation(Word("l"), Word("m"))), 79 | UnknownOperation(Word("n"), Word("o")))) 80 | expected = ( 81 | OrOperation( 82 | Word("a"), 83 | Word("b"), 84 | OrOperation(Word("c"), Word("d")), 85 | AndOperation( 86 | Word("e"), 87 | AndOperation(Word("f"), Word("g"))), 88 | AndOperation(Word("i"), Word("j")), 89 | OrOperation( 90 | Word("k"), 91 | OrOperation(Word("l"), Word("m"))), 92 | OrOperation(Word("n"), Word("o")))) 93 | resolver = UnknownOperationResolver(resolve_to=None) 94 | self.assertEqual(resolver(tree), expected) 95 | 96 | def test_lucene_resolution_last_op_with_group(self): 97 | tree = ( 98 | OrOperation( 99 | Word("a"), 100 | Word("b"), 101 | Group( 102 | AndOperation( 103 | Word("c"), 104 | UnknownOperation(Word("d"), Word("e")))), 105 | UnknownOperation(Word("f"), Word("g")), 106 | Group( 107 | UnknownOperation(Word("h"), Word("i"))))) 108 | expected = ( 109 | OrOperation( 110 | Word("a"), 111 | Word("b"), 112 | Group( 113 | AndOperation( 114 | Word("c"), 115 | AndOperation(Word("d"), Word("e")))), 116 | OrOperation(Word("f"), Word("g")), 117 | Group( 118 
| AndOperation(Word("h"), Word("i"))))) 119 | resolver = UnknownOperationResolver(resolve_to=None) 120 | self.assertEqual(resolver(tree), expected) 121 | 122 | def test_resolve_to_verification(self): 123 | with self.assertRaises(ValueError): 124 | UnknownOperationResolver(resolve_to=object()) 125 | 126 | def test_head_tail_pos(self): 127 | tree = parser.parse("\ra\nb (c\t (d e f)) ") 128 | resolver = UnknownOperationResolver(resolve_to=None) 129 | transformed = resolver(tree) 130 | self.assertEqual(str(transformed), "\ra\nAND b AND (c\t AND (d AND e AND f)) ") 131 | self.assertEqual(transformed.pos, tree.pos) 132 | self.assertEqual(transformed.size, tree.size) 133 | and_op, orig_op = transformed.children[2].children[0], tree.children[2].children[0] 134 | self.assertEqual(type(and_op), AndOperation) 135 | self.assertEqual(and_op.pos, orig_op.pos) 136 | self.assertEqual(and_op.size, orig_op.size) 137 | and_op, orig_op = and_op.children[1].children[0], orig_op.children[1].children[0] 138 | self.assertEqual(type(and_op), AndOperation) 139 | self.assertEqual(and_op.pos, orig_op.pos) 140 | self.assertEqual(and_op.size, orig_op.size) 141 | 142 | resolver = UnknownOperationResolver(resolve_to=OrOperation) 143 | transformed = resolver(tree) 144 | self.assertEqual(str(transformed), "\ra\nOR b OR (c\t OR (d OR e OR f)) ") 145 | 146 | 147 | class OpenRangeTransformerTestCase(TestCase): 148 | def test_simple_resolution_from(self): 149 | tree = ( 150 | From(Word("1"), True) 151 | ) 152 | expected = ( 153 | Range(Word("1", tail=" "), Word("*", head=" "), True, True) 154 | ) 155 | for merge_ranges in (True, False): 156 | with self.subTest(merge_ranges=merge_ranges): 157 | resolver = OpenRangeTransformer(merge_ranges=merge_ranges) 158 | output = resolver(tree) 159 | self.assertEqual(output, expected) 160 | self.assertEqual(str(output), str(expected)) 161 | 162 | def test_simple_resolution_to(self): 163 | tree = ( 164 | To(Word("1"), False) 165 | ) 166 | expected = ( 167 | Range(Word("*", tail=" "), Word("1", head=" "), True, False) 168 | ) 169 | for merge_ranges in (True, False): 170 | with self.subTest(merge_ranges=merge_ranges): 171 | resolver = OpenRangeTransformer(merge_ranges=merge_ranges) 172 | output = resolver(tree) 173 | self.assertEqual(output, expected) 174 | self.assertEqual(str(output), str(expected)) 175 | 176 | def test_and_resolution(self): 177 | tree = ( 178 | AndOperation( 179 | From(Word("1"), True), 180 | To(Word("2"), True), 181 | ) 182 | ) 183 | expected = ( 184 | AndOperation( 185 | Range(Word("1", tail=" "), Word("2", head=" "), True, True) 186 | ) 187 | ) 188 | resolver = OpenRangeTransformer(merge_ranges=True) 189 | output = resolver(tree) 190 | self.assertEqual(output, expected) 191 | self.assertEqual(str(output), str(expected)) 192 | 193 | def test_and_resolution_without_merge(self): 194 | tree = ( 195 | AndOperation( 196 | From(Word("1"), True), 197 | To(Word("2"), True), 198 | ) 199 | ) 200 | expected = ( 201 | AndOperation( 202 | Range(Word("1", tail=" "), Word("*", head=" "), True), 203 | Range(Word("*", tail=" "), Word("2", head=" "), True), 204 | ) 205 | ) 206 | resolver = OpenRangeTransformer(merge_ranges=False) 207 | output = resolver(tree) 208 | self.assertEqual(output, expected) 209 | self.assertEqual(str(output), str(expected)) 210 | 211 | def test_unjoined_resolution(self): 212 | tree = ( 213 | AndOperation( 214 | From(Word("1"), False), 215 | From(Word("2"), True), 216 | ) 217 | ) 218 | expected = ( 219 | AndOperation( 220 | Range(Word("1", tail=" "), Word("*", 
head=" "), False, True), 221 | Range(Word("2", tail=" "), Word("*", head=" "), True, True) 222 | ) 223 | ) 224 | resolver = OpenRangeTransformer(merge_ranges=True) 225 | output = resolver(tree) 226 | self.assertEqual(output, expected) 227 | self.assertEqual(str(output), str(expected)) 228 | 229 | def test_normal_ranges_are_untouched(self): 230 | tree = ( 231 | AndOperation( 232 | Range(Word("1"), Word("2"), True, True), 233 | Range(Word("*"), Word("*"), True, True), 234 | Range(Word("1"), Word("*"), True, True), 235 | ) 236 | ) 237 | for merge_ranges in (True, False): 238 | with self.subTest(merge_ranges=merge_ranges): 239 | resolver = OpenRangeTransformer(merge_ranges=merge_ranges) 240 | output = resolver(tree) 241 | self.assertEqual(output, tree) 242 | 243 | def test_first_range_is_merged(self): 244 | tree = ( 245 | AndOperation( 246 | Range(Word("*"), Word("2"), True, True), 247 | Range(Word("*"), Word("*"), True, True), 248 | Range(Word("*"), Word("3"), True, True), 249 | Range(Word("1"), Word("*"), True, True), 250 | Range(Word("4"), Word("*"), True, True), 251 | ) 252 | ) 253 | expected = ( 254 | AndOperation( 255 | Range(Word("1"), Word("2"), True, True), 256 | Range(Word("*"), Word("*"), True, True), 257 | Range(Word("4"), Word("3"), True, True), 258 | ) 259 | ) 260 | resolver = OpenRangeTransformer(merge_ranges=True) 261 | output = resolver(tree) 262 | self.assertEqual(output, expected) 263 | self.assertEqual(str(output), str(expected)) 264 | 265 | def test_do_not_merge_unknown(self): 266 | tree = ( 267 | UnknownOperation( 268 | Range(Word("1"), Word("*"), True, True), 269 | Range(Word("*"), Word("2"), True, True), 270 | ) 271 | ) 272 | resolver = OpenRangeTransformer(merge_ranges=True) 273 | output = resolver(tree) 274 | self.assertEqual(output, tree) 275 | 276 | def test_do_not_merge_searchfield(self): 277 | tree = ( 278 | AndOperation( 279 | Range(Word("1"), Word("*"), True, True), 280 | SearchField("foo", Range(Word("*"), Word("2"), True, True)) 281 | ) 282 | ) 283 | resolver = OpenRangeTransformer(merge_ranges=True) 284 | output = resolver(tree) 285 | self.assertEqual(output, tree) 286 | 287 | def test_do_not_merge_boosted(self): 288 | tree = ( 289 | AndOperation( 290 | Boost(Range(Word("1"), Word("*"), True, True), 2), 291 | Boost(Range(Word("*"), Word("2"), True, True), 2), 292 | ) 293 | ) 294 | resolver = OpenRangeTransformer(merge_ranges=True) 295 | output = resolver(tree) 296 | self.assertEqual(output, tree) 297 | -------------------------------------------------------------------------------- /tests/test_visitor.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import copy 3 | from unittest import TestCase 4 | 5 | from luqum.tree import ( 6 | NONE_ITEM, Group, Word, Phrase, AndOperation, OrOperation, Proximity, SearchField, 7 | Boost, Fuzzy, Regex, 8 | ) 9 | from luqum.visitor import ( 10 | PathTrackingTransformer, PathTrackingVisitor, TreeTransformer, TreeVisitor, 11 | ) 12 | 13 | 14 | class TreeVisitorTestCase(TestCase): 15 | 16 | class BasicVisitor(TreeVisitor): 17 | """Dummy visitor, simply yielding a list of nodes. 
""" 18 | 19 | def generic_visit(self, node, context): 20 | yield node 21 | yield from super().generic_visit(node, context) 22 | 23 | class TrackingParentsVisitor(TreeVisitor): 24 | """Visitor, yielding nodes and parents.""" 25 | 26 | def generic_visit(self, node, context): 27 | yield node, context.get("parents") 28 | yield from super().generic_visit(node, context) 29 | 30 | class MROVisitor(TreeVisitor): 31 | 32 | def visit_or_operation(self, node, context): 33 | yield "{} OR {}".format(*node.children) 34 | yield from super().generic_visit(node, context) 35 | 36 | def visit_base_operation(self, node, context): 37 | yield "{} BASE_OP {}".format(*node.children) 38 | yield from super().generic_visit(node, context) 39 | 40 | def visit_word(self, node, parents=[]): 41 | yield node.value 42 | 43 | def test_generic_visit(self): 44 | tree = AndOperation(Word("foo"), Word("bar")) 45 | visitor = TreeVisitor() 46 | nodes = visitor.visit(tree) 47 | self.assertEqual(nodes, []) 48 | # with a context for coverage… 49 | nodes = visitor.visit(tree, context={}) 50 | self.assertEqual(nodes, []) 51 | 52 | def test_basic_traversal(self): 53 | tree = AndOperation(Word("foo"), Word("bar")) 54 | visitor = self.BasicVisitor() 55 | nodes = visitor.visit(tree) 56 | self.assertListEqual([tree, Word("foo"), Word("bar")], nodes) 57 | 58 | def test_parents_tracking(self): 59 | tree = AndOperation(Word("foo"), Proximity(Phrase('"bar"'), 2)) 60 | visitor = self.TrackingParentsVisitor(track_parents=True) 61 | nodes = visitor.visit(tree) 62 | self.assertListEqual( 63 | [ 64 | (tree, None), 65 | (Word("foo"), (tree,)), 66 | (Proximity(Phrase('"bar"'), degree=2), (tree,)), 67 | (Phrase('"bar"'), (tree, Proximity(Phrase('"bar"'), 2))), 68 | ], 69 | nodes, 70 | ) 71 | 72 | def test_parents_tracking_no_tracking(self): 73 | tree = AndOperation(Word("foo"), Phrase('"bar"')) 74 | # no parents tracking ! 
75 | visitor = self.TrackingParentsVisitor() 76 | nodes = visitor.visit(tree) 77 | self.assertListEqual([(tree, None), (Word("foo"), None), (Phrase('"bar"'), None)], nodes) 78 | 79 | def test_mro(self): 80 | visitor = self.MROVisitor() 81 | 82 | tree = OrOperation(Word('a'), Word('b')) 83 | result = visitor.visit(tree) 84 | self.assertEqual(list(result), ['a OR b', 'a', 'b']) 85 | 86 | # AndOperation has no specific method, 87 | # but inherits BaseOperation, hence uses visit_base_operation 88 | tree = AndOperation(Word('a'), Word('b')) 89 | result = visitor.visit(tree) 90 | self.assertEqual(list(result), ['a BASE_OP b', 'a', 'b']) 91 | 92 | 93 | class TreeTransformerTestCase(TestCase): 94 | 95 | class BasicTransformer(TreeTransformer): 96 | """ 97 | Dummy transformer that simply turns any Word node's value into "lol" 98 | """ 99 | def visit_word(self, node, context): 100 | yield Word(context.get("replacement", 'lol')) 101 | 102 | def visit_phrase(self, node, context): 103 | yield from [] 104 | 105 | def visit_base_operation(self, node, context): 106 | new_node, = super().generic_visit(node, context) 107 | # if new_node has no operands, it's like a removal 108 | if len(new_node.children) == 0: 109 | return 110 | # if we have only one operand, return it 111 | elif len(new_node.children) == 1: 112 | yield new_node.children[0] 113 | else: 114 | # normal return 115 | yield new_node 116 | 117 | class TrackingParentsTransformer(TreeTransformer): 118 | 119 | def visit_word(self, node, context): 120 | new_node, = self.generic_visit(node, context) 121 | if any(isinstance(p, SearchField) for p in context["new_parents"]): 122 | new_node.value = "lol" 123 | yield new_node 124 | 125 | class RaisingTreeTransformer(TreeTransformer): 126 | 127 | def generic_visit(self, node, context): 128 | yield node 129 | yield node 130 | 131 | class RaisingTreeTransformer2(TreeTransformer): 132 | 133 | def generic_visit(self, node, context): 134 | raise ValueError("Random error") 135 | 136 | def test_basic_traversal(self): 137 | tree = AndOperation(Word("foo"), Word("bar")) 138 | 139 | transformer = self.BasicTransformer() 140 | new_tree = transformer.visit(tree) 141 | self.assertEqual(new_tree, AndOperation(Word("lol"), Word("lol"))) 142 | 143 | def test_context_value(self): 144 | tree = AndOperation(Word("foo"), Word("bar")) 145 | 146 | transformer = self.BasicTransformer() 147 | new_tree = transformer.visit(tree, context={"replacement": "rotfl"}) 148 | self.assertEqual(new_tree, AndOperation(Word("rotfl"), Word("rotfl"))) 149 | 150 | def test_no_transform(self): 151 | tree = AndOperation(NONE_ITEM, NONE_ITEM) 152 | transformer = self.BasicTransformer() 153 | new_tree = transformer.visit(tree) 154 | self.assertEqual(new_tree, tree) 155 | 156 | def test_one_word(self): 157 | tree = Word("foo") 158 | transformer = self.BasicTransformer() 159 | new_tree = transformer.visit(tree) 160 | self.assertEqual(new_tree, Word("lol")) 161 | 162 | def test_tracking_parents(self): 163 | tree = OrOperation(Word("foo"), SearchField("test", Word("bar"))) 164 | expected = OrOperation(Word("foo"), SearchField("test", Word("lol"))) 165 | transformer = self.TrackingParentsTransformer(track_new_parents=True) 166 | new_tree = transformer.visit(tree) 167 | self.assertEqual(new_tree, expected) 168 | 169 | def test_removal(self): 170 | tree = AndOperation( 171 | OrOperation(Word("spam"), Word("ham")), 172 | AndOperation(Word("foo"), Phrase('"bar"')), 173 | AndOperation(Phrase('"baz"'), Phrase('"biz"')), 174 | ) 175 | 176 | transformer = 
self.BasicTransformer() 177 | new_tree = transformer.visit(tree) 178 | 179 | self.assertEqual( 180 | new_tree, 181 | AndOperation(OrOperation(Word("lol"), Word("lol")), Word("lol")), 182 | ) 183 | 184 | def test_silent_value_error(self): 185 | # in case some attribute misleads the search for child nodes, do not raise 186 | tree = AndOperation(Word("a"), Word("b")) 187 | setattr(tree, "misleading1", ()) 188 | setattr(tree, "misleading2", []) 189 | # hackishly patch __dict__ to be sure operands come in the right order for the test 190 | tree.__dict__ = collections.OrderedDict(tree.__dict__) 191 | tree.__dict__['operands'] = tree.__dict__.pop('operands') # operands are now last 192 | 193 | transformer = self.BasicTransformer() 194 | new_tree = transformer.visit(tree) 195 | 196 | self.assertEqual(new_tree, AndOperation(Word("lol"), Word("lol"))) 197 | 198 | def test_repeating_expression(self): 199 | # non-regression test 200 | tree = AndOperation( 201 | Group(OrOperation(Word('bar'), Word('foo'))), 202 | Group(OrOperation(Word('bar'), Word('foo'), Word('spam'))), 203 | ) 204 | # the base transformer should not change the tree 205 | same_tree = TreeTransformer().visit(copy.deepcopy(tree)) 206 | self.assertEqual(same_tree, tree) 207 | 208 | def test_more_than_one_element_raises(self): 209 | tree = Word("foo") 210 | with self.assertRaises(ValueError) as raised: 211 | self.RaisingTreeTransformer().visit(tree) 212 | self.assertIn( 213 | "The visit of the tree should have produced exactly one element", 214 | str(raised.exception), 215 | ) 216 | 217 | def test_value_error_pass_through(self): 218 | # a ValueError that is not related to unpacking is passed through as-is 219 | tree = Word("foo") 220 | with self.assertRaises(ValueError) as raised: 221 | self.RaisingTreeTransformer2().visit(tree) 222 | self.assertEqual("Random error", str(raised.exception)) 223 | 224 | 225 | class PathTrackingVisitorTestCase(TestCase): 226 | 227 | class TermPathVisitor(PathTrackingVisitor): 228 | 229 | def visit_term(self, node, context): 230 | yield (context["path"], node.value) 231 | 232 | @classmethod 233 | def setUpClass(cls): 234 | cls.visit = cls.TermPathVisitor().visit 235 | 236 | def test_visit_simple_term(self): 237 | paths = self.visit(Word("foo")) 238 | self.assertEqual(paths, [((), "foo")]) 239 | 240 | def test_visit_complex(self): 241 | tree = AndOperation( 242 | Group(OrOperation(Word("foo"), Word("bar"), Boost(Fuzzy(Word("baz")), force=2))), 243 | Proximity(Phrase('"spam ham"')), 244 | SearchField("fizz", Regex("/fuzz/")), 245 | ) 246 | paths = self.visit(tree) 247 | self.assertEqual( 248 | sorted(paths), 249 | [ 250 | ((0, 0, 0), "foo"), 251 | ((0, 0, 1), "bar"), 252 | ((0, 0, 2, 0, 0), "baz"), 253 | ((1, 0), '"spam ham"'), 254 | ((2, 0), '/fuzz/'), 255 | ] 256 | ) 257 | 258 | 259 | class PathTrackingTransformerTestCase(TestCase): 260 | 261 | class TermPathTransformer(PathTrackingTransformer): 262 | 263 | def visit_term(self, node, context): 264 | path = '-'.join(str(i) for i in context['path']) 265 | quote = '"' if isinstance(node, Phrase) else "/" if isinstance(node, Regex) else "" 266 | value = node.value.strip(quote) 267 | new_node = node.clone_item(value=f"{quote}{value}@{path}{quote}") 268 | yield new_node 269 | 270 | @classmethod 271 | def setUpClass(cls): 272 | cls.transform = cls.TermPathTransformer().visit 273 | 274 | def test_visit_simple_term(self): 275 | tree = self.transform(Word("foo")) 276 | self.assertEqual(tree, Word("foo@")) 277 | 278 | def test_visit_complex(self): 279 | tree = AndOperation( 280 | 
Group(OrOperation(Word("foo"), Word("bar"), Boost(Fuzzy(Word("baz")), force=2))), 281 | Proximity(Phrase('"spam ham"')), 282 | SearchField("fizz", Regex("/fuzz/")), 283 | ) 284 | transformed = self.transform(tree) 285 | expected = AndOperation( 286 | Group(OrOperation( 287 | Word("foo@0-0-0"), 288 | Word("bar@0-0-1"), 289 | Boost(Fuzzy(Word("baz@0-0-2-0-0")), force=2), 290 | )), 291 | Proximity(Phrase('"spam ham@1-0"')), 292 | SearchField("fizz", Regex("/fuzz@2-0/")), 293 | ) 294 | self.assertEqual(transformed, expected) 295 | --------------------------------------------------------------------------------
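The naming tests near the top of this section only exercise set_name and auto_name indirectly, through the expected Elasticsearch output. A minimal sketch of the same API on a parsed tree, assuming only the behaviour those tests pin down; the query strings and the name "foo_bar" are illustrative only:

from luqum.naming import auto_name, set_name
from luqum.parser import parser

tree = parser.parse('text:"foo bar"')
# name a node explicitly: the name surfaces as the clause's "_name" entry
# once the tree is compiled to an Elasticsearch query, as in the tests above
set_name(tree, "foo_bar")

# or let luqum assign short names ("a", "b", ...) to the nodes of a tree
auto_name(parser.parse('text:"foo bar" AND (bar OR spam:baz)'))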
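extract_nested_queries, as pinned down by tests/test_elasticsearch/test_nested.py above, re-emits every named clause of a bool query found under a nested query as a standalone nested query. A minimal usage sketch, reusing the query shape of test_nested_bool_inside:

from luqum.elasticsearch.nested import extract_nested_queries

query = {
    "nested": {
        "path": "my",
        "query": {"bool": {"must": [
            {"term": {"text": {"value": "spam", "_name": "spam"}}},
            {"term": {"text": {"value": "ham", "_name": "ham"}}},
        ]}},
    }
}

# one standalone nested query per named clause, reusing the clause's _name;
# each can then be run on its own, e.g. to find out which named clauses
# matched a given document
for sub_query in extract_nested_queries(query):
    print(sub_query)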
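SchemaAnalyzer only needs the index settings dict, as the schema tests above show. A small sketch with a reduced mapping; the field names here are illustrative, not part of the library:

from luqum.elasticsearch.schema import SchemaAnalyzer

index_settings = {
    "settings": {"query": {"default_field": "text"}},
    "mappings": {"properties": {
        "text": {"type": "text"},
        "publish": {
            "type": "nested",
            "properties": {"site": {"type": "keyword"}},
        },
    }},
}

analyzer = SchemaAnalyzer(index_settings)
print(analyzer.default_field())                # text
print(sorted(analyzer.not_analyzed_fields()))  # ['publish.site']
print(analyzer.nested_fields())                # {'publish': {'site': {}}}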
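The prettifier tests fix the rendering contract: output stays on one line while it fits under max_len, and is otherwise broken up and indented (with operators kept inline when inline_ops=True). A sketch reusing the one-liner tree from test_one_liner; the narrow Prettifier parameters mirror those of the tests:

from luqum.pretty import Prettifier, prettify
from luqum.tree import AndOperation, Group, OrOperation, Word

tree = AndOperation(Group(OrOperation(Word("bar"), Word("baz"))), Word("foo"))
print(prettify(tree))  # ( bar OR baz ) AND foo

# a deliberately narrow prettifier forces the multi-line, indented layout
narrow = Prettifier(indent=8, max_len=20)
print(narrow(tree))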
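luqum.thread.parse is the concurrency-safe entry point exercised by test_thread_parse above. A condensed sketch of the same pattern; the thread count and query string are arbitrary:

import queue
import threading

from luqum.thread import parse

results = queue.Queue()
query = '(title:"foo bar" AND body:"quick fox") OR title:fox'

def worker():
    # concurrent calls should all return the same tree
    results.put(parse(query))

threads = [threading.Thread(target=worker) for _ in range(10)]
for thread in threads:
    thread.start()
for thread in threads:
    thread.join()

expected = parse(query)
assert all(results.get() == expected for _ in range(10))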
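UnknownOperationResolver and OpenRangeTransformer are tested above mostly on hand-built trees; the same transformations apply to parsed queries. A sketch, assuming resolve_to=None picks AND by default as in test_lucene_resolution_simple, and that complementary open ranges merge as in test_and_resolution:

from luqum.parser import parser
from luqum.utils import OpenRangeTransformer, UnknownOperationResolver

# implicit operators become UnknownOperation nodes at parse time
tree = parser.parse("foo bar")
resolver = UnknownOperationResolver(resolve_to=None)
print(str(resolver(tree)))  # foo AND bar

# a lower and an upper open bound can be merged into one closed range
range_tree = parser.parse("[1 TO *] AND [* TO 2]")
merger = OpenRangeTransformer(merge_ranges=True)
print(str(merger(range_tree)))  # a single [1 TO 2] range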
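Finally, the visitor tests document the dispatch rule: a node of class Word is handled by visit_word, lookup follows the node class's MRO, and transformer methods yield replacement nodes. A hedged sketch of a custom transformer; the class name and behaviour are illustrative only:

from luqum.tree import AndOperation, Word
from luqum.visitor import TreeTransformer

class UppercaseWords(TreeTransformer):
    # Word -> visit_word, following the naming rule shown in the tests above
    def visit_word(self, node, context):
        yield Word(node.value.upper())

tree = AndOperation(Word("foo"), Word("bar"))
assert UppercaseWords().visit(tree) == AndOperation(Word("FOO"), Word("BAR"))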