├── .coveragerc ├── .flake8 ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGES.rst ├── CONTRIBUTORS.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── bench.py ├── docs ├── Makefile ├── api.rst ├── conf.py ├── index.rst ├── make.bat ├── mlt.rst ├── query.rst └── usage.rst ├── pyproject.toml ├── scorched ├── __init__.py ├── compat.py ├── connection.py ├── dates.py ├── exc.py ├── response.py ├── search.py ├── strings.py ├── testing.py └── tests │ ├── __init__.py │ ├── conftest.py │ ├── data │ └── lipsum.pdf │ ├── docker-compose.yml │ ├── dumps │ ├── books.json │ ├── request_error.json │ ├── request_hl.json │ ├── request_hl_grouped.json │ ├── request_w_facets.json │ └── request_w_termvector.json │ ├── schema.py │ ├── solrconfig.xml │ ├── solrconfig_8.11.xml │ ├── test_connection.py │ ├── test_dates.py │ ├── test_functional.py │ ├── test_response.py │ ├── test_search.py │ ├── test_strings.py │ └── test_testing.py ├── setup.cfg ├── setup.py ├── testing-solr.sh └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | omit = 3 | */python?.?/* 4 | */pypy/* 5 | */site-packages/nose/* 6 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | #ignore = E203, E266, E501, W503, F403, F401 3 | max-line-length = 89 4 | max-complexity = 18 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | *eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | env 22 | _build 23 | 24 | # Installer logs 25 | pip-log.txt 26 | 27 | # Unit test / coverage reports 28 | .coverage 29 | .tox 30 | nosetests.xml 31 | 32 | # Translations 33 | *.mo 34 | 35 | # Mr Developer 36 | .mr.developer.cfg 37 | .project 38 | .pydevproject 39 | 40 | # vim 41 | *.swp 42 | 43 | # potential solr downloads 44 | solr-* 45 | 46 | man 47 | pyvenv.cfg 48 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v3.4.0 5 | hooks: 6 | - id: trailing-whitespace 7 | - id: end-of-file-fixer 8 | - id: fix-encoding-pragma 9 | args: [--remove] 10 | - id: check-yaml 11 | - id: debug-statements 12 | language_version: python3 13 | - id: check-xml 14 | types: [file] 15 | files: \.zcml$|\.xml$ 16 | - repo: https://github.com/timothycrosley/isort 17 | rev: 5.7.0 18 | hooks: 19 | - id: isort 20 | files: \.py$ 21 | - repo: https://github.com/psf/black 22 | rev: 21.6b0 23 | hooks: 24 | - id: black 25 | # - repo: https://github.com/pre-commit/mirrors-mypy 26 | # rev: 'v0.910' # Use the sha / tag you want to point at 27 | # hooks: 28 | # - id: mypy 29 | # additional_dependencies: [types-requests, types-setuptools] 30 | - repo: https://gitlab.com/pycqa/flake8 31 | rev: 3.8.4 32 | hooks: 33 | - id: flake8 34 | language_version: python3 35 | additional_dependencies: [flake8-typing-imports==1.9.0] 36 | - repo: https://github.com/mgedmin/check-manifest 37 | rev: "0.46" 38 | hooks: 39 | - id: check-manifest 40 | 
-------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | CHANGES 2 | ======= 3 | 4 | 1.0.0.0b3 (unreleased) 5 | ---------------------- 6 | 7 | - Nothing changed yet. 8 | 9 | 10 | 1.0.0.0b2 (2022-03-21) 11 | ---------------------- 12 | 13 | - Fix last version: PEP 440 does not support semantic versioning 14 | as proposed by https://semver.org (neither 1.0 nor 2.0). 15 | OpenStack suggests a modified semver as described here: 16 | https://docs.openstack.org/pbr/latest/user/semver.html 17 | 18 | 19 | 1.0.0-beta.1 (2022-03-21) 20 | ------------------------- 21 | 22 | Backwards incompatible changes 23 | ++++++++++++++++++++++++++++++ 24 | 25 | - In Response.facet_counts the default value for 26 | `facet_counts.facet_pivot` is now `{}` instead of `()` if 27 | `facet_pivot` was not in the Solr response JSON 28 | 29 | - Remove support for Python 2.7 30 | 31 | Bug fixes 32 | +++++++++ 33 | 34 | - Added missing mlt parameter (mlissner) 35 | 36 | Features 37 | ++++++++ 38 | 39 | - Interpret fields of type `date` and/or `pdate` as 40 | Solr date fields 41 | 42 | - Python 3.9 is now the baseline. 43 | 44 | 45 | 0.13.0 (2020-01-25) 46 | ------------------- 47 | 48 | - Add support for Python 3.6, Python 3.7 and Python 3.8 (ale-rt) 49 | 50 | 51 | 0.12 (2017-03-16) 52 | ----------------- 53 | 54 | - Add extract handler support (mamico) 55 | 56 | - Allow user to pass an http_connection to SolrInterface ctor (quinot) 57 | 58 | - ``BaseSearch`` now has a ``count`` function (mlissner) 59 | 60 | - Support atomic updates (ale-rt) 61 | 62 | 63 | 0.11.0 (2016-10-11) 64 | ------------------- 65 | 66 | - Highlighting is now available in the result documents as the 67 | ``solr_highlights`` field (mlissner) 68 | 69 | - Smaller documentation cleanup 70 | 71 | 72 | 0.10.2 (2016-09-27) 73 | ------------------- 74 | 75 | - Fix is_iter implementation #30 (mamico) 76 | 77 | - Multi-value date fields work (mlissner) 78 | 79 | - Fixes error in the readme so that DEBUG mode works as documented (mlissner) 80 | 81 | 82 | 0.10.1 (2016-06-15) 83 | ------------------- 84 | 85 | - Fixing setup.py classifier. 86 | 87 | 88 | 0.10 (2016-06-15) 89 | ----------------- 90 | 91 | - Return response for update actions (mamico) 92 | 93 | - Add support for Solr cursors (Chronial) 94 | 95 | - Added stats option (rlskoeser) 96 | 97 | 98 | 0.9 (2015-11-09) 99 | ---------------- 100 | 101 | - Better check of datetime dynamicfields (mamico) 102 | 103 | - RealTime Get (Chronial) 104 | 105 | - TermVector support (Chronial) 106 | 107 | 108 | 0.8 (2015-08-26) 109 | ---------------- 110 | 111 | - Use compat.basestring over compat.str in date convert (mamico) 112 | 113 | - Remove test from core requirements (mamico) 114 | 115 | - Added search_timeout parameter to SolrConnection (mamico) 116 | 117 | - Fix: do not alter documents while adding new documents 118 | 119 | 120 | 0.7 (2015-04-17) 121 | ---------------- 122 | 123 | - Test against Solr 4.10.2 and added Python 3.4 to travis. 124 | 125 | - Added support for dismax queries. 126 | 127 | - Added support for edismax field aliases. 128 | 129 | - Added support for facet ranges. 130 | 131 | 132 | 0.6 (2014-06-23) 133 | ---------------- 134 | 135 | - Add spellchecking for scorched queries. (#9707) 136 | 137 | 138 | 0.5 (2014-06-05) 139 | ---------------- 140 | 141 | - Add `debugQuery` parameter to search. (#9903) 142 | 143 | - Add possibility to specify the request handler to use per query.
(#9704) 144 | 145 | 146 | 0.4.1 (2014-04-16) 147 | ------------------ 148 | 149 | - Fixed fields in field_limiter (again). 150 | 151 | 152 | 0.4 (2014-04-16) 153 | ---------------- 154 | 155 | - Fixed conversion of fields to arrays. 156 | 157 | - Added FacetPivotOptions. 158 | 159 | - Added PostingsHighlightOptions. 160 | 161 | - Added boundaryScanner to HighlightOptions. 162 | 163 | 164 | 0.3 (2014-04-03) 165 | ---------------- 166 | 167 | - Makes SolrResponse iterable. 168 | 169 | 170 | 0.2 (2014-03-24) 171 | ---------------- 172 | 173 | - Added more tests 174 | 175 | - Added description in setup.py 176 | 177 | 178 | 0.1 (2014-03-20) 179 | ---------------- 180 | 181 | - Python 3 182 | 183 | - Cleaner api: moved redundant functions 184 | 185 | - Cleaner api: removed filter_exclude; use ~si.Q() 186 | 187 | - Cleaner api: removed exclude; use ~si.Q() 188 | 189 | - Fixed mlt_search (mlt component and handler) 190 | 191 | - Removed mx.DateTime 192 | 193 | - Removed redundant more_like_this 194 | 195 | - Offspring of sunburnt is born 196 | -------------------------------------------------------------------------------- /CONTRIBUTORS.rst: -------------------------------------------------------------------------------- 1 | Contributors 2 | ============ 3 | 4 | - Alessandro Pisa 5 | 6 | - Mauro Amico 7 | 8 | - Josip Delic 9 | 10 | - Janko Hauser 11 | 12 | - Mark E. Haase 13 | 14 | - Mike Lissner 15 | 16 | - Thomas Quinot 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, 2010, 2011 Toby White 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE.
20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.py 2 | include *.sh 3 | include *.rst 4 | 5 | recursive-include docs *.bat 6 | recursive-include docs *.py 7 | recursive-include docs *.rst 8 | recursive-include docs Makefile 9 | recursive-include scorched *.json 10 | recursive-include scorched *.pdf 11 | recursive-include scorched *.xml 12 | recursive-include scorched *.yml 13 | 14 | exclude .coveragerc 15 | exclude tox.ini 16 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Scorched 2 | ======== 3 | 4 | Scorched is a sunburnt offspring and like all offspring it tries to make 5 | things better or at least different. 6 | 7 | Git Repository and issue tracker: https://github.com/lugensa/scorched 8 | 9 | Documentation: http://scorched.readthedocs.org/en/latest/ 10 | 11 | .. |travisci| image:: https://travis-ci.org/lugensa/scorched.png 12 | .. _travisci: https://travis-ci.org/lugensa/scorched 13 | 14 | .. image:: https://coveralls.io/repos/lugensa/scorched/badge.png 15 | :target: https://coveralls.io/r/lugensa/scorched 16 | 17 | |travisci|_ 18 | 19 | .. _Solr : http://lucene.apache.org/solr/ 20 | .. _Lucene : http://lucene.apache.org/java/docs/index.html 21 | 22 | 23 | Some of the major differences: 24 | 25 | - No validation of queries in client code (makes the code much more lightweight) 26 | 27 | - Send and receive as JSON (faster: adding 20k docs dropped from 6.5s to 1.3s) 28 | 29 | - The API is more lightweight, e.g. ``add`` now consumes only dicts. 30 | 31 | - Wildcard search strings need to be explicitly set. 32 | 33 | - Python 3 34 | 35 | - Drops support for Solr < 4.3.0 36 | 37 | - ... 38 | 39 | 40 | Local testing 41 | ============= 42 | 43 | We switched to pytest and pytest-docker to run 44 | the tests. 45 | 46 | The OS account under which you run the tests 47 | must have permission to start Docker containers. 48 | 49 | First check out the sources:: 50 | 51 | git clone https://github.com/lugensa/scorched.git 52 | 53 | Now use tox for testing:: 54 | 55 | cd scorched 56 | tox 57 | 58 | Alternatively, use pytest directly:: 59 | 60 | cd scorched 61 | python3.10 -mvenv . 62 | ./bin/pip install -e .[test] 63 | ./bin/pytest ./scorched 64 | 65 | Running the tests will start Solr 8.11.1 in Docker 66 | (see scorched/tests/docker-compose.yml). 67 | -------------------------------------------------------------------------------- /bench.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | import scorched 4 | import time 5 | import datetime 6 | 7 | from matplotlib import pyplot 8 | from scorched.compat import is_py2 9 | 10 | if is_py2: 11 | import sunburnt 12 | 13 | 14 | def build(n): 15 | docs = [] 16 | for i in range(n): 17 | doc = {'author_t': 'George R.R.
Martin', 18 | 'cat': 'book', 19 | 'date_dt': datetime.datetime(2014, 3, 11, 10, 49, 0, 747991), 20 | 'genre_s': 'fantasy', 21 | 'id': '%s' % i, 22 | 'inStock': True, 23 | 'name': 'A fisch of Thrones', 24 | 'price': 7.99, 25 | 'sequence_i': 1, 26 | 'series_t': 'A Song of Ice and Fire'} 27 | docs.append(doc) 28 | return docs 29 | 30 | 31 | def run(n, interface): 32 | docs = build(n) 33 | si = interface("http://localhost:8983/solr/") 34 | start = time.perf_counter()  # time.clock() was removed in Python 3.8 35 | si.add(docs) 36 | si.commit() 37 | elapsed = time.perf_counter() - start 38 | print("%s docs took %ss" % (len(docs), elapsed)) 39 | query = si.query(name='fisch') 40 | res = si.search(**query.options()) 41 | print("found %s" % res.result.numFound) 42 | si.delete_all() 43 | si.commit() 44 | return {'x': n, 'y': elapsed} 45 | 46 | count = 21 47 | if is_py2: 48 | data_sunburnt = [] 49 | for i in [x*1000 for x in range(1, count)]: 50 | data_sunburnt.append(run(i, sunburnt.SolrInterface)) 51 | 52 | data_scorched = [] 53 | for i in [x*1000 for x in range(1, count)]: 54 | data_scorched.append(run(i, scorched.SolrInterface)) 55 | 56 | if is_py2: 57 | pyplot.plot( 58 | [x['x'] for x in data_sunburnt], [y['y'] for y in data_sunburnt], '-') 59 | pyplot.plot( 60 | [x['x'] for x in data_scorched], [y['y'] for y in data_scorched], '-') 61 | pyplot.title('Plotting adding speed') 62 | pyplot.xlabel('Number of documents') 63 | pyplot.ylabel('Time in seconds (less is better)') 64 | if is_py2: 65 | pyplot.legend(['sunburnt', 'scorched']) 66 | else: 67 | pyplot.legend(['scorched']) 68 | pyplot.savefig('bench.png') 69 | pyplot.show() 70 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = ../env/bin/sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/scorched.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/scorched.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/scorched" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/scorched" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | Scorched API 4 | ============ 5 | 6 | API 7 | --- 8 | 9 | .. automodule:: scorched.connection 10 | :members: grouper 11 | 12 | .. autoclass:: SolrConnection 13 | :members: 14 | 15 | .. automethod:: __init__ 16 | 17 | .. autoclass:: SolrInterface 18 | :members: 19 | 20 | .. 
automethod:: __init__ 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # scorched documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Mar 12 21:48:32 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.todo', 34 | 'sphinx.ext.viewcode', 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # The suffix of source filenames. 41 | source_suffix = '.rst' 42 | 43 | # The encoding of source files. 44 | #source_encoding = 'utf-8-sig' 45 | 46 | # The master toctree document. 47 | master_doc = 'index' 48 | 49 | # General information about the project. 50 | project = u'scorched' 51 | copyright = u'2014, Josip Delic' 52 | 53 | # The version info for the project you're documenting, acts as replacement for 54 | # |version| and |release|, also used in various other places throughout the 55 | # built documents. 56 | # 57 | # The short X.Y version. 58 | version = '0.1' 59 | # The full version, including alpha/beta/rc tags. 60 | release = '0.1' 61 | 62 | # The language for content autogenerated by Sphinx. Refer to documentation 63 | # for a list of supported languages. 64 | #language = None 65 | 66 | # There are two options for replacing |today|: either, you set today to some 67 | # non-false value, then it is used: 68 | #today = '' 69 | # Else, today_fmt is used as the format for a strftime call. 70 | #today_fmt = '%B %d, %Y' 71 | 72 | # List of patterns, relative to source directory, that match files and 73 | # directories to ignore when looking for source files. 74 | exclude_patterns = ['_build'] 75 | 76 | # The reST default role (used for this markup: `text`) to use for all 77 | # documents. 78 | #default_role = None 79 | 80 | # If true, '()' will be appended to :func: etc. cross-reference text. 81 | #add_function_parentheses = True 82 | 83 | # If true, the current module name will be prepended to all description 84 | # unit titles (such as .. function::). 85 | #add_module_names = True 86 | 87 | # If true, sectionauthor and moduleauthor directives will be shown in the 88 | # output. They are ignored by default. 89 | #show_authors = False 90 | 91 | # The name of the Pygments (syntax highlighting) style to use. 
92 | pygments_style = 'sphinx' 93 | 94 | # A list of ignored prefixes for module index sorting. 95 | #modindex_common_prefix = [] 96 | 97 | # If true, keep warnings as "system message" paragraphs in the built documents. 98 | #keep_warnings = False 99 | 100 | 101 | # -- Options for HTML output ---------------------------------------------- 102 | 103 | # The theme to use for HTML and HTML Help pages. See the documentation for 104 | # a list of builtin themes. 105 | html_theme = 'default' 106 | 107 | # Theme options are theme-specific and customize the look and feel of a theme 108 | # further. For a list of options available for each theme, see the 109 | # documentation. 110 | #html_theme_options = {} 111 | 112 | # Add any paths that contain custom themes here, relative to this directory. 113 | #html_theme_path = [] 114 | 115 | # The name for this set of Sphinx documents. If None, it defaults to 116 | # "<project> v<release> documentation". 117 | #html_title = None 118 | 119 | # A shorter title for the navigation bar. Default is the same as html_title. 120 | #html_short_title = None 121 | 122 | # The name of an image file (relative to this directory) to place at the top 123 | # of the sidebar. 124 | #html_logo = None 125 | 126 | # The name of an image file (within the static path) to use as favicon of the 127 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 128 | # pixels large. 129 | #html_favicon = None 130 | 131 | # Add any paths that contain custom static files (such as style sheets) here, 132 | # relative to this directory. They are copied after the builtin static files, 133 | # so a file named "default.css" will overwrite the builtin "default.css". 134 | html_static_path = ['_static'] 135 | 136 | # Add any extra paths that contain custom files (such as robots.txt or 137 | # .htaccess) here, relative to this directory. These files are copied 138 | # directly to the root of the documentation. 139 | #html_extra_path = [] 140 | 141 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 142 | # using the given strftime format. 143 | #html_last_updated_fmt = '%b %d, %Y' 144 | 145 | # If true, SmartyPants will be used to convert quotes and dashes to 146 | # typographically correct entities. 147 | #html_use_smartypants = True 148 | 149 | # Custom sidebar templates, maps document names to template names. 150 | #html_sidebars = {} 151 | 152 | # Additional templates that should be rendered to pages, maps page names to 153 | # template names. 154 | #html_additional_pages = {} 155 | 156 | # If false, no module index is generated. 157 | #html_domain_indices = True 158 | 159 | # If false, no index is generated. 160 | #html_use_index = True 161 | 162 | # If true, the index is split into individual pages for each letter. 163 | #html_split_index = False 164 | 165 | # If true, links to the reST sources are added to the pages. 166 | #html_show_sourcelink = True 167 | 168 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 169 | #html_show_sphinx = True 170 | 171 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 172 | #html_show_copyright = True 173 | 174 | # If true, an OpenSearch description file will be output, and all pages will 175 | # contain a <link> tag referring to it. The value of this option must be the 176 | # base URL from which the finished HTML is served. 177 | #html_use_opensearch = '' 178 | 179 | # This is the file name suffix for HTML files (e.g. ".xhtml").
180 | #html_file_suffix = None 181 | 182 | # Output file base name for HTML help builder. 183 | htmlhelp_basename = 'scorcheddoc' 184 | 185 | 186 | # -- Options for LaTeX output --------------------------------------------- 187 | 188 | latex_elements = { 189 | # The paper size ('letterpaper' or 'a4paper'). 190 | #'papersize': 'letterpaper', 191 | 192 | # The font size ('10pt', '11pt' or '12pt'). 193 | #'pointsize': '10pt', 194 | 195 | # Additional stuff for the LaTeX preamble. 196 | #'preamble': '', 197 | } 198 | 199 | # Grouping the document tree into LaTeX files. List of tuples 200 | # (source start file, target name, title, 201 | # author, documentclass [howto, manual, or own class]). 202 | latex_documents = [ 203 | ('index', 'scorched.tex', u'scorched Documentation', 204 | u'Josip Delic', 'manual'), 205 | ] 206 | 207 | # The name of an image file (relative to this directory) to place at the top of 208 | # the title page. 209 | #latex_logo = None 210 | 211 | # For "manual" documents, if this is true, then toplevel headings are parts, 212 | # not chapters. 213 | #latex_use_parts = False 214 | 215 | # If true, show page references after internal links. 216 | #latex_show_pagerefs = False 217 | 218 | # If true, show URL addresses after external links. 219 | #latex_show_urls = False 220 | 221 | # Documents to append as an appendix to all manuals. 222 | #latex_appendices = [] 223 | 224 | # If false, no module index is generated. 225 | #latex_domain_indices = True 226 | 227 | 228 | # -- Options for manual page output --------------------------------------- 229 | 230 | # One entry per manual page. List of tuples 231 | # (source start file, name, description, authors, manual section). 232 | man_pages = [ 233 | ('index', 'scorched', u'scorched Documentation', 234 | [u'Josip Delic'], 1) 235 | ] 236 | 237 | # If true, show URL addresses after external links. 238 | #man_show_urls = False 239 | 240 | 241 | # -- Options for Texinfo output ------------------------------------------- 242 | 243 | # Grouping the document tree into Texinfo files. List of tuples 244 | # (source start file, target name, title, author, 245 | # dir menu entry, description, category) 246 | texinfo_documents = [ 247 | ('index', 'scorched', u'scorched Documentation', 248 | u'Josip Delic', 'scorched', 'One line description of project.', 249 | 'Miscellaneous'), 250 | ] 251 | 252 | # Documents to append as an appendix to all manuals. 253 | #texinfo_appendices = [] 254 | 255 | # If false, no module index is generated. 256 | #texinfo_domain_indices = True 257 | 258 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 259 | #texinfo_show_urls = 'footnote' 260 | 261 | # If true, do not generate a @detailmenu in the "Top" node's menu. 262 | #texinfo_no_detailmenu = False 263 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. scorched documentation master file, created by 2 | sphinx-quickstart on Wed Mar 12 21:48:32 2014. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to scorched's documentation! 7 | ==================================== 8 | 9 | Contents: 10 | 11 | .. 
toctree:: 12 | :maxdepth: 2 13 | 14 | usage.rst 15 | query.rst 16 | mlt.rst 17 | api.rst 18 | 19 | 20 | Indices and tables 21 | ================== 22 | 23 | * :ref:`genindex` 24 | * :ref:`modindex` 25 | * :ref:`search` 26 | 27 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use \`make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files.
92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\scorched.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\scorched.qhc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo.
212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/mlt.rst: -------------------------------------------------------------------------------- 1 | .. _mlt: 2 | 3 | More Like This queries 4 | ====================== 5 | 6 | More Like This (MLT) is a feature of Solr which provides for comparisons of 7 | documents; you can ask Solr to tell you about any More documents it has that 8 | are Like This one. 9 | 10 | An MLT query can be part of a standard query (see 11 | :ref:`standard-query-more-like-this`), in which case you're asking Solr to 12 | tell you not only about immediate query results, but also about any other 13 | results which are similar to the results you've got. 14 | 15 | Alternatively, you can feed Solr an entire document that is not already in its 16 | index, and ask it to do an MLT query on that document. 17 | 18 | The first case is covered in :ref:`standard-query-more-like-this`; the 19 | second case we'll show here. 20 | 21 | Basic MLT query 22 | --------------- 23 | 24 | Instead of calling the ``query`` method on the interface, we call the 25 | ``mlt_query`` method. 26 | 27 | :: 28 | 29 | >>> si.mlt_query(fields="name", content=open("localfile").read()) 30 | 31 | We give the MLT handler some content (sourced in this case from a local file); 32 | the MLT query will take this text, analyze it, and retrieve documents that are 33 | similar according to the results of its analysis. 34 | 35 | The results are returned in the same format as illustrated in the ``mlt()`` 36 | method. 37 | 38 | Further MLT query options 39 | ------------------------- 40 | 41 | If we want similarity to be calculated with respect to a different field or 42 | fields: 43 | 44 | :: 45 | 46 | >>> si.mlt_query(content=open("localfile").read(), 47 | ... fields=["name", "author_t"]) 48 | 49 | We can understand a little more about why we get the results we do by asking 50 | for the result of the MLT document analysis. 51 | 52 | :: 53 | 54 | >>> si.mlt_query(fields="name", content=open("localfile").read(), 55 | ... interestingTerms="list") 56 | >>> si.mlt_query(fields="name", content=open("localfile").read(), 57 | ... interestingTerms="details") 58 | 59 | "list" will return a list of the interesting terms extracted; "details" will 60 | also provide details of the boost used for each term.
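For example, a minimal sketch of executing such a query and reading back the extracted terms (the file name and the terms shown here are illustrative, not real output):

::

    >>> res = si.mlt_query(fields="name", content=open("localfile").read(),
    ...                    interestingTerms="list").execute()
    >>> res.interesting_terms  # e.g. [u'name:thrones', u'name:fire']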
61 | 62 | If the document you're supplying is not encoded in UTF-8 (or equivalently 63 | ASCII) format, then you need to specify the charset in use (using the list 64 | available at http://docs.python.org/library/codecs.html#standard-encodings): 65 | 66 | :: 67 | 68 | >>> si.mlt_query(fields="name", content=open("localfile").read(), 69 | ... content_charset="iso-8859-1") 70 | 71 | Sourcing content from the web 72 | ----------------------------- 73 | 74 | You can also choose to tell Solr to source the document from the web, by giving 75 | the URL for the content rather than supplying it yourself: 76 | 77 | :: 78 | 79 | >>> si.mlt_query(fields="name", url="http://example.com/document") 80 | 81 | All the other options above still apply to URL-sourced content, except for 82 | "content_charset"; that's up to the webserver where the content is stored. 83 | 84 | In all the cases above, you can also specify any of the other options shown in 85 | ``mlt()``, apart from "count". 86 | 87 | MLT queries on indexed content 88 | ------------------------------ 89 | 90 | You can perform an MLT query on indexed content in the following way: 91 | 92 | :: 93 | 94 | >>> res = si.mlt_query("genre_s", interestingTerms="details", 95 | ... mintf=1, mindf=1).query( 96 | ... id="978-0641723445").execute() 97 | >>> res.result.docs 98 | [{u'_version_': 1462917302263480320, 99 | u'author': u'Rick Riordan', 100 | u'author_s': u'Rick Riordan', 101 | u'cat': [u'book', u'paperback'], 102 | u'genre_s': u'fantasy', 103 | u'id': u'978-1423103349', 104 | u'inStock': True, 105 | u'name': u'The Sea of Monsters', 106 | u'pages_i': 304, 107 | u'price': 6.49, 108 | u'price_c': u'6.49,USD', 109 | u'sequence_i': 2, 110 | u'series_t': u'Percy Jackson and the Olympians'}, 111 | {u'_version_': 1462917302263480321, 112 | u'author': u'Jostein Gaarder', 113 | u'author_s': u'Jostein Gaarder', 114 | u'cat': [u'book', u'paperback'], 115 | u'genre_s': u'fantasy', 116 | u'id': u'978-1857995879', 117 | u'inStock': True, 118 | u'name': u"Sophie's World : The Greek Philosophers", 119 | u'pages_i': 64, 120 | u'price': 3.07, 121 | u'price_c': u'3.07,USD', 122 | u'sequence_i': 1}] 123 | >>> res.interesting_terms 124 | [u'genre_s:fantasy', 1.0] 125 | 126 | i.e. initialize an otherwise empty mlt_query object, and then run queries on it 127 | as you would run normal queries. The full range of query operations is 128 | supported when composing the query for indexed content: 129 | 130 | :: 131 | 132 | >>> si.mlt_query("name").query(title='Whale').query(~si.Q( 133 | ... author='Melville').query(si.Q('Moby') | si.Q('Dick')) 134 | 135 | Chaining MLT queries 136 | -------------------- 137 | 138 | The ``mlt_query()`` method is chainable in the same way as the ``query`` 139 | method. There are a few differences to note. 140 | 141 | * You can't chain a ``query()`` onto an ``mlt_query()`` call 142 | if the MLT query is based on supplied ``content`` or ``url``. 143 | * You can't chain multiple ``mlt_query()`` methods together - only one content 144 | source can be considered at a time. 145 | 146 | The ``mlt_query()`` method takes all of the ``mlt()`` options except "count". 147 | -------------------------------------------------------------------------------- /docs/query.rst: -------------------------------------------------------------------------------- 1 | .. _querying: 2 | 3 | Querying 4 | ======== 5 | 6 | For the examples in this chapter, I'll be assuming that you've loaded your 7 | server up with the books data supplied with the example Solr setup.
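Throughout this chapter, ``si`` is a ``SolrInterface`` instance pointing at that server; a minimal sketch (the URL assumes the stock example setup on localhost, as also used in ``bench.py`` - adjust it to your own core):

::

    >>> import scorched
    >>> si = scorched.SolrInterface("http://localhost:8983/solr/")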
8 | 9 | The data itself you can see at 10 | ``$SOLR_SOURCE_DIR/example/exampledocs/books.json``. To load it into a server 11 | running with the example schema: 12 | 13 | :: 14 | 15 | $ cd example/exampledocs 16 | $ curl 'http://localhost:8983/solr/update/json?commit=true' --data-binary \ 17 | @books.json -H 'Content-type:application/json' 18 | 19 | Searching Solr 20 | -------------- 21 | 22 | Scorched uses a chaining API, and will hopefully look quite familiar to anyone 23 | who has used the Django ORM. 24 | 25 | The ``books.json`` data looked like this:: 26 | 27 | [ 28 | { 29 | "id" : "978-0641723445", 30 | "cat" : ["book","hardcover"], 31 | "name" : "The Lightning Thief", 32 | "author" : "Rick Riordan", 33 | "series_t" : "Percy Jackson and the Olympians", 34 | "sequence_i" : 1, 35 | "genre_s" : "fantasy", 36 | "inStock" : true, 37 | "price" : 12.50, 38 | "pages_i" : 384 39 | } 40 | ... 41 | ] 42 | 43 | .. note:: Dynamic fields. 44 | 45 | Dynamic fields are named with a suffix (*_i, *_t, *_s). 46 | 47 | A simple search for one word, in the default search field. 48 | 49 | :: 50 | 51 | >>> si.query("thief") 52 | 53 | Maybe you want to search in the (non-default) field author for authors called 54 | Rick. 55 | 56 | :: 57 | 58 | >>> si.query(author="rick") 59 | 60 | Maybe you want to search for books with "thief" in their title, by an author 61 | called "rick". 62 | 63 | :: 64 | 65 | >>> si.query(name="thief", author="rick") 66 | 67 | Perhaps your initial, default, search is more complex, and has more than one 68 | word in it: 69 | 70 | :: 71 | 72 | >>> si.query(name="lightning").query(name="thief") 73 | 74 | An easy way to see what scorched is producing is to call ``options()``:: 75 | 76 | >>> si.query(name="lightning").query(name="thief").options() 77 | {'q': u'name:lightning AND name:thief'} 78 | 79 | Executing queries 80 | ----------------- 81 | 82 | Scorched is lazy in constructing queries. The examples in the previous section 83 | don’t actually perform the query - they just create a "query object" with the 84 | correct parameters. To actually get the results of the query, you’ll need to 85 | execute it: 86 | 87 | :: 88 | 89 | >>> response = si.query("thief").execute() 90 | 91 | This will return a ``SolrResponse`` object. If you treat this object as a list, 92 | then each member of the list will be a document, in the form of a Python 93 | dictionary containing the relevant fields: 94 | 95 | For example, if you run the first example query above, you should see a 96 | response like this: 97 | 98 | :: 99 | 100 | >>> for result in si.query("thief").execute(): 101 | ... print(result) 102 | { 103 | u'name': u'The Lightning Thief', 104 | u'author': u'Rick Riordan', 105 | u'series_t': u'Percy Jackson and the Olympians', 106 | u'pages_i': 384, 107 | u'genre_s': u'fantasy', 108 | u'author_s': u'Rick Riordan', 109 | u'price': 12.5, 110 | u'price_c': u'12.5,USD', 111 | u'sequence_i': 1, 112 | u'inStock': True, 113 | u'_version_': 1462820023761371136, 114 | u'cat': [u'book', u'hardcover'], 115 | u'id': u'978-0641723445' 116 | } 117 | 118 | Of course, often you don’t want your results in the form of a dictionary; 119 | you want an object. Perhaps you have the following class defined in your code: 120 | 121 | :: 122 | 123 | >>> class Book: 124 | ... def __init__(self, name, author, **other_kwargs): 125 | ... self.title = name 126 | ... self.author = author 127 | ... self.other_kwargs = other_kwargs 128 | ... 129 | ... def __repr__(self): 130 | ... 
return 'Book("%s", "%s")' % (self.title, self.author) 131 | 132 | 133 | You can tell scorched to give you ``Book`` instances back by telling 134 | ``execute()`` to use the class as a constructor. 135 | 136 | :: 137 | 138 | >>> for result in si.query("game").execute(constructor=Book): 139 | ... print(result) 140 | Book("The Lightning Thief", "Rick Riordan") 141 | 142 | The ``constructor`` argument will most often be a class, but it can be any 143 | callable; it will always be called as ``constructor(**response_dict)``. 144 | 145 | 146 | You can extract more information from the response than simply the list of 147 | results. The SolrResponse object has the following attributes: 148 | 149 | * ``response.status`` : status of the query (status != 0 means something went wrong). 150 | * ``response.QTime`` : how long the query took, in milliseconds. 151 | * ``response.params`` : the params that were used in the query. 152 | 153 | and the results themselves are in the following attributes: 154 | 155 | * ``response.result`` : the results of your main query. 156 | * ``response.result.groups`` : see `Result grouping`_ below. 157 | * ``response.facet_counts`` : see `Faceting`_ below. 158 | * ``response.highlighting`` : see `Highlighting`_ below. 159 | * ``response.more_like_these`` : see `More Like This`_ below. 160 | 161 | Finally, ``response.result`` itself has the following attributes: 162 | 163 | * ``response.result.numFound`` : total number of docs found in the index. 164 | * ``response.result.docs`` : the actual results themselves. 165 | * ``response.result.start`` : if the number of docs is less than numFound, 166 | then this is the pagination offset. 167 | 168 | Pagination 169 | ---------- 170 | 171 | By default, Solr will only return the first 10 results (this is configurable in 172 | ``solrconfig.xml``). To get at more results, you need to tell Solr to paginate 173 | further through the results. You do this by applying the ``paginate()`` method, 174 | which takes two parameters, ``start`` and ``rows``: 175 | 176 | :: 177 | 178 | >>> si.query("black").paginate(start=10, rows=30) 179 | 180 | Cursors 181 | ------- 182 | If you want to fetch all results, or a very large number of them, you should use 183 | cursors to get the results in smaller chunks. Due to the way this is implemented in Solr, your 184 | sort needs to include your uniqueKey field. The ``cursor()`` method returns a 185 | cursor that you can iterate over. Like ``execute()``, ``cursor()`` takes an 186 | optional ``constructor`` parameter. In addition you can pass ``rows`` to define 187 | how many results should be fetched from Solr at once. 188 | 189 | :: 190 | 191 | >>> for item in si.query("black").sort_by('id').cursor(rows=100): ... 192 | 193 | Returning different fields 194 | -------------------------- 195 | 196 | By default, Solr will return all stored fields in the results. You might only 197 | be interested in a subset of those fields. To restrict the fields Solr returns, 198 | you apply the ``field_limit()`` method. 199 | 200 | :: 201 | 202 | >>> si.query("game").field_limit("id") 203 | >>> si.query("game").field_limit(["id", "name"]) 204 | 205 | You can use the same option to get hold of the relevancy score that Solr 206 | has calculated for each document in the query: 207 | 208 | :: 209 | 210 | >>> si.query("game").field_limit(score=True) # Return the score alongside each document 211 | >>> si.query("game").field_limit("id", score=True) # return just the id and score.
212 | 213 | The results appear just like the normal dictionary responses, but with a different 214 | selection of fields. 215 | 216 | :: 217 | 218 | >>> for result in si.query("thief").field_limit("id", score=True).execute(): 219 | ... print(result) 220 | {u'score': 0.6349302, u'id': u'978-0641723445'} 221 | 222 | More complex queries 223 | -------------------- 224 | 225 | In our books example, there are two numerical fields - the ``price`` (which is 226 | a float) and ``sequence_i`` (which is an integer). Numerical fields can be 227 | queried: 228 | 229 | * exactly 230 | * by comparison (``<`` / ``<=`` / ``>=`` / ``>``) 231 | * by range (between two values) 232 | 233 | Exact queries 234 | ~~~~~~~~~~~~~ 235 | 236 | Don't try to query floats exactly unless you really know what you're doing 237 | (http://download.oracle.com/docs/cd/E19957-01/806-3568/ncg_goldberg.html). Solr 238 | will let you, but you almost certainly don't want to. Querying integers exactly 239 | is fine though. 240 | 241 | :: 242 | 243 | >>> si.query(sequence_i=1) 244 | 245 | Comparison queries 246 | ~~~~~~~~~~~~~~~~~~ 247 | 248 | These use a new syntax: 249 | 250 | :: 251 | 252 | >>> si.query(price__lt=7) 253 | 254 | Notice the double-underscore separating "price" from "lt". It will search for 255 | all books whose price is less than 7. You can do similar searches on any float 256 | or integer field, and you can use: 257 | 258 | * ``gt`` : greater than, ``>`` 259 | * ``gte`` : greater than or equal to, ``>=`` 260 | * ``lt`` : less than, ``<`` 261 | * ``lte`` : less than or equal to, ``<=`` 262 | 263 | Range queries 264 | ~~~~~~~~~~~~~ 265 | 266 | As an extension of a comparison query, you can query for values that are within 267 | a range, i.e. between two different numbers. 268 | 269 | :: 270 | 271 | >>> si.query(price__range=(5, 7)) # all books with prices between 5 and 7. 272 | 273 | This range query is *inclusive* - it will return books which are 274 | priced at exactly 5 or exactly 7. You can also make an *exclusive* search: 275 | 276 | :: 277 | 278 | >>> si.query(price__rangeexc=(5, 7)) 279 | 280 | Which will exclude books priced at exactly 5 or 7. 281 | 282 | Finally, you can also do a completely open range search: 283 | 284 | :: 285 | 286 | >>> si.query(price__any=True) 287 | 288 | Will search for a book which has *any* price. Why would you do this? Well, if 289 | you had a schema where price was *optional*, then this search would return all 290 | books which had a price - and exclude any books which didn’t have a price. 291 | 292 | Date queries 293 | ~~~~~~~~~~~~ 294 | 295 | You can query on dates the same way as you can query on numbers: exactly, by 296 | comparison, or by range. 297 | 298 | Be warned, though, that exact searching on date suffers from similar problems 299 | to exact searching on floating point numbers. Solr stores all dates to 300 | millisecond precision; exact searching will fail unless the date requested is 301 | also correct to millisecond precision. 302 | 303 | :: 304 | 305 | >>> si.query(date_dt=datetime.datetime(2006, 2, 13)) 306 | 307 | Will search for items whose date is *exactly* zero milliseconds 308 | after midnight on the 13th February, 2006. 309 | 310 | More likely you'll want to search by comparison or by range: 311 | 312 | :: 313 | 314 | # all items after the 1st January 2006 315 | >>> si.query(date_dt__gt=datetime.datetime(2006, 1, 1)) 316 | 317 | # all items in Q1 2006.
318 |     >>> si.query(date_dt__range=(datetime.datetime(2006, 1, 1), datetime.datetime(2006, 4, 1)))
319 | 
320 | The argument to a date query can be any object that looks roughly like a Python
321 | ``datetime`` object, or a string in W3C Datetime notation
322 | (http://www.w3.org/TR/NOTE-datetime).
323 | 
324 | ::
325 | 
326 |     >>> si.query(date_dt__gte="2006")
327 |     >>> si.query(date_dt__lt="2009-04-13")
328 |     >>> si.query(date_dt__range=("2010-03-04 00:34:21", "2011-02-17 09:21:44"))
329 | 
330 | Boolean fields
331 | ~~~~~~~~~~~~~~
332 | 
333 | Boolean fields are flags on a document. In the example book data, documents
334 | carry an ``inStock`` field. We can select on that by doing:
335 | 
336 | ::
337 | 
338 |     >>> si.query("thief", inStock=True)
339 | 
340 | 
341 | Sorting results
342 | ---------------
343 | 
344 | Solr will return results in "relevancy" order. How Solr determines relevancy is
345 | a complex question, and can depend highly on your specific setup. However, it's
346 | possible to override this and sort query results by another field. This field
347 | must be sortable, so most likely you'd use a numerical or date field.
348 | 
349 | ::
350 | 
351 |     >>> si.query("thief").sort_by("price")   # ascending price
352 |     >>> si.query("thief").sort_by("-price")  # descending price
353 | 
354 | You can also sort on multiple factors:
355 | 
356 | ::
357 | 
358 |     >>> si.query("thief").sort_by("-price").sort_by("score")
359 | 
360 | This query will sort first by descending price, and then by increasing "score"
361 | (which is what Solr calls relevancy).
362 | 
363 | 
364 | Complex queries
365 | ---------------
366 | 
367 | Scorched queries can be chained together in all sorts of ways, with each
368 | chained call contributing query terms. Say we want all results that mention either "thief" or "sea".
369 | 
370 | What we do is construct two *query objects*, one for each condition, and ``OR``
371 | them together.
372 | 
373 | ::
374 | 
375 |     >>> si.query(si.Q("thief") | si.Q("sea"))
376 | 
377 | The ``Q`` object can contain an arbitrary query, and can be combined using
378 | Boolean logic (here, using ``|``, the OR operator). The result can then be
379 | passed to a normal ``si.query()`` call for execution.
380 | 
381 | ``Q`` objects can be combined using any of the Boolean operators, including
382 | ``&`` (``AND``) and ``~`` (``NOT``), and can be nested within each
383 | other.
384 | 
385 | A moderately complex query could be written:
386 | 
387 | ::
388 | 
389 |     >>> query = si.query(si.Q(si.Q("thief") & ~si.Q(author="ostein"))
390 |     ...                  | si.Q(si.Q("foo") & ~si.Q(author="bui")))
391 | 
392 | This produces the following query:
393 | 
394 | ::
395 | 
396 |     >>> query.options()
397 |     {'q': '(thief AND (*:* AND NOT author:ostein)) OR (foo AND (*:* AND NOT author:bui))'}
398 | 
399 | 
400 | Excluding results from queries
401 | ------------------------------
402 | 
403 | If we want to *exclude* results by some criteria, we use ``~si.Q()``.
404 | 
405 | ::
406 | 
407 |     >>> si.query(~si.Q(author="Rick Riordan"))
408 | 
409 | 
410 | Wildcard searching
411 | ------------------
412 | 
413 | You can use asterisks and question marks in the normal way, except that you may
414 | not use leading wildcards - i.e. no wildcards at the beginning of a term.
415 | 
416 | To search for a book with "thie" in the name:
417 | 
418 | ::
419 | 
420 |     >>> si.query(name=scorched.strings.WildcardString("thie*"))
421 | 
422 | If, for some reason, you want to search exactly for a string with an asterisk
423 | or a question mark in it, then you need to tell Solr to special-case it:
424 | 
425 | ::
426 | 
427 |     >>> si.query(id=scorched.strings.RawString("055323933?*"))
428 | 
429 | This will search for a document whose id contains *exactly* the string given,
430 | including the question mark and asterisk.
431 | 
432 | 
433 | Filter queries
434 | --------------
435 | 
436 | Solr implements several internal caching layers, and to some extent you can
437 | control when and how they're used.
438 | 
439 | Often, you find that you can partition your query; one part is run many times
440 | without change, or with very limited change, and another part varies much more.
441 | (See http://wiki.apache.org/solr/FilterQueryGuidance for more guidance.)
442 | 
443 | If you are taking search input from the user, you would write:
444 | 
445 | ::
446 | 
447 |     >>> si.query(name=user_input).filter(price__lt=7.5)
448 |     >>> si.query(name=user_input).filter(price__gte=7.5)
449 | 
450 | Adding multiple filters::
451 | 
452 |     >>> si.query(name="bla").filter(price__lt=7.5).filter(author="hans").options()
453 |     {'fq': ['author:hans', 'price:{* TO 7.5}'], 'q': 'name:bla'}
454 | 
455 | 
456 | You can filter any sort of query, simply by using ``filter()`` instead of
457 | ``query()``. And if your filtering involves an exclusion, then simply use
458 | ``~si.Q(author="lloyd")``.
459 | 
460 | ::
461 | 
462 |     >>> si.query(title="black").filter(~si.Q(author="lloyd")).options()
463 |     {'fq': 'NOT author:lloyd', 'q': 'title:black'}
464 | 
465 | It's possible to mix and match ``query()`` and ``filter()`` calls as much as
466 | you like while chaining. The resulting filter queries will be combined and
467 | cached together. The argument to a ``filter()`` call can be any combination of
468 | ``si.Q`` objects.
469 | 
470 | ::
471 | 
472 |     >>> si.query(title="black").filter(
473 |     ...     si.Q(si.Q(name="thief") & ~si.Q(author="ostein"))
474 |     ... ).filter(si.Q(si.Q(title="foo") & ~si.Q(author="bui"))
475 |     ... ).options()
476 |     {'fq': ['name:thief', 'title:foo', 'NOT author:ostein', 'NOT author:bui'],
477 |      'q': 'title:black'}
478 | 
479 | Boosting
480 | ---------
481 | 
482 | Solr provides a mechanism for "boosting" results according to the values of
483 | various fields (see
484 | http://wiki.apache.org/solr/SolrRelevancyCookbook#Boosting_Ranking_Terms for a
485 | full explanation).
486 | 
487 | 
488 | The following example boosts the importance of the author field by 3:
489 | 
490 | ::
491 | 
492 |     >>> si.query(si.Q("black") | si.Q(author="lloyd")**3).options()
493 |     {'q': 'black OR author:lloyd^3'}
494 | 
495 | 
496 | A more common pattern is that you want all books with "black" in the title *and
497 | you have a preference for those authored by Lloyd Alexander*. This is different
498 | from the last query; the last query would return books by Lloyd Alexander which
499 | did not have "black" in the title. Achieving this in Solr is possible, but a
500 | little awkward; scorched provides a shortcut for this pattern.
501 | 
502 | ::
503 | 
504 |     >>> si.query("black").boost_relevancy(3, author_t="lloyd").options()
505 |     {'q': 'black OR (black AND author_t:lloyd^3)'}
506 | 
507 | This is fully chainable, and ``boost_relevancy`` can take an arbitrary
508 | collection of query objects.
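
Since ``boost_relevancy()`` returns a query object like any other, it chains
with the rest of the query API. A minimal sketch (reusing the book fields from
the examples above; the exact ``options()`` output is not shown here):

::

    >>> query = si.query("black").boost_relevancy(3, author_t="lloyd")
    >>> query = query.filter(inStock=True).paginate(rows=20)
    >>> response = query.execute()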
509 | 
510 | Faceting
511 | --------
512 | 
513 | For background, see http://wiki.apache.org/solr/SimpleFacetParameters.
514 | 
515 | Scorched lets you apply faceting to any query, with the ``facet_by()`` method,
516 | chainable on a query object. The ``facet_by()`` method needs, at least, a field
517 | (or list of fields) to facet on:
518 | 
519 | ::
520 | 
521 |     >>> facet_query = si.query("thief").facet_by("sequence_i").paginate(rows=0)
522 | 
523 | The above fragment will search for books matching "thief", and facet the
524 | results according to the value of ``sequence_i``. It will also return zero
525 | rows of results - just the facet output.
526 | 
527 | ::
528 | 
529 |     >>> print(facet_query.execute().facet_counts.facet_fields)
530 |     {'sequence_i': [('1', 1), ('2', 0)]}
531 | 
532 | The ``facet_counts`` object contains several sets of results - here, we're
533 | only interested in the ``facet_fields`` object. This contains a dictionary of
534 | results, keyed by each field where faceting was requested. The dictionary value
535 | is a list of two-tuples, mapping each value of the faceted field to the number of matching documents.
536 | 
537 | You can facet on more than one field at a time:
538 | 
539 | ::
540 | 
541 |     >>> si.query(...).facet_by(fields=["field1", "field2", ...])
542 | 
543 | The ``facet_fields`` dictionary will then have more than one key.
544 | 
545 | Solr supports a number of parameters to the faceting operation. All of the
546 | basic options are exposed through scorched:
547 | 
548 | ::
549 | 
550 |     fields, prefix, sort, limit, offset, mincount, missing, method,
551 |     enum.cache.minDf
552 | 
553 | All of these can be used as keyword arguments to the ``facet_by()`` call,
554 | except of course the last one since it contains periods. To pass keyword
555 | arguments with periods in them, you can use ``**`` syntax (see the example below).
556 | 
557 | You can also facet by ranges. The following query will return range facets over
558 | ``field1``: 0-10, 11-20, 21-30, etc. The ``mincount`` parameter can be used to
559 | return only those facets which contain a minimum number of results.
560 | 
561 | ::
562 | 
563 |     >>> si.query(...).facet_range(fields='field1', start=0, gap=10, end=100,
564 |     ...                           limit=10, mincount=1)
565 | 
566 | Alternatively, you can create ranges of dates using Solr's `date math` syntax.
567 | This next example creates a facet for each of the last 12 months.
568 | 
569 | ::
570 | 
571 |     >>> si.query(...).facet_range(fields='field1', start='NOW-12MONTHS/MONTH',
572 |     ...                           gap='+1MONTHS', end='NOW/MONTH')
573 | 
574 | See
575 | https://cwiki.apache.org/confluence/display/solr/Working+with+Dates#WorkingwithDates-DateMath
576 | for more details on `date math` syntax.
577 | 
578 | To pass a parameter containing periods, such as ``enum.cache.minDf``, use ``**`` syntax::
579 | 
580 |     >>> si.query(...).facet_by(**{"enum.cache.minDf": 25})
581 | 
582 | You can also facet on the result of one or more queries, using the
583 | ``facet_query()`` method. For example:
584 | 
585 | ::
586 | 
587 |     >>> fquery = si.query("game").facet_query(price__lt=7).facet_query(price__gte=7)
588 |     >>> print(fquery.execute().facet_counts.facet_queries)
589 |     [('price:[7.0 TO *]', 1), ('price:{* TO 7.0}', 1)]
590 | 
591 | This will facet the results according to the two queries specified, so you can
592 | see how many of the results cost less than 7, and how many cost 7 or more.
593 | 
594 | The results come back this time in the ``facet_queries`` object, but have the
595 | same form as before. The facets are shown as a list of tuples, mapping query
596 | to number of results.
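
Because facet counts come back as plain lists of ``(value, count)`` tuples,
postprocessing them is ordinary Python. A small sketch (the field name is the
one used in the grouping examples below):

::

    >>> resp = si.query("thief").facet_by("genre_s", mincount=1).paginate(rows=0).execute()
    >>> for value, count in resp.facet_counts.facet_fields["genre_s"]:
    ...     print("%s: %s" % (value, count))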
597 | 
598 | Facet pivot TODO https://wiki.apache.org/solr/HierarchicalFaceting#Pivot_Facets
599 | 
600 | Result grouping
601 | ---------------
602 | 
603 | For background, see http://wiki.apache.org/solr/FieldCollapsing.
604 | 
605 | Solr 3.3 added support for result grouping.
606 | 
607 | An example call looks like this:
608 | 
609 | ::
610 | 
611 |     >>> resp = si.query().group_by('genre_s', limit=10).execute()
612 |     >>> for g in resp.groups.genre_s['groups']:
613 |     ...     print("%s #%s" % (g['groupValue'], len(g['doclist']['docs'])))
614 |     ...     for d in g['doclist']['docs']:
615 |     ...         print("\t%s" % d['name'])
616 |     fantasy #3
617 |         The Lightning Thief
618 |         The Sea of Monsters
619 |         Sophie's World : The Greek Philosophers
620 |     IT #1
621 |         Lucene in Action, Second Edition
622 | 
623 | Highlighting
624 | ------------
625 | 
626 | For background, see http://wiki.apache.org/solr/HighlightingParameters.
627 | 
628 | Alongside the normal search results, you can ask Solr to return fragments of
629 | the documents, with relevant search terms highlighted. You do this with the
630 | chainable ``highlight()`` method.
631 | 
632 | Specify which field we would like to see highlighted:
633 | 
634 | ::
635 | 
636 |     >>> resp = si.query('thief').highlight('name').execute()
637 |     >>> resp.highlighting
638 |     {'978-0641723445': {'name': ['The Lightning <em>Thief</em>']}}
639 | 
640 | It is also possible to specify an array of fields::
641 | 
642 |     >>> si.query('thief').highlight(['name', 'title']).options()
643 |     {'hl': True, 'hl.fl': 'name,title', 'q': 'thief'}
644 | 
645 | Highlighting values will also be included in ``response.result.docs`` and in
646 | grouped results as a ``solr_highlights`` key, so that they can be accessed
647 | during result iteration.
648 | 
649 | PostingsHighlighter
650 | -------------------
651 | 
652 | For background, see https://wiki.apache.org/solr/PostingsHighlighter.
653 | 
654 | PostingsHighlighter is a highlighter, added in Solr 4.3, that produces
655 | summaries of the matching documents. You use it with the
656 | chainable ``postings_highlight()`` method.
657 | 
658 | Specify which field we would like to see highlighted:
659 | 
660 | ::
661 | 
662 |     >>> resp = si.query('thief').postings_highlight('name').execute()
663 |     >>> resp.highlighting
664 |     {'978-0641723445': {'name': ['The Lightning <em>Thief</em>']}}
665 | 
666 | It is also possible to specify an array of fields::
667 | 
668 |     >>> si.query('thief').postings_highlight(['name', 'title']).options()
669 |     {'hl': True, 'hl.fl': 'name,title', 'q': 'thief'}
670 | 
671 | 
672 | Term Vectors
673 | ------------
674 | 
675 | For background, see https://wiki.apache.org/solr/TermVectorComponent.
676 | 
677 | Alongside the normal search results, you can ask Solr to return the term
678 | vector, the term frequency, inverse document frequency, and position and offset
679 | information for the documents.
680 | You do this with the chainable ``term_vector()`` method.
681 | 
682 | ::
683 | 
684 |     >>> resp = si.query('thief').term_vector(all=True).execute()
685 | 
686 | You can also specify for which fields you would like to get information:
687 | 
688 | ::
689 | 
690 |     >>> resp = si.query('thief').term_vector('name').execute()
691 | 
692 | It is also possible to specify an array of fields::
693 | 
694 |     >>> si.query('thief').term_vector(['name', 'title'], all=True).execute()
695 | 
696 | 
697 | More Like This
698 | --------------
699 | 
700 | For background, see http://wiki.apache.org/solr/MoreLikeThis. Alongside a set
701 | of search results, Solr can suggest other documents that are similar to each of
702 | the documents in the search result.
703 | 
704 | More-like-this searches are accomplished with the ``mlt()`` chainable option.
705 | Solr needs to know which fields to consider when deciding similarity.
706 | 
707 | ::
708 | 
709 |     >>> resp = si.query(id="978-0641723445").mlt("genre_s", mintf=1, mindf=1).execute()
710 |     >>> resp.more_like_these
711 |     {'978-0641723445': <scorched.response.SolrResult object at 0x...>}
712 | 
713 |     >>> resp.more_like_these['978-0641723445'].docs
714 |     [{'_version_': 1462820023772905472,
715 |       'author': 'Rick Riordan',
716 |       'author_s': 'Rick Riordan',
717 |       'cat': ['book', 'paperback'],
718 |       'genre_s': 'fantasy',
719 |       'id': '978-1423103349',
720 |       'inStock': True,
721 |       'name': 'The Sea of Monsters',
722 |       'pages_i': 304,
723 |       'price': 6.49,
724 |       'price_c': '6.49,USD',
725 |       'sequence_i': 2,
726 |       'series_t': 'Percy Jackson and the Olympians'},
727 |      {'_version_': 1462820023776051200,
728 |       'author': 'Jostein Gaarder',
729 |       'author_s': 'Jostein Gaarder',
730 |       'cat': ['book', 'paperback'],
731 |       'genre_s': 'fantasy',
732 |       'id': '978-1857995879',
733 |       'inStock': True,
734 |       'name': "Sophie's World : The Greek Philosophers",
735 |       'pages_i': 64,
736 |       'price': 3.07,
737 |       'price_c': '3.07,USD',
738 |       'sequence_i': 1}]
739 | 
740 | Here we used ``mlt()`` options to alter the default behaviour (because our
741 | corpus is so small that Solr wouldn't find any similar documents with the
742 | standard behaviour).
743 | 
744 | The ``SolrResponse`` object has a ``more_like_these`` attribute. This is a
745 | dictionary of ``SolrResult`` objects, one dictionary entry for each result of
746 | the main query. Here, the query only produced one result (because we searched
747 | on the ``uniqueKey``). Inspecting the ``SolrResult`` object, we find that it
748 | contains two documents.
749 | 
750 | We can read the above result as saying that under the ``mlt()`` parameters
751 | requested, there were two documents similar to the search result.
752 | 
753 | ``mlt()`` also takes a list of options (see the Solr documentation for a full
754 | explanation):
755 | 
756 | 
757 | ::
758 | 
759 |     fields, count, mintf, mindf, minwl, maxwl, maxqt, maxntp, boost
760 | 
761 | 
762 | Alternative parser
763 | ------------------
764 | 
765 | Scorched supports the `dismax` and `edismax` parsers. These can be selected by
766 | simply calling ``alt_parser()``.
767 | 
768 | Example::
769 | 
770 |     >>> si.query().alt_parser('edismax', mm=2).options()
771 |     {'defType': 'edismax', 'mm': 2, 'q': '*:*'}
772 | 
773 | The `edismax` parser also supports field aliases. Here is an example where
774 | ``foo`` is aliased to the fields ``bar`` and ``baz``.
775 | 
776 | Example::
777 | 
778 |     >>> si.query().alt_parser('edismax', f={'foo': ['bar', 'baz']}).options()
779 |     {'defType': 'edismax', 'q': '*:*', 'f.foo.qf': 'bar baz'}
780 | 
781 | 
782 | Set request handler
783 | -------------------
784 | 
785 | For background, see https://wiki.apache.org/solr/SolrRequestHandler.
786 | It is possible to choose the request handler on a per-query basis. To use a
787 | different request handler, call ``set_requesthandler()``.
788 | 
789 | Example::
790 | 
791 |     >>> si.query().set_requesthandler('foo').options()
792 |     {'q': '*:*', 'qt': 'foo'}
793 | 
794 | Set debug
795 | ---------
796 | 
797 | For background, see https://wiki.apache.org/solr/CommonQueryParameters#Debugging.
798 | Sometimes we need more information about what Solr is doing with our query. To
799 | get this additional information, we set ``debug``.
800 | 
801 | Example::
802 | 
803 |     >>> si.query().debug().options()
804 |     {'debugQuery': True, 'q': '*:*'}
805 |     >>> si.query().debug().execute().debug
806 |     {'QParser': 'LuceneQParser',
807 |      'explain': {'978-1423103349': '\n1.0 = (MATCH) MatchAllDocsQuery, product of:\n 1.0 = queryNorm\n',
808 |                  '978-1857995879': '\n1.0 = (MATCH) MatchAllDocsQuery, product of:\n 1.0 = queryNorm\n',
809 |                  '978-1933988177': '\n1.0 = (MATCH) MatchAllDocsQuery, product of:\n 1.0 = queryNorm\n'},
810 |      'parsedquery': 'MatchAllDocsQuery(*:*)',
811 |      'parsedquery_toString': '*:*',
812 |      'querystring': '*:*',
813 |      'rawquerystring': '*:*',
814 |      'timing': {'prepare': {'debug': {'time': 0.0},
815 |                             'facet': {'time': 0.0},
816 |                             'highlight': {'time': 0.0},
817 |                             'mlt': {'time': 0.0},
818 |                             'query': {'time': 0.0},
819 |                             'stats': {'time': 0.0},
820 |                             'time': 0.0},
821 |                 'process': {'debug': {'time': 0.0},
822 |                             'facet': {'time': 0.0},
823 |                             'highlight': {'time': 0.0},
824 |                             'mlt': {'time': 0.0},
825 |                             'query': {'time': 1.0},
826 |                             'stats': {'time': 0.0},
827 |                             'time': 1.0},
828 |                 'time': 1.0}}
829 | 
830 | 
831 | Enable spellchecking
832 | --------------------
833 | 
834 | For background, see http://wiki.apache.org/solr/SpellCheckComponent.
835 | It is possible to activate spellchecking for your query. To do that, use
836 | ``spellcheck()``.
837 | 
838 | 
839 | Example::
840 | 
841 |     >>> si.query().spellcheck().options()
842 |     {'q': '*:*', 'spellcheck': 'true'}
843 | 
844 | Realtime Get
845 | ------------
846 | 
847 | For background, see https://wiki.apache.org/solr/RealTimeGet.
848 | 
849 | Solr 4.0 added support for retrieval of documents that are not yet committed.
850 | The retrieval can only be done by id::
851 | 
852 |     >>> resp = si.get("978-1423103349")
853 | 
854 | You can also pass multiple ids::
855 | 
856 |     >>> resp = si.get(["978-0641723445", "978-1423103349"])
857 | 
858 | The return value is the same as for a normal search.
859 | 
860 | Stats
861 | -----
862 | 
863 | For background, see https://wiki.apache.org/solr/StatsComponent.
864 | 
865 | Solr can return simple statistics for indexed numeric fields::
866 | 
867 |     >>> resp = si.query().stats('int_field')
868 | 
869 | You can also pass multiple fields::
870 | 
871 |     >>> resp = si.query().stats(['int_field', 'float_field'])
872 | 
873 | The resulting statistics are available on the response at
874 | ``resp.stats.stats_fields``.
875 | 
876 | 
877 | 
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | .. _usage:
2 | 
3 | First steps
4 | ===========
5 | 
6 | Installing scorched
7 | -------------------
8 | 
9 | You can install scorched via setuptools or pip.
10 | 
11 | To use scorched, you'll need an Apache Solr installation. Scorched
12 | currently requires at least version 3.6.1 of Apache Solr.
13 | 
14 | Using pip
15 | ~~~~~~~~~
16 | 
17 | If you have `pip <https://pip.pypa.io/>`_ installed, just type:
18 | 
19 | ::
20 | 
21 |     $ pip install scorched
22 | 
23 | If you've got an old version of scorched installed, and want to
24 | upgrade, then type:
25 | 
26 | ::
27 | 
28 |     $ pip install -U scorched
29 | 
30 | That's all you need to do; all dependencies will be pulled in automatically.
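
As a quick sanity check that the install worked (nothing scorched-specific is
assumed here beyond the package itself), you can import it from the command
line:

::

    $ python -c "import scorched; print(scorched.SolrInterface)"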
31 | 
32 | 
33 | Configuring a connection
34 | ------------------------
35 | 
36 | Whether you're querying or updating a Solr server, you need to set up a
37 | connection first. Pass the URL of the Solr server to a
38 | ``SolrInterface`` object.
39 | 
40 | ::
41 | 
42 |     >>> import scorched
43 |     >>> si = scorched.SolrInterface("http://localhost:8983/solr/")
44 | 
45 | 
46 | .. note:: Optional arguments to connection:
47 |     :class:`scorched.connection.SolrConnection`
48 | 
49 | 
50 | Adding documents
51 | ----------------
52 | 
53 | To add data to the scorched instance, use a Python dictionary.
54 | 
55 | ::
56 | 
57 |     >>> document = {"id": "0553573403",
58 |     ...             "cat": "book",
59 |     ...             "name": "A Game of Thrones",
60 |     ...             "price": 7.99,
61 |     ...             "inStock": True,
62 |     ...             "author_t":
63 |     ...                 "George R.R. Martin",
64 |     ...             "series_t": "A Song of Ice and Fire",
65 |     ...             "sequence_i": 1,
66 |     ...             "genre_s": "fantasy"}
67 |     >>> si.add(document)
68 | 
69 | You can add lists of dictionaries in the same way. Given the example
70 | "books.json" file, you could feed it to scorched like so:
71 | 
72 | ::
73 | 
74 |     >>> import json, os
75 |     >>> file = os.path.join(os.path.dirname(__file__), "dumps", "books.json")
76 |     >>> with open(file) as f:
77 |     ...     datajson = f.read()
78 |     ...     docs = json.loads(datajson)
79 |     >>> si.add(docs)
80 |     >>> si.commit()
81 | 
82 | .. note:: Optional arguments to add:
83 | 
84 |     See http://wiki.apache.org/solr/UpdateXmlMessages for details. Or the api
85 |     documentation: TODO link
86 | 
87 | Deleting documents
88 | ------------------
89 | 
90 | You can delete documents individually, or delete all documents resulting from a
91 | query.
92 | 
93 | To delete documents individually, you need to pass a list of the document ids
94 | to scorched.
95 | 
96 | ::
97 | 
98 |     >>> si.delete_by_ids([obj.id])
99 |     >>> si.delete_by_ids([x.id for x in objs])
100 | 
101 | To delete documents by query, you construct one or more queries from `Q`
102 | objects, in the same way that you construct a query as explained in
103 | :ref:`optional-terms`. You then pass those queries into the
104 | ``delete_by_query()`` method:
105 | 
106 | ::
107 | 
108 |     >>> si.delete_by_query(query=si.Q("game"))
109 | 
110 | To clear the entire index, there is a shortcut which simply deletes every
111 | document in the index.
112 | 
113 | ::
114 | 
115 |     >>> si.delete_all()
116 | 
117 | Deletions, like additions, only take effect after a commit (or autocommit).
118 | 
119 | .. note:: Optional arguments to delete:
120 | 
121 |     See http://wiki.apache.org/solr/UpdateXmlMessages for details. Or the api
122 |     documentation: TODO link
123 | 
124 | Optimizing
125 | ----------
126 | 
127 | After updating an index with new data, it becomes fragmented and performance
128 | suffers. This means that you need to optimize the index. When and how often you
129 | do this is something you need to decide on a case-by-case basis. If you only
130 | add data infrequently, you should optimize after every new update; if you
131 | trickle in data on a frequent basis, you need to think more about it. See
132 | http://wiki.apache.org/solr/SolrPerformanceFactors#Optimization_Considerations.
133 | 
134 | Either way, to optimize an index, simply call:
135 | 
136 | ::
137 | 
138 |     >>> si.optimize()
139 | 
140 | A Solr optimize also performs a commit, so if you're about to ``optimize()``
141 | anyway, you can leave off the preceding ``commit()``. It doesn't particularly
142 | hurt to do both though.
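
Putting the update operations together, a typical bulk load might look like
this (a sketch reusing the ``docs`` list from `Adding documents`_ above):

::

    >>> si.add(docs)       # send the documents to Solr
    >>> si.commit()        # make them visible to searches
    >>> si.optimize()      # optionally merge segments after a large load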
143 | 144 | Rollback 145 | -------- 146 | 147 | If you haven’t yet added/deleted documents since the last commit, you can issue 148 | a rollback to revert the index state to that of the last commit. 149 | 150 | :: 151 | 152 | >>> si.rollback() 153 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.isort] 2 | profile = "black" 3 | 4 | [tool.black] 5 | py37 = true 6 | 7 | [tool.check-manifest] 8 | ignore = [".flake8", ".pre-commit-config.yaml", "pyproject.toml"] 9 | -------------------------------------------------------------------------------- /scorched/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from scorched.connection import SolrInterface 3 | 4 | __all__ = ['SolrInterface'] 5 | -------------------------------------------------------------------------------- /scorched/compat.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | _ver = sys.version_info 4 | is_py2 = (_ver[0] == 2) 5 | is_py3 = (_ver[0] == 3) 6 | 7 | 8 | if is_py2: # pragma: no cover 9 | from urllib import (quote, unquote, quote_plus, unquote_plus, urlencode, 10 | getproxies, proxy_bypass) 11 | from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag 12 | from urllib2 import parse_http_list 13 | import cookielib 14 | from Cookie import Morsel 15 | from StringIO import StringIO 16 | from httplib import IncompleteRead 17 | 18 | builtin_str = str 19 | bytes = str 20 | str = unicode 21 | basestring = basestring 22 | numeric_types = (int, long, float) 23 | 24 | 25 | elif is_py3: # pragma: no cover 26 | from urllib.parse import (urlparse, urlunparse, urljoin, urlsplit, 27 | urlencode, quote, unquote, quote_plus, 28 | unquote_plus, urldefrag) 29 | from urllib.request import parse_http_list, getproxies, proxy_bypass 30 | from http import cookiejar as cookielib 31 | from http.cookies import Morsel 32 | from io import StringIO 33 | from http.client import IncompleteRead 34 | 35 | builtin_str = str 36 | str = str 37 | bytes = bytes 38 | basestring = (str, bytes) 39 | numeric_types = (int, float) 40 | 41 | 42 | def python_2_unicode_compatible(cls): 43 | """ 44 | A decorator that defines __unicode__ and __str__ methods under Python 45 | 2. Under Python 3 it does nothing. 46 | 47 | To support Python 2 and 3 with a single code base, define a __str__ 48 | method returning unicode text and apply this decorator to the class. 49 | 50 | The implementation comes from django.utils.encoding. 51 | """ 52 | if not is_py3: # pragma: no cover 53 | cls.__unicode__ = cls.__str__ 54 | cls.__str__ = lambda self: self.__unicode__().encode('utf-8') 55 | return cls 56 | -------------------------------------------------------------------------------- /scorched/connection.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import itertools 4 | import json 5 | import time 6 | import warnings 7 | 8 | import requests 9 | 10 | import scorched.compat 11 | import scorched.dates 12 | import scorched.exc 13 | import scorched.response 14 | import scorched.search 15 | from scorched.compat import str 16 | 17 | MAX_LENGTH_GET_URL = 2048 18 | # Jetty default is 4096; Tomcat default is 8192; picking 2048 to be 19 | # conservative. 
20 | 21 | 22 | def is_iter(val): 23 | return isinstance(val, (tuple, list)) 24 | 25 | 26 | class SolrConnection(object): 27 | readable = True 28 | writeable = True 29 | 30 | def __init__( 31 | self, 32 | url, 33 | http_connection, 34 | mode, 35 | retry_timeout, 36 | max_length_get_url, 37 | search_timeout=(), 38 | ): 39 | """ 40 | :param url: url to Solr 41 | :type url: str 42 | :param http_connection: existing requests.Session object, or None to 43 | create a new one. 44 | :type http_connection: requests connection 45 | :param mode: mode (readable, writable) Solr 46 | :type mode: str 47 | :param retry_timeout: timeout until retry 48 | :type retry_timeout: int 49 | :param max_length_get_url: max length until switch to post 50 | :type max_length_get_url: int 51 | :param search_timeout: (optional) How long to wait for the server to 52 | send data before giving up, as a float, or a 53 | (connect timeout, read timeout) tuple. 54 | :type search_timeout: float or tuple 55 | """ 56 | self.http_connection = http_connection or requests.Session() 57 | if mode == "r": 58 | self.writeable = False 59 | elif mode == "w": 60 | self.readable = False 61 | self.url = url.rstrip("/") + "/" 62 | self.update_url = self.url + "update/json" 63 | self.select_url = self.url + "select/" 64 | self.mlt_url = self.url + "mlt/" 65 | self.get_url = self.url + "get/" 66 | self.retry_timeout = retry_timeout 67 | self.max_length_get_url = max_length_get_url 68 | self.search_timeout = search_timeout 69 | 70 | def request(self, *args, **kwargs): 71 | """ 72 | :param args: arguments 73 | :type args: tuple 74 | :param kwargs: key word arguments 75 | :type kwargs: dict 76 | 77 | .. todo:: 78 | Make this api more explicit! 79 | """ 80 | try: 81 | return self.http_connection.request(*args, **kwargs) 82 | except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): 83 | if self.retry_timeout < 0: 84 | raise 85 | time.sleep(self.retry_timeout) 86 | return self.http_connection.request(*args, **kwargs) 87 | 88 | def get(self, ids, fl=None): 89 | """ 90 | Perform a RealTime Get 91 | """ 92 | # We always send the ids parameter to force the standart output format, 93 | # but use the id parameter for our actual data as `ids` can no handle 94 | # ids with commas 95 | params = [ 96 | ("ids", ""), 97 | ("wt", "json"), 98 | ] 99 | if is_iter(ids): 100 | for id in ids: 101 | params.append(("id", id)) 102 | else: 103 | params.append(("id", ids)) 104 | if fl: 105 | params.append(("fl", ",".join(fl))) 106 | 107 | qs = scorched.compat.urlencode(params) 108 | url = "%s?%s" % (self.get_url, qs) 109 | 110 | response = self.request("GET", url) 111 | if response.status_code != 200: 112 | raise scorched.exc.SolrError(response) 113 | return response.text 114 | 115 | def update(self, update_doc, **kwargs): 116 | """ 117 | :param update_doc: data send to Solr 118 | :type update_doc: json data 119 | :returns: json -- json string 120 | 121 | Send json to Solr 122 | """ 123 | if not self.writeable: 124 | raise TypeError("This Solr instance is only for reading") 125 | body = update_doc 126 | if body: 127 | headers = {"Content-Type": "application/json; charset=utf-8"} 128 | else: 129 | headers = {} 130 | url = self.url_for_update(**kwargs) 131 | response = self.request("POST", url, data=body, headers=headers) 132 | if response.status_code != 200: 133 | raise scorched.exc.SolrError(response) 134 | return response.text 135 | 136 | def url_for_update( 137 | self, 138 | commit=None, 139 | commitWithin=None, 140 | softCommit=None, 141 | optimize=None, 
142 | waitSearcher=None, 143 | expungeDeletes=None, 144 | maxSegments=None, 145 | ): 146 | """ 147 | :param commit: optional -- commit actions 148 | :type commit: bool 149 | :param commitWithin: optional -- document will be added within that 150 | time 151 | :type commitWithin: int 152 | :param softCommit: optional -- performant commit without "on-disk" 153 | guarantee 154 | :type softCommit: bool 155 | :param optimize: optional -- optimize forces all of the index segments 156 | to be merged into a single segment first. 157 | :type optimze: bool 158 | :param waitSearcher: optional -- block until a new searcher is opened 159 | and registered as the main query searcher, 160 | :type waitSearcher: bool 161 | :param expungeDeletes: optional -- merge segments with deletes away 162 | :type expungeDeletes: bool 163 | :param maxSegments: optional -- optimizes down to at most this number 164 | of segments 165 | :type maxSegments: int 166 | :returns: str -- url with all extra paramters set 167 | 168 | This functions sets all extra parameters for the ``optimize`` and 169 | ``commit`` function. 170 | """ 171 | extra_params = {} 172 | if commit is not None: 173 | extra_params["commit"] = "true" if commit else "false" 174 | if commitWithin is not None: 175 | try: 176 | extra_params["commitWithin"] = int(commitWithin) 177 | except (TypeError, ValueError): 178 | raise ValueError("commitWithin should be a number in milliseconds") 179 | if extra_params["commitWithin"] < 0: 180 | raise ValueError("commitWithin should be a number in milliseconds") 181 | extra_params["commitWithin"] = str(extra_params["commitWithin"]) 182 | if softCommit is not None: 183 | extra_params["softCommit"] = "true" if softCommit else "false" 184 | if optimize is not None: 185 | extra_params["optimize"] = "true" if optimize else "false" 186 | if waitSearcher is not None: 187 | extra_params["waitSearcher"] = "true" if waitSearcher else "false" 188 | if expungeDeletes is not None: 189 | extra_params["expungeDeletes"] = "true" if expungeDeletes else "false" 190 | if maxSegments is not None: 191 | try: 192 | extra_params["maxSegments"] = int(maxSegments) 193 | except (TypeError, ValueError): 194 | raise ValueError("maxSegments") 195 | if extra_params["maxSegments"] <= 0: 196 | raise ValueError("maxSegments should be a positive number") 197 | extra_params["maxSegments"] = str(extra_params["maxSegments"]) 198 | if "expungeDeletes" in extra_params and "commit" not in extra_params: 199 | raise ValueError("Can't do expungeDeletes without commit") 200 | if "maxSegments" in extra_params and "optimize" not in extra_params: 201 | raise ValueError("Can't do maxSegments without optimize") 202 | if extra_params: 203 | return "%s?%s" % ( 204 | self.update_url, 205 | scorched.compat.urlencode(sorted(extra_params.items())), 206 | ) 207 | else: 208 | return self.update_url 209 | 210 | def select(self, params): 211 | """ 212 | :param params: LuceneQuery converted to a dictionary with search 213 | queries 214 | :type params: dict 215 | :returns: json -- json string 216 | 217 | We perform here a search on the `select` handler of Solr. 218 | """ 219 | if not self.readable: 220 | raise TypeError("This Solr instance is only for writing") 221 | params.append(("wt", "json")) 222 | qs = scorched.compat.urlencode(params) 223 | url = "%s?%s" % (self.select_url, qs) 224 | if len(url) > self.max_length_get_url: 225 | warnings.warn( 226 | "Long query URL encountered - POSTing instead of " 227 | "GETting. 
This query will not be cached at the HTTP layer" 228 | ) 229 | url = self.select_url 230 | method = "POST" 231 | kwargs = { 232 | "data": qs, 233 | "headers": {"Content-Type": "application/x-www-form-urlencoded"}, 234 | } 235 | else: 236 | method = "GET" 237 | kwargs = {} 238 | if self.search_timeout != (): 239 | kwargs["timeout"] = self.search_timeout 240 | response = self.request(method, url, **kwargs) 241 | if response.status_code != 200: 242 | raise scorched.exc.SolrError(response) 243 | return response.text 244 | 245 | def mlt(self, params, content=None): 246 | """ 247 | :param params: LuceneQuery converted to a dictionary with search 248 | queries 249 | :type params: dict 250 | :returns: json -- json string 251 | 252 | Perform a MoreLikeThis query using the content specified 253 | There may be no content if stream.url is specified in the params. 254 | """ 255 | if not self.readable: 256 | raise TypeError("This Solr instance is only for writing") 257 | params.append(("wt", "json")) 258 | qs = scorched.compat.urlencode(params) 259 | base_url = "%s?%s" % (self.mlt_url, qs) 260 | method = "GET" 261 | kwargs = {} 262 | if content is None: 263 | url = base_url 264 | else: 265 | get_url = "%s&stream.body=%s" % ( 266 | base_url, 267 | scorched.compat.quote_plus(content), 268 | ) 269 | if len(get_url) <= self.max_length_get_url: 270 | url = get_url 271 | else: 272 | url = base_url 273 | method = "POST" 274 | kwargs = { 275 | "data": content, 276 | "headers": {"Content-Type": "text/plain; charset=utf-8"}, 277 | } 278 | response = self.request(method, url, **kwargs) 279 | if response.status_code != 200: 280 | raise scorched.exc.SolrError(response.content) 281 | return response.text 282 | 283 | 284 | class SolrInterface(object): 285 | remote_schema_file = "schema?wt=json" 286 | 287 | def __init__( 288 | self, 289 | url, 290 | http_connection=None, 291 | mode="", 292 | retry_timeout=-1, 293 | max_length_get_url=MAX_LENGTH_GET_URL, 294 | search_timeout=(), 295 | ): 296 | """ 297 | :param url: url to Solr 298 | :type url: str 299 | :param http_connection: optional -- already existing connection 300 | :type http_connection: requests connection 301 | :param mode: optional -- mode (readable, writable) Solr 302 | :type mode: str 303 | :param retry_timeout: optional -- timeout until retry 304 | :type retry_timeout: int 305 | :param max_length_get_url: optional -- max length until switch to post 306 | :type max_length_get_url: int 307 | :param search_timeout: (optional) How long to wait for the server to 308 | send data before giving up, as a float, or a 309 | (connect timeout, read timeout) tuple. 310 | :type search_timeout: float or tuple 311 | """ 312 | 313 | self.conn = SolrConnection( 314 | url, http_connection, mode, retry_timeout, max_length_get_url 315 | ) 316 | self.schema = self.init_schema() 317 | self._datefields = self._extract_datefields(self.schema) 318 | 319 | def init_schema(self): 320 | response = self.conn.request( 321 | "GET", scorched.compat.urljoin(self.conn.url, self.remote_schema_file) 322 | ) 323 | if response.status_code != 200: 324 | raise EnvironmentError( 325 | "Couldn't retrieve schema document - status code %s\n%s" 326 | % (response.status_code, response.content) 327 | ) 328 | return response.json()["schema"] 329 | 330 | def _extract_datefields(self, schema): 331 | # attn: in modern solr (>=8.x) date fields are declared 332 | # as 334 | # instead of 336 | # This schema parsing is determining the fields by name 337 | # and not by java class type. 
Therefore this is error-prone. 338 | ret = [x["name"] for x in schema["fields"] if x["type"] in ["pdate", "date"]] 339 | ret.extend( 340 | [ 341 | x["name"] 342 | for x in schema["dynamicFields"] 343 | if x["type"] in ["pdate", "date"] 344 | ] 345 | ) 346 | return ret 347 | 348 | def _should_skip_value(self, value): 349 | if value is None: 350 | return True 351 | if isinstance(value, dict) and "set" in value and value["set"] is None: 352 | return True 353 | return False 354 | 355 | def _prepare_date(self, value): 356 | """Prepare a value of type date""" 357 | if is_iter(value): 358 | value = [str(scorched.dates.solr_date(v)) for v in value] 359 | else: 360 | value = str(scorched.dates.solr_date(value)) 361 | return value 362 | 363 | def _prepare_docs(self, docs): 364 | prepared_docs = [] 365 | for doc in docs: 366 | new_doc = {} 367 | for name, value in list(doc.items()): 368 | # XXX remove all None fields this is needed for adding date 369 | # fields 370 | if self._should_skip_value(value): 371 | continue 372 | if scorched.dates.is_datetime_field(name, self._datefields): 373 | if isinstance(value, dict) and "set" in value: 374 | value["set"] = self._prepare_date(value["set"]) 375 | else: 376 | value = self._prepare_date(value) 377 | new_doc[name] = value 378 | prepared_docs.append(new_doc) 379 | return prepared_docs 380 | 381 | def add(self, docs, chunk=100, **kwargs): 382 | """ 383 | :param docs: documents to be added 384 | :type docs: dict 385 | :param chunk: optional -- size of chunks in which the add command 386 | should be split 387 | :type chunk: int 388 | :param kwargs: optinal -- additional arguments 389 | :type kwargs: dict 390 | :returns: list of SolrUpdateResponse -- A Solr response object. 391 | 392 | Add a document or a list of document to Solr. 393 | """ 394 | if hasattr(docs, "items") or not is_iter(docs): 395 | docs = [docs] 396 | # to avoid making messages too large, we break the message every 397 | # chunk docs. 398 | ret = [] 399 | for doc_chunk in grouper(docs, chunk): 400 | update_message = json.dumps(self._prepare_docs(doc_chunk)) 401 | ret.append( 402 | scorched.response.SolrUpdateResponse.from_json( 403 | self.conn.update(update_message, **kwargs) 404 | ) 405 | ) 406 | return ret 407 | 408 | def delete_by_query(self, query, **kwargs): 409 | """ 410 | :param query: criteria how witch entries should be deleted 411 | :type query: LuceneQuery 412 | :returns: SolrUpdateResponse -- A Solr response object. 413 | 414 | Delete entries by a given query 415 | """ 416 | delete_message = json.dumps({"delete": {"query": str(query)}}) 417 | ret = scorched.response.SolrUpdateResponse.from_json( 418 | self.conn.update(delete_message, **kwargs) 419 | ) 420 | return ret 421 | 422 | def delete_by_ids(self, ids, **kwargs): 423 | """ 424 | :param ids: ids of entries that should be deleted 425 | :type ids: list 426 | :returns: SolrUpdateResponse -- A Solr response object. 
427 | 428 | Delete entries by a given id 429 | """ 430 | delete_message = json.dumps({"delete": ids}) 431 | ret = scorched.response.SolrUpdateResponse.from_json( 432 | self.conn.update(delete_message, **kwargs) 433 | ) 434 | return ret 435 | 436 | def commit(self, waitSearcher=None, expungeDeletes=None, softCommit=None): 437 | """ 438 | :param waitSearcher: optional -- block until a new searcher is opened 439 | and registered as the main query searcher, making 440 | the changes visible 441 | :type waitSearcher: bool 442 | :param expungeDeletes: optional -- merge segments with deletes away 443 | :type expungeDeletes: bool 444 | :param softCommit: optional -- perform a soft commit - this will 445 | refresh the 'view' of the index in a more performant 446 | manner, but without "on-disk" guarantees. 447 | :type softCommit: bool 448 | :returns: SolrUpdateResponse -- A Solr response object. 449 | 450 | A commit operation makes index changes visible to new search requests. 451 | """ 452 | ret = scorched.response.SolrUpdateResponse.from_json( 453 | self.conn.update( 454 | '{"commit": {}}', 455 | commit=True, 456 | waitSearcher=waitSearcher, 457 | expungeDeletes=expungeDeletes, 458 | softCommit=softCommit, 459 | ) 460 | ) 461 | return ret 462 | 463 | def optimize(self, waitSearcher=None, maxSegments=None): 464 | """ 465 | :param waitSearcher: optional -- block until a new searcher is opened 466 | and registered as the main query searcher, making 467 | the changes visible 468 | :type waitSearcher: bool 469 | :param maxSegments: optional -- optimizes down to at most this number 470 | of segments 471 | :type maxSegments: int 472 | :returns: SolrUpdateResponse -- A Solr response object. 473 | 474 | An optimize is like a hard commit except that it forces all of the 475 | index segments to be merged into a single segment first. 476 | """ 477 | ret = scorched.response.SolrUpdateResponse.from_json( 478 | self.conn.update( 479 | '{"optimize": {}}', 480 | optimize=True, 481 | waitSearcher=waitSearcher, 482 | maxSegments=maxSegments, 483 | ) 484 | ) 485 | return ret 486 | 487 | def rollback(self): 488 | """ 489 | :returns: SolrUpdateResponse -- A Solr response object. 490 | 491 | The rollback command rollbacks all add/deletes made to the index since 492 | the last commit 493 | """ 494 | ret = scorched.response.SolrUpdateResponse.from_json( 495 | self.conn.update('{"rollback": {}}') 496 | ) 497 | return ret 498 | 499 | def delete_all(self): 500 | """ 501 | :returns: SolrUpdateResponse -- A Solr response object. 502 | 503 | Delete everything 504 | """ 505 | return self.delete_by_query(self.Q(**{"*": "*"})) 506 | 507 | def get(self, ids, fields=None): 508 | """ 509 | RealTime Get document(s) by id(s) 510 | 511 | :param ids: id(s) of the document(s) 512 | :type ids: list, string or int 513 | :param fields: optional -- list of fields to return 514 | :type fileds: list of strings 515 | """ 516 | ret = scorched.response.SolrResponse.from_get_json( 517 | self.conn.get(ids, fields), self._datefields 518 | ) 519 | return ret 520 | 521 | def search(self, **kwargs): 522 | """ 523 | :returns: SolrResponse -- A Solr response object. 524 | 525 | Search solr 526 | """ 527 | params = scorched.search.params_from_dict(**kwargs) 528 | ret = scorched.response.SolrResponse.from_json( 529 | self.conn.select(params), 530 | self.schema["uniqueKey"], 531 | self._datefields, 532 | ) 533 | return ret 534 | 535 | def query(self, *args, **kwargs): 536 | """ 537 | :returns: SolrSearch -- A solrsearch. 
538 | 539 | Build a Solr query 540 | """ 541 | q = scorched.search.SolrSearch(self) 542 | if len(args) + len(kwargs) > 0: 543 | return q.query(*args, **kwargs) 544 | else: 545 | return q 546 | 547 | def mlt_search(self, content=None, **kwargs): 548 | """ 549 | :returns: SolrResponse -- A Solr response object. 550 | 551 | More like this search Solr 552 | """ 553 | params = scorched.search.params_from_dict(**kwargs) 554 | ret = scorched.response.SolrResponse.from_json( 555 | self.conn.mlt(params, content=content), 556 | self.schema["uniqueKey"], 557 | self._datefields, 558 | ) 559 | return ret 560 | 561 | def mlt_query( 562 | self, 563 | fields, 564 | content=None, 565 | content_charset=None, 566 | url=None, 567 | query_fields=None, 568 | **kwargs 569 | ): 570 | """ 571 | :param fields: field names to compute similarity upon 572 | :type fields: list 573 | :param content: optional -- string on witch to find similar documents 574 | :type content: str 575 | :param content_charset: optional -- charset e.g. (iso-8859-1) 576 | :type content_charset: str 577 | :param url: optional -- like content but retrive directly from url 578 | :type url: str 579 | :param query_fields: optional -- adjust boosting values for ``fields`` 580 | :type query_fields: dict e.g. ({"a": 0.25, "b": 0.75}) 581 | :returns: MltSolrSearch 582 | 583 | Perform a similarity query on MoreLikeThisHandler 584 | 585 | The MoreLikeThisHandler is expected to be registered at the '/mlt' 586 | endpoint in the solrconfig.xml file of the server. 587 | 588 | Other MoreLikeThis specific parameters can be passed as kwargs without 589 | the 'mlt.' prefix. 590 | """ 591 | q = scorched.search.MltSolrSearch( 592 | self, content=content, content_charset=content_charset, url=url 593 | ) 594 | return q.mlt(fields=fields, query_fields=query_fields, **kwargs) 595 | 596 | def extract(self, fh, extractOnly=True, extractFormat="text"): 597 | """ 598 | :param fh: binary file (PDF, MSWord, ODF, ...) 599 | :type fh: open file handle 600 | :returns: SolrExtract 601 | 602 | Extract text and metadatada from binary file. 603 | 604 | The ExtractingRequestHandler is expected to be registered at the 605 | '/update/extract' endpoint in the solrconfig.xml file of the server. 
606 | """ 607 | url = self.conn.url + "update/extract" 608 | params = {"wt": "json"} 609 | if extractOnly: 610 | params["extractOnly"] = "true" 611 | params["extractFormat"] = extractFormat 612 | files = {"file": fh} 613 | response = self.conn.request("POST", url, params=params, files=files) 614 | if response.status_code != 200: 615 | raise scorched.exc.SolrError(response) 616 | return scorched.response.SolrExtract.from_json(response.json()) 617 | 618 | def Q(self, *args, **kwargs): 619 | q = scorched.search.LuceneQuery() 620 | q.add(args, kwargs) 621 | return q 622 | 623 | 624 | def grouper(iterable, n): 625 | """ 626 | grouper('ABCDEFG', 3) --> [['ABC'], ['DEF'], ['G']] 627 | """ 628 | i = iter(iterable) 629 | g = list(itertools.islice(i, 0, n)) 630 | while g: 631 | yield g 632 | g = list(itertools.islice(i, 0, n)) 633 | -------------------------------------------------------------------------------- /scorched/dates.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import fnmatch 3 | import math 4 | import re 5 | 6 | import pytz 7 | 8 | import scorched.exc 9 | 10 | year = r"[+/-]?\d+" 11 | tzd = r"Z|((?P[-+])(?P\d\d):(?P\d\d))" 12 | extended_iso_template = ( 13 | r"(?P" 14 | + year 15 | + r""") 16 | (-(?P\d\d) 17 | (-(?P\d\d) 18 | ([T%s](?P\d\d) 19 | :(?P\d\d) 20 | (:(?P\d\d) 21 | (.(?P\d+))?)? 22 | (""" 23 | + tzd 24 | + """)?)? 25 | )?)?""" 26 | ) 27 | extended_iso = extended_iso_template % " " 28 | extended_iso_re = re.compile("^" + extended_iso + "$", re.X) 29 | 30 | 31 | def datetime_from_w3_datestring(s): 32 | """We need to extend ISO syntax (as permitted by the standard) to allow 33 | for dates before 0AD and after 9999AD. This is how to parse such a string 34 | """ 35 | m = extended_iso_re.match(s) 36 | if not m: 37 | raise ValueError 38 | d = m.groupdict() 39 | d["year"] = int(d["year"]) 40 | d["month"] = int(d["month"] or 1) 41 | d["day"] = int(d["day"] or 1) 42 | d["hour"] = int(d["hour"] or 0) 43 | d["minute"] = int(d["minute"] or 0) 44 | d["fraction"] = d["fraction"] or "0" 45 | d["second"] = float("%s.%s" % ((d["second"] or "0"), d["fraction"])) 46 | del d["fraction"] 47 | if d["tzd_sign"]: 48 | if d["tzd_sign"] == "+": 49 | tzd_sign = 1 50 | elif d["tzd_sign"] == "-": 51 | tzd_sign = -1 52 | tz_delta = datetime_delta_factory( 53 | tzd_sign * int(d["tzd_hour"]), tzd_sign * int(d["tzd_minute"]) 54 | ) 55 | else: 56 | tz_delta = datetime_delta_factory(0, 0) 57 | del d["tzd_sign"] 58 | del d["tzd_hour"] 59 | del d["tzd_minute"] 60 | d["tzinfo"] = pytz.utc 61 | dt = datetime_factory(**d) + tz_delta 62 | return dt 63 | 64 | 65 | class DateTimeRangeError(ValueError): 66 | pass 67 | 68 | 69 | def datetime_factory(**kwargs): 70 | second = kwargs.get("second") 71 | if second is not None: 72 | f, i = math.modf(second) 73 | kwargs["second"] = int(i) 74 | kwargs["microsecond"] = int(f * 1000000) 75 | try: 76 | return datetime.datetime(**kwargs) 77 | except ValueError as e: 78 | raise DateTimeRangeError(e.args[0]) 79 | 80 | 81 | def datetime_delta_factory(hours, minutes): 82 | return datetime.timedelta(hours=hours, minutes=minutes) 83 | 84 | 85 | class solr_date(object): 86 | """ 87 | This class can be initialized from native python datetime 88 | objects and will serialize to a format appropriate for Solr 89 | """ 90 | 91 | def __init__(self, v): 92 | if isinstance(v, solr_date): 93 | self._dt_obj = v._dt_obj 94 | elif isinstance(v, str): 95 | self._dt_obj = datetime_from_w3_datestring(v) 96 | elif hasattr(v, "strftime"): 97 | 
self._dt_obj = self.from_date(v) 98 | else: 99 | raise scorched.exc.SolrError( 100 | "Cannot initialize solr_date from %s object" % type(v) 101 | ) 102 | 103 | def __hash__(self): 104 | return self._dt_obj.__hash__() 105 | 106 | @staticmethod 107 | def from_date(dt_obj): 108 | # Python datetime objects may include timezone information 109 | if hasattr(dt_obj, "tzinfo") and dt_obj.tzinfo: 110 | # but Solr requires UTC times. 111 | return dt_obj.astimezone(pytz.utc).replace(tzinfo=None) 112 | else: 113 | return dt_obj 114 | 115 | @property 116 | def microsecond(self): 117 | return self._dt_obj.microsecond 118 | 119 | def __repr__(self): 120 | return repr(self._dt_obj) 121 | 122 | def __str__(self): 123 | """Serialize a datetime object in the format required 124 | by Solr. See http://wiki.apache.org/solr/IndexingDates 125 | """ 126 | dt_obj = self._dt_obj 127 | if hasattr(dt_obj, "tzinfo") and dt_obj.tzinfo: 128 | # but Solr requires UTC times. 129 | dt_obj = dt_obj.astimezone(pytz.utc).replace(tzinfo=None) 130 | return "%sZ" % (dt_obj.isoformat(),) 131 | 132 | def __lt__(self, other): 133 | try: 134 | other = other._dt_obj 135 | except AttributeError: 136 | pass 137 | return self._dt_obj < other 138 | 139 | def __eq__(self, other): 140 | try: 141 | other = other._dt_obj 142 | except AttributeError: 143 | pass 144 | return self._dt_obj == other 145 | 146 | 147 | def is_datetime_field(name, datefields): 148 | if name in datefields: 149 | return True 150 | for fieldpattern in [d for d in datefields if "*" in d]: 151 | # XXX: there is better than fnmatch ? 152 | if fnmatch.fnmatch(name, fieldpattern): 153 | return True 154 | return False 155 | -------------------------------------------------------------------------------- /scorched/exc.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | 4 | class SolrError(Exception): 5 | pass 6 | -------------------------------------------------------------------------------- /scorched/response.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import json 4 | from collections.abc import Sequence 5 | 6 | import scorched.dates 7 | from scorched.compat import str 8 | from scorched.search import is_iter 9 | 10 | 11 | class SolrFacetCounts(object): 12 | members = ( 13 | "facet_dates", 14 | "facet_fields", 15 | "facet_queries", 16 | "facet_ranges", 17 | "facet_pivot", 18 | ) 19 | 20 | def __init__(self, **kwargs): 21 | for member in self.members: 22 | setattr(self, member, kwargs.get(member, {})) 23 | self.facet_fields = dict(self.facet_fields) 24 | 25 | @classmethod 26 | def from_json(cls, response): 27 | try: 28 | facet_counts = response["facet_counts"] 29 | except KeyError: 30 | return SolrFacetCounts() 31 | facet_fields = {} 32 | for facet_field, facet_values in list(facet_counts["facet_fields"].items()): 33 | facets = [] 34 | # Change each facet list from [a, 1, b, 2, c, 3 ...] to 35 | # [(a, 1), (b, 2), (c, 3) ...] 36 | for n, value in enumerate(facet_values): 37 | if n & 1 == 0: 38 | name = value 39 | else: 40 | facets.append((name, value)) 41 | facet_fields[facet_field] = facets 42 | facet_counts["facet_fields"] = facet_fields 43 | for facet_field in list(facet_counts["facet_ranges"].keys()): 44 | counts = [] 45 | count_list = facet_counts["facet_ranges"][facet_field]["counts"] 46 | # Change each facet list from [a, 1, b, 2, c, 3 ...] to 47 | # [(a, 1), (b, 2), (c, 3) ...] 
48 | for n, value in enumerate(count_list): 49 | if n & 1 == 0: 50 | name = value 51 | else: 52 | counts.append((name, value)) 53 | facet_counts["facet_ranges"][facet_field]["counts"] = counts 54 | return SolrFacetCounts(**facet_counts) 55 | 56 | 57 | class SolrExtract(object): 58 | @classmethod 59 | def from_json(cls, doc, filename=None): 60 | self = cls() 61 | if filename is None: 62 | for attrname in doc: 63 | if attrname.endswith("_metadata"): 64 | filename = attrname[:-9] 65 | self.text = doc[filename] 66 | metadata = doc[filename + "_metadata"] 67 | self.metadata = dict(zip(metadata[0::2], metadata[1::2])) 68 | for attr in ["QTime", "status"]: 69 | setattr(self, attr, doc["responseHeader"].get(attr)) 70 | return self 71 | 72 | 73 | class SolrStats(object): 74 | members = ( 75 | "stats_fields", 76 | "facet", 77 | ) 78 | 79 | def __init__(self, **kwargs): 80 | for member in self.members: 81 | setattr(self, member, kwargs.get(member, ())) 82 | self.stats_fields = dict(self.stats_fields) 83 | 84 | @classmethod 85 | def from_json(cls, response): 86 | try: 87 | stats_response = response["stats"] 88 | except KeyError: 89 | return SolrStats() 90 | stats = {"stats_fields": {}} 91 | # faceted stats, if present, are included within the field 92 | for field, values in list(stats_response["stats_fields"].items()): 93 | stats["stats_fields"][field] = values 94 | 95 | return SolrStats(**stats) 96 | 97 | 98 | class SolrUpdateResponse(object): 99 | @classmethod 100 | def from_json(cls, jsonmsg): 101 | self = cls() 102 | self.original_json = jsonmsg 103 | doc = json.loads(jsonmsg) 104 | details = doc["responseHeader"] 105 | for attr in ["QTime", "params", "status"]: 106 | setattr(self, attr, details.get(attr)) 107 | if self.status != 0: 108 | raise ValueError("Response indicates an error") 109 | return self 110 | 111 | 112 | class SolrResponse(Sequence): 113 | @classmethod 114 | def from_json(cls, jsonmsg, unique_key, datefields=()): 115 | self = cls() 116 | self.original_json = jsonmsg 117 | doc = json.loads(jsonmsg) 118 | details = doc["responseHeader"] 119 | for attr in ["QTime", "params", "status"]: 120 | setattr(self, attr, details.get(attr)) 121 | if self.status != 0: 122 | raise ValueError("Response indicates an error") 123 | self.result = SolrResult() 124 | if doc.get("response"): 125 | self.result = SolrResult.from_json(doc["response"], datefields) 126 | # TODO mlt/ returns match what should we do with it ? 127 | # if doc.get('match'): 128 | # self.result = SolrResult.from_json(doc['match'], datefields) 129 | self.facet_counts = SolrFacetCounts.from_json(doc) 130 | self.spellcheck = doc.get("spellcheck", {}) 131 | if self.params is not None: 132 | self.group_field = self.params.get("group.field") 133 | else: 134 | self.group_field = None 135 | self.groups = {} 136 | if self.group_field is not None: 137 | self.groups = SolrGroupResult.from_json( 138 | doc["grouped"], self.group_field, datefields 139 | ) 140 | self.highlighting = doc.get("highlighting", {}) 141 | if self.highlighting: 142 | # Add highlighting info to the individual documents. 
143 | if doc.get("response"): 144 | for d in self.result.docs: 145 | k = str(d[unique_key]) 146 | if k in self.highlighting: 147 | d["solr_highlights"] = self.highlighting[k] 148 | elif doc.get("grouped"): 149 | for group in getattr(self.groups, self.group_field)["groups"]: 150 | for d in group["doclist"]["docs"]: 151 | k = str(d[unique_key]) 152 | if k in self.highlighting: 153 | d["solr_highlights"] = self.highlighting[k] 154 | 155 | self.debug = doc.get("debug", {}) 156 | self.next_cursor_mark = doc.get("nextCursorMark") 157 | self.more_like_these = dict( 158 | (k, SolrResult.from_json(v, datefields)) 159 | for (k, v) in list(doc.get("moreLikeThis", {}).items()) 160 | ) 161 | self.term_vectors = self.parse_term_vectors(doc.get("termVectors", [])) 162 | # can be computed by MoreLikeThisHandler 163 | self.interesting_terms = doc.get("interestingTerms", None) 164 | self.stats = SolrStats.from_json(doc) 165 | return self 166 | 167 | @classmethod 168 | def from_get_json(cls, jsonmsg, datefields=()): 169 | """Generate instance from the response of a RealTime Get""" 170 | self = cls() 171 | self.groups = {} 172 | self.original_json = jsonmsg 173 | doc = json.loads(jsonmsg) 174 | self.result = SolrResult.from_json(doc["response"], datefields) 175 | return self 176 | 177 | @classmethod 178 | def parse_term_vectors(cls, lst, path=""): 179 | """Transform a solr list to dict 180 | 181 | Turns [a, x, b, y, c, z ...] into {a: x, b: y, c: z ...} 182 | If the values are lists themselves, this is done recursively 183 | """ 184 | dct = dict() 185 | for i in range(0, len(lst), 2): 186 | k = lst[i] 187 | v = lst[i + 1] 188 | # Do not recurse too deep into warnings list 189 | if path != ".warnings" and isinstance(v, list): 190 | v = cls.parse_term_vectors(v, path + "." 
+ k) 191 | dct[k] = v 192 | return dct 193 | 194 | def __str__(self): 195 | return str(self.result) 196 | 197 | def __len__(self): 198 | if self.groups: 199 | return len(getattr(self.groups, self.group_field)["groups"]) 200 | else: 201 | return len(self.result.docs) 202 | 203 | def __getitem__(self, key): 204 | if self.groups: 205 | return getattr(self.groups, self.group_field)["groups"][key] 206 | else: 207 | return self.result.docs[key] 208 | 209 | 210 | class SolrResult(object): 211 | @classmethod 212 | def from_json(cls, node, datefields=()): 213 | self = cls() 214 | self.name = "response" 215 | self.numFound = int(node["numFound"]) 216 | self.start = int(node["start"]) 217 | docs = node["docs"] 218 | self.docs = self._prepare_docs(docs, datefields) 219 | return self 220 | 221 | @staticmethod 222 | def _prepare_docs(docs, datefields): 223 | for doc in docs: 224 | for name, value in list(doc.items()): 225 | if scorched.dates.is_datetime_field(name, datefields): 226 | if is_iter(value): 227 | doc[name] = [scorched.dates.solr_date(v)._dt_obj for v in value] 228 | else: 229 | doc[name] = scorched.dates.solr_date(value)._dt_obj 230 | return docs 231 | 232 | def __str__(self): 233 | return "{numFound} results found, starting at #{start}".format( 234 | numFound=self.numFound, start=self.start 235 | ) 236 | 237 | 238 | class SolrGroupResult(object): 239 | @classmethod 240 | def from_json(cls, node, group_field, datefields=()): 241 | self = cls() 242 | self.name = "response" 243 | self.group_field = group_field 244 | groups = node[group_field]["groups"] 245 | setattr( 246 | self, 247 | group_field, 248 | { 249 | "matches": node[group_field]["matches"], 250 | "ngroups": node[group_field]["ngroups"], 251 | "groups": self._prepare_groups(groups, datefields), 252 | }, 253 | ) 254 | return self 255 | 256 | @staticmethod 257 | def _prepare_groups(groups, datefields): 258 | """Iterate over the docs and the groups and cast fields appropriately""" 259 | for group in groups: 260 | for doc in group["doclist"]["docs"]: 261 | for name, value in doc.items(): 262 | if scorched.dates.is_datetime_field(name, datefields): 263 | if is_iter(value): 264 | doc[name] = [ 265 | scorched.dates.solr_date(v)._dt_obj for v in value 266 | ] 267 | else: 268 | doc[name] = scorched.dates.solr_date(value)._dt_obj 269 | return groups 270 | 271 | def __str__(self): 272 | return "{ngroups} groups with {matches} matches found".format( 273 | ngroups=getattr(self, self.group_field)["ngroups"], 274 | matches=getattr(self, self.group_field)["matches"], 275 | ) 276 | -------------------------------------------------------------------------------- /scorched/strings.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from scorched.compat import python_2_unicode_compatible, str 4 | 5 | 6 | class SolrString(str): 7 | # The behaviour below is only really relevant for String fields rather 8 | # than Text fields - most queryparsers will strip these characters out 9 | # for a text field anyway. 
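# (Illustrative example, assuming the subclasses defined below:
#  RawString("a:b*").escape_for_lqs_term() yields a\:b\* with every special
#  character escaped, while WildcardString("a:b*").escape_for_lqs_term()
#  yields a\:b*, keeping the asterisk live as a wildcard.)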
10 | lucene_special_chars = '+-&|!(){}[]^"~*?: \t\v\\/'
11 |
12 | def escape_for_lqs_term(self):
13 | if self in ["AND", "OR", "NOT", ""]:
14 | return '"%s"' % self
15 | chars = []
16 | for c in self.chars:
17 | if isinstance(c, str) and c in self.lucene_special_chars:
18 | chars.append("\\%s" % c)
19 | else:
20 | chars.append("%s" % c)
21 | return "".join(chars)
22 |
23 |
24 | class RawString(SolrString):
25 | def __init__(self, s):
26 | self.chars = self
27 |
28 |
29 | class WildcardString(SolrString):
30 | def __init__(self, s):
31 | self.chars = self.get_wildcards(s)
32 |
33 | class SpecialChar(object):
34 | @python_2_unicode_compatible
35 | def __str__(self):
36 | return str(self.char)
37 |
38 | class Asterisk(SpecialChar):
39 | char = "*"
40 |
41 | class QuestionMark(SpecialChar):
42 | char = "?"
43 |
44 | def get_wildcards(self, s):
45 | backslash = False
46 | i = 0
47 | chars = []
48 | for c in s:
49 | if backslash:
50 | backslash = False
51 | chars.append(c)
52 | continue
53 | i += 1
54 | if c == "\\":
55 | backslash = True
56 | elif c == "*":
57 | chars.append(self.Asterisk())
58 | elif c == "?":
59 | chars.append(self.QuestionMark())
60 | else:
61 | chars.append(c)
62 | if backslash:
63 | chars.append("\\")
64 | return chars
65 |
66 |
67 | class DismaxString(str):
68 | """A dismax query string that should not be escaped by the client."""
--------------------------------------------------------------------------------
/scorched/testing.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from __future__ import unicode_literals
3 | import requests
4 | import os
5 | import unittest
6 | if not hasattr(unittest, "skip"):
7 | try:
8 | import unittest2 as unittest
9 | except ImportError:
10 | pass
11 | import warnings
12 |
13 | from scorched.compat import str
14 |
15 |
16 | def is_solr_available(dsn=None):
17 | if not dsn:
18 | dsn = os.environ.get("SOLR_URL",
19 | "http://localhost:8983/solr")
20 | if dsn is not None:
21 | try:
22 | requests.get(dsn, timeout=1)
23 | return True
24 | except Exception as e:
25 | print("Connection error: %s" % str(e))
26 | return False
27 |
28 |
29 | def skip_unless_solr(func):
30 | """
31 | Use this decorator to skip tests which need a functional Solr connection.
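Example (an illustrative sketch, not part of the original docstring):

    @skip_unless_solr
    def test_roundtrip(self):
        ...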
32 | The connection is given by the environment variable SOLR_URL.
33 | """
34 |
35 | if is_solr_available():
36 | return func
37 | msg = "Test needs a running Solr connection (SOLR_URL)"
38 | warnings.warn("%s: %s" % (msg, func))
39 | return unittest.skip(msg)(func)
--------------------------------------------------------------------------------
/scorched/tests/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 |
--------------------------------------------------------------------------------
/scorched/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import socket
3 |
4 | import pytest
5 | import requests
6 | from requests.exceptions import ConnectionError
7 |
8 |
9 | def get_unused_port():
10 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
11 | s.bind(("localhost", 0))
12 | addr, port = s.getsockname()
13 | s.close()
14 | return port
15 |
16 |
17 | def is_responsive(url):
18 | ping_url = f"{url}/admin/ping"
19 | try:
20 | response = requests.get(ping_url)
21 | if response.status_code == 200:
22 | return True
23 | except ConnectionError:
24 | return False
25 |
26 |
27 | @pytest.fixture(scope="session")
28 | def docker_compose_file(pytestconfig):
29 | # This is hackish. `docker_compose_file` is
30 | # called before the fixture `docker_services` is
31 | # executed and this is the only point where
32 | # we could inject our own custom port into the environment.
33 | # By the time the `docker_services` fixture runs, the
34 | # container has already been started, so later changes to
35 | # the environment have no effect.
36 | #
37 | # Ensure that we use an unused custom port to allow
38 | # multiple instances to run simultaneously.
39 | port = get_unused_port()
40 | os.environ["SCORCHED_TEST_SOLR_PORT"] = str(port)
41 | return os.path.join(
42 | str(pytestconfig.rootdir), "scorched", "tests", "docker-compose.yml"
43 | )
44 |
45 |
46 | @pytest.fixture(scope="session")
47 | def solr_url(docker_ip, docker_services):
48 | """Ensure that the HTTP service is up and responsive."""
49 | # `port_for` takes a container port and returns the corresponding host port
50 | port = docker_services.port_for("solr", 8983)
51 | solr_url = "http://{}:{}/solr/core0".format(docker_ip, port)
52 | docker_services.wait_until_responsive(
53 | timeout=30.0, pause=1.0, check=lambda: is_responsive(solr_url)
54 | )
55 | os.environ["SOLR_URL"] = solr_url
56 | return solr_url
--------------------------------------------------------------------------------
/scorched/tests/data/lipsum.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lugensa/scorched/a1ca2970085c01bcde2177cee6e67b9dc40b86c6/scorched/tests/data/lipsum.pdf
--------------------------------------------------------------------------------
/scorched/tests/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | solr:
4 | image: solr:8
5 | ports:
6 | - "${SCORCHED_TEST_SOLR_PORT:-44177}:8983"
7 | volumes:
8 | - ./:/tests
9 | command: |
10 | bash -c 'precreate-core core0 &&
11 | cp -a /opt/solr/server/solr/configsets/sample_techproducts_configs/conf/* /var/solr/data/core0/conf/ &&
12 | cp /tests/solrconfig_8.11.xml /var/solr/data/core0/conf/solrconfig.xml &&
13 | solr-foreground'
14 |
15 | # docker run -d --rm -p 44177:8983 \
16 | # -v $PWD/scorched/tests:/tests \
17 | # --name my_solr solr:8 bash -c \
18 | # 
"precreate-core core0 && "\ 19 | # "cp -a /opt/solr/server/solr/configsets/sample_techproducts_configs/conf/* /var/solr/data/core0/conf/ &&"\ 20 | # "cp /tests/solrconfig_8.11.xml /var/solr/data/core0/conf/solrconfig.xml && solr-foreground" 21 | -------------------------------------------------------------------------------- /scorched/tests/dumps/books.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id" : "978-0641723445", 4 | "cat" : ["book","hardcover"], 5 | "name" : "The Lightning Thief", 6 | "author" : "Rick Riordan", 7 | "series_t" : "Percy Jackson and the Olympians", 8 | "sequence_i" : 1, 9 | "genre_s" : "fantasy", 10 | "inStock" : true, 11 | "price" : 12.50, 12 | "pages_i" : 384 13 | } 14 | , 15 | { 16 | "id" : "978-1423103349", 17 | "cat" : ["book","paperback"], 18 | "name" : "The Sea of Monsters", 19 | "author" : "Rick Riordan", 20 | "series_t" : "Percy Jackson and the Olympians", 21 | "sequence_i" : 2, 22 | "genre_s" : "fantasy", 23 | "inStock" : true, 24 | "price" : 6.49, 25 | "pages_i" : 304 26 | } 27 | , 28 | { 29 | "id" : "978-1857995879", 30 | "cat" : ["book","paperback"], 31 | "name" : "Sophie's World : The Greek Philosophers", 32 | "author" : "Jostein Gaarder", 33 | "sequence_i" : 1, 34 | "genre_s" : "fantasy", 35 | "inStock" : true, 36 | "price" : 3.07, 37 | "pages_i" : 64 38 | } 39 | , 40 | { 41 | "id" : "978-1933988177", 42 | "cat" : ["book","paperback"], 43 | "name" : "Lucene in Action, Second Edition", 44 | "author" : "Michael McCandless", 45 | "sequence_i" : 1, 46 | "genre_s" : "IT", 47 | "inStock" : true, 48 | "price" : 30.50, 49 | "pages_i" : 475 50 | } 51 | ] 52 | -------------------------------------------------------------------------------- /scorched/tests/dumps/request_error.json: -------------------------------------------------------------------------------- 1 | { 2 | "responseHeader": { 3 | "status": 400, 4 | "QTime": 1, 5 | "params": { 6 | "facet": "true", 7 | "indent": "true", 8 | "q": "genre_s:fantasy", 9 | "_": "1394706864646", 10 | "facet.field": "cat", 11 | "wt": "json" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /scorched/tests/dumps/request_hl.json: -------------------------------------------------------------------------------- 1 | { 2 | "responseHeader": { 3 | "status": 0, 4 | "QTime": 1, 5 | "params": { 6 | "q": "author:John", 7 | "hl": "true", 8 | "hl.fl": "author", 9 | "wt": "json" 10 | } 11 | }, 12 | "response": { 13 | "numFound": 1, 14 | "start": 0, 15 | "docs": [ 16 | { 17 | "name": "The Höhlentripp Strauß", 18 | "author": "John Muir", 19 | "author_s": "John Muir", 20 | "series_t": "Percy Jackson and ☂nicode", 21 | "pages_i": 384, 22 | "genre_s": "fantasy", 23 | "id": "978", 24 | "sequence_i": 1, 25 | "inStock": true, 26 | "cat": [ 27 | "book", 28 | "hardcover" 29 | ], 30 | "price": 12.5, 31 | "price_c": "12.5,USD", 32 | "_version_": 1547482048566919168 33 | } 34 | ] 35 | }, 36 | "highlighting": { 37 | "978": { 38 | "author": [ 39 | "John Muir" 40 | ] 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /scorched/tests/dumps/request_hl_grouped.json: -------------------------------------------------------------------------------- 1 | { 2 | "responseHeader": { 3 | "status": 0, 4 | "QTime": 1, 5 | "params": { 6 | "q": "author:Muir", 7 | "hl": "true", 8 | "hl.fl": "author", 9 | "group.ngroups": "true", 10 | "wt": "json", 11 | "group.field": "inStock", 12 | "group": "true" 13 
| } 14 | }, 15 | "grouped": { 16 | "inStock": { 17 | "matches": 2, 18 | "ngroups": 1, 19 | "groups": [ 20 | { 21 | "groupValue": true, 22 | "doclist": { 23 | "numFound": 2, 24 | "start": 0, 25 | "docs": [ 26 | { 27 | "name": "The Yosemite", 28 | "author": "John Muir", 29 | "author_s": "John Muir", 30 | "price": 12.5, 31 | "price_c": "12.5,USD", 32 | "important_dts": [ 33 | "1969-01-01T00:00:00Z", 34 | "1969-01-02T00:00:00Z" 35 | ], 36 | "inStock": true, 37 | "id": "978", 38 | "_version_": 1547485322340728832 39 | } 40 | ] 41 | } 42 | } 43 | ] 44 | } 45 | }, 46 | "highlighting": { 47 | "978": { 48 | "author": [ 49 | "John Muir" 50 | ] 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /scorched/tests/dumps/request_w_facets.json: -------------------------------------------------------------------------------- 1 | { 2 | "responseHeader": { 3 | "status": 0, 4 | "QTime": 1, 5 | "params": { 6 | "facet": "true", 7 | "indent": "true", 8 | "q": "genre_s:fantasy", 9 | "_": "1394706864646", 10 | "facet.field": "cat", 11 | "wt": "json" 12 | } 13 | }, 14 | "response": { 15 | "numFound": 3, 16 | "start": 0, 17 | "docs": [ 18 | { 19 | "id": "978-0641723445", 20 | "cat": [ 21 | "book", 22 | "hardcover" 23 | ], 24 | "name": "The Lightning Thief", 25 | "author": "Rick Riordan", 26 | "author_s": "Rick Riordan", 27 | "series_t": "Percy Jackson and the Olympians", 28 | "sequence_i": 1, 29 | "genre_s": "fantasy", 30 | "inStock": true, 31 | "price": 12.5, 32 | "price_c": "12.5,USD", 33 | "created_dt": "2009-07-23T03:24:34.000376Z", 34 | "modified": "2009-07-23T03:24:34.000376Z", 35 | "not_a_datetime_field_modified": "name of this field ends with modified but is not a datetime", 36 | "pages_i": 384, 37 | "_version_": 1462456002687271000 38 | }, 39 | { 40 | "id": "978-1423103349", 41 | "cat": [ 42 | "book", 43 | "paperback" 44 | ], 45 | "name": "The Sea of Monsters", 46 | "author": "Rick Riordan", 47 | "author_s": "Rick Riordan", 48 | "series_t": "Percy Jackson and the Olympians", 49 | "sequence_i": 2, 50 | "genre_s": "fantasy", 51 | "inStock": true, 52 | "price": 6.49, 53 | "price_c": "6.49,USD", 54 | "pages_i": 304, 55 | "_version_": 1462456002688319500 56 | }, 57 | { 58 | "id": "978-1857995879", 59 | "cat": [ 60 | "book", 61 | "paperback" 62 | ], 63 | "name": "Sophie's World : The Greek Philosophers", 64 | "author": "Jostein Gaarder", 65 | "author_s": "Jostein Gaarder", 66 | "sequence_i": 1, 67 | "genre_s": "fantasy", 68 | "inStock": true, 69 | "price": 3.07, 70 | "price_c": "3.07,USD", 71 | "pages_i": 64, 72 | "_version_": 1462456002689368000 73 | } 74 | ] 75 | }, 76 | "facet_counts": { 77 | "facet_queries": {}, 78 | "facet_fields": { 79 | "cat": [ 80 | "book", 81 | 3, 82 | "paperback", 83 | 2, 84 | "hardcover", 85 | 1 86 | ] 87 | }, 88 | "facet_dates": {}, 89 | "facet_ranges": { 90 | "created_dt": { 91 | "counts": [ 92 | "2009-01-01T00:00:00Z", 93 | 1, 94 | "2010-01-01T00:00:00Z", 95 | 0, 96 | "2011-01-01T00:00:00Z", 97 | 0 98 | ], 99 | "gap":"+1YEARS", 100 | "start":"2009-01-01T00:00:00Z", 101 | "end":"2012-01-01T00:00:00Z" 102 | } 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /scorched/tests/dumps/request_w_termvector.json: -------------------------------------------------------------------------------- 1 | { 2 | "responseHeader": { 3 | "status": 0, 4 | "QTime": 24, 5 | "params": { 6 | "tv.tf": "true", 7 | "indent": "true", 8 | "q": "*:*", 9 | "tv": "true", 10 | "_": "1441938054458", 11 | 
"tv.fl": "weighted_words,title", 12 | "tv.df": "true", 13 | "wt": "json", 14 | "rows": "2" 15 | } 16 | }, 17 | "response": { 18 | "numFound": 333940, 19 | "start": 0, 20 | "docs": [ 21 | { 22 | "title": "Medizinprodukteberater", 23 | "uid": "ffaa9370-5182-5810-b8a9-54b751ef0606", 24 | "date": "2015-09-09T16:42:20.735Z", 25 | "cuid": "ffaa9370-5182-5810-b8a9-54b751ef0606", 26 | "geohex": [ 27 | "378,70" 28 | ], 29 | "_version_": 1511854640452337700 30 | }, 31 | { 32 | "title": "Automatisierungstechniker m/w", 33 | "uid": "9ce8ef2d-6e0f-5647-ae4c-2aaaca37b28f", 34 | "uri": "http://meega.de/1431501-automatisierungstechniker-m-w.html", 35 | "cuid": "9ce8ef2d-6e0f-5647-ae4c-2aaaca37b28f", 36 | "geohex": [ 37 | "357,61" 38 | ], 39 | "_version_": 1511857045532311600 40 | } 41 | ] 42 | }, 43 | "termVectors": [ 44 | "uniqueKeyFieldName", 45 | "uid", 46 | "warnings", 47 | [ 48 | "noTermVectors", 49 | [ 50 | "title" 51 | ] 52 | ], 53 | "ffaa9370-5182-5810-b8a9-54b751ef0606", 54 | [ 55 | "uniqueKey", 56 | "ffaa9370-5182-5810-b8a9-54b751ef0606", 57 | "weighted_words", 58 | [ 59 | "denken", 60 | [ 61 | "tf", 62 | 1, 63 | "df", 64 | 10409 65 | ], 66 | "dienstfahrtzeug", 67 | [ 68 | "tf", 69 | 1, 70 | "df", 71 | 1 72 | ], 73 | "dokumentation", 74 | [ 75 | "tf", 76 | 1, 77 | "df", 78 | 19774 79 | ], 80 | "eigeninitiative", 81 | [ 82 | "tf", 83 | 1, 84 | "df", 85 | 11369 86 | ], 87 | "wirken", 88 | [ 89 | "tf", 90 | 1, 91 | "df", 92 | 106 93 | ] 94 | ] 95 | ], 96 | "9ce8ef2d-6e0f-5647-ae4c-2aaaca37b28f", 97 | [ 98 | "uniqueKey", 99 | "9ce8ef2d-6e0f-5647-ae4c-2aaaca37b28f", 100 | "weighted_words", 101 | [ 102 | "anlagen", 103 | [ 104 | "tf", 105 | 3, 106 | "df", 107 | 21484 108 | ], 109 | "instandhaltung", 110 | [ 111 | "tf", 112 | 2, 113 | "df", 114 | 11717 115 | ], 116 | "kontakte", 117 | [ 118 | "tf", 119 | 1, 120 | "df", 121 | 9893 122 | ], 123 | "wert", 124 | [ 125 | "tf", 126 | 1, 127 | "df", 128 | 8572 129 | ] 130 | ] 131 | ] 132 | ] 133 | } 134 | -------------------------------------------------------------------------------- /scorched/tests/test_connection.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import requests 5 | import scorched.connection 6 | import unittest 7 | 8 | from unittest import mock 9 | 10 | 11 | HTTPBIN = os.environ.get('HTTPBIN_URL', 'https://httpbin.org/') 12 | # Issue #1483: Make sure the URL always has a trailing slash 13 | HTTPBIN = HTTPBIN.rstrip('/') + '/' 14 | 15 | 16 | def httpbin(*suffix): 17 | """Returns url for HTTPBIN resource.""" 18 | return requests.compat.urljoin(HTTPBIN, '/'.join(suffix)) 19 | 20 | 21 | class TestConnection(unittest.TestCase): 22 | 23 | def _make_connection(self, url="http://localhost:8983/solr", 24 | http_connection=None, mode="r", retry_timeout=-1, 25 | max_length_get_url=2048): 26 | 27 | sc = scorched.connection.SolrConnection( 28 | url=url, 29 | http_connection=http_connection, 30 | mode=mode, 31 | retry_timeout=retry_timeout, 32 | max_length_get_url=max_length_get_url) 33 | 34 | return sc 35 | 36 | def test_readable(self): 37 | sc = self._make_connection() 38 | self.assertRaises(TypeError, sc.update, {}) 39 | 40 | def test_writeable(self): 41 | sc = self._make_connection(mode="w") 42 | self.assertRaises(TypeError, sc.mlt, []) 43 | self.assertRaises(TypeError, sc.select, {}) 44 | 45 | def test_mlt(self): 46 | sc = self._make_connection(mode="") 47 | with mock.patch.object(requests.Session, 'request', 48 | return_value=mock.Mock(status_code=500)): 49 | 
self.assertRaises(scorched.exc.SolrError, sc.mlt, [])
50 | # test content
51 | with mock.patch.object(requests.Session, 'request',
52 | return_value=mock.Mock(status_code=500)):
53 | self.assertRaises(scorched.exc.SolrError, sc.mlt, [],
54 | content="fooo")
55 | # test post building
56 | sc = self._make_connection(max_length_get_url=0)
57 | with mock.patch.object(requests.Session, 'request',
58 | return_value=mock.Mock(status_code=500)):
59 | self.assertRaises(scorched.exc.SolrError, sc.mlt, [],
60 | content="fooo")
61 |
62 | def test_select(self):
63 | sc = self._make_connection(max_length_get_url=0)
64 | with mock.patch.object(requests.Session, 'request',
65 | return_value=mock.Mock(status_code=500)):
66 | self.assertRaises(scorched.exc.SolrError, sc.select, [])
67 |
68 | def test_no_body_response_error(self):
69 | sc = self._make_connection(mode="")
70 | with mock.patch.object(requests.Session, 'request',
71 | return_value=mock.Mock(status_code=500)):
72 | self.assertRaises(scorched.exc.SolrError, sc.update, {"foo": 2})
73 | self.assertRaises(scorched.exc.SolrError, sc.update, {})
74 |
75 | def test_request(self):
76 | sc = self._make_connection(url="http://localhost:1234/none", mode="")
77 | self.assertRaises(Exception, sc.request, (), {})
78 |
79 | def test_url_for_update(self):
80 | dsn = "http://localhost:1234/none"
81 | sc = self._make_connection(url=dsn)
82 | ret = sc.url_for_update()
83 |
84 | def dsn_url(path):
85 | return "%s%s" % (dsn, path)
86 |
87 | self.assertEqual(ret, dsn_url("/update/json"))
88 | # commitWithin
89 | ret = sc.url_for_update(commitWithin=2)
90 | self.assertEqual(ret, dsn_url("/update/json?commitWithin=2"))
91 | self.assertRaises(ValueError, sc.url_for_update, commitWithin="a")
92 | self.assertRaises(ValueError, sc.url_for_update, commitWithin=-1)
93 | # softCommit
94 | ret = sc.url_for_update(softCommit=True)
95 | self.assertEqual(ret, dsn_url("/update/json?softCommit=true"))
96 | ret = sc.url_for_update(softCommit=False)
97 | self.assertEqual(ret, dsn_url("/update/json?softCommit=false"))
98 | # optimize
99 | ret = sc.url_for_update(optimize=True)
100 | self.assertEqual(ret, dsn_url("/update/json?optimize=true"))
101 | ret = sc.url_for_update(optimize=False)
102 | self.assertEqual(ret, dsn_url("/update/json?optimize=false"))
103 | # waitSearcher
104 | ret = sc.url_for_update(waitSearcher=True)
105 | self.assertEqual(ret, dsn_url("/update/json?waitSearcher=true"))
106 | ret = sc.url_for_update(waitSearcher=False)
107 | self.assertEqual(ret, dsn_url("/update/json?waitSearcher=false"))
108 | # expungeDeletes
109 | ret = sc.url_for_update(commit=True, expungeDeletes=True)
110 | self.assertEqual(
111 | ret, dsn_url("/update/json?commit=true&expungeDeletes=true"))
112 | ret = sc.url_for_update(commit=True, expungeDeletes=False)
113 | self.assertEqual(
114 | ret, dsn_url("/update/json?commit=true&expungeDeletes=false"))
115 | self.assertRaises(ValueError, sc.url_for_update, expungeDeletes=True)
116 | # maxSegments
117 | ret = sc.url_for_update(optimize=True, maxSegments=2)
118 | self.assertEqual(
119 | ret, dsn_url("/update/json?maxSegments=2&optimize=true"))
120 | self.assertRaises(
121 | ValueError, sc.url_for_update, optimize=True, maxSegments="a")
122 | self.assertRaises(
123 | ValueError, sc.url_for_update, optimize=True, maxSegments=-1)
124 | self.assertRaises(ValueError, sc.url_for_update, maxSegments=2)
125 |
126 | def test_select_timeout(self):
127 | dsn = "http://localhost:1234/none"
128 | # max_length_get_url=99999: httpbin doesn't support POST
129 | 
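# (Note added for clarity: with max_length_get_url=99999 scorched stays on
#  GET, so httpbin's JSON echo contains the request URL asserted below.)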
sc = scorched.connection.SolrConnection( 130 | url=dsn, http_connection=None, mode="", retry_timeout=-1, 131 | max_length_get_url=99999, search_timeout=3.0) 132 | sc.select_url = httpbin('delay/2') 133 | # delay 2.0s < 3.0s timeout, ok 134 | resp = sc.select([]) 135 | self.assertTrue(json.loads(resp)['url'].startswith(sc.select_url)) 136 | # delay 2.0s > 1.0s timeout, raise ReadTimeout 137 | sc.search_timeout = 1.0 138 | self.assertRaises(requests.exceptions.ReadTimeout, sc.select, []) 139 | sc.search_timeout = (5.0, 1.0) # (connect, read) 140 | self.assertRaises(requests.exceptions.ReadTimeout, sc.select, []) 141 | # delay 2.0s < 3.0s timeout, ok 142 | sc.search_timeout = (1.0, 3.0) # (connect, read) 143 | resp = sc.select([]) 144 | self.assertTrue(json.loads(resp)['url'].startswith(sc.select_url)) 145 | # Connecting to an invalid port should raise a ConnectionError 146 | sc.select_url = "https://httpbin.org:1/none/select" 147 | sc.search_timeout = 1.0 148 | self.assertRaises(requests.exceptions.ConnectTimeout, sc.select, []) 149 | sc.search_timeout = (1.0, 5.0) 150 | self.assertRaises(requests.exceptions.ConnectTimeout, sc.select, []) 151 | 152 | def test_basic_auth(self): 153 | hc = requests.Session() 154 | hc.auth = ('joe', 'Secret') 155 | 156 | dsn = "http://localhost:1234/none" 157 | sc = self._make_connection(url=dsn, http_connection=hc) 158 | sc.select_url = httpbin('/basic-auth/{0}/{1}'.format(*hc.auth)) 159 | 160 | resp = sc.select([]) 161 | self.assertTrue(json.loads(resp)['authenticated']) 162 | 163 | 164 | class TestSolrInterface(unittest.TestCase): 165 | 166 | def _make_one(self): 167 | import scorched.connection 168 | import scorched.tests.schema 169 | with mock.patch('scorched.connection.SolrInterface.init_schema') as \ 170 | init_schema: 171 | init_schema.return_value = scorched.tests.schema.schema 172 | si = scorched.connection.SolrInterface( 173 | 'http://localhost:2222/mysolr') 174 | return si 175 | 176 | def test__should_skip_value(self): 177 | sc = self._make_one() 178 | self.assertTrue(sc._should_skip_value(None)) 179 | self.assertTrue(sc._should_skip_value({'set': None})) 180 | self.assertFalse(sc._should_skip_value(1)) 181 | self.assertFalse(sc._should_skip_value({'set': 1})) 182 | 183 | def test__prepare_docs_does_not_alter_given_docs(self): 184 | sc = self._make_one() 185 | today = datetime.datetime.utcnow() 186 | docs = [{'last_modified': today}] 187 | sc._prepare_docs(docs) 188 | self.assertEqual(docs, [{'last_modified': today}]) 189 | 190 | def test__prepare_docs_converts_datetime(self): 191 | sc = self._make_one() 192 | dt = datetime.datetime(2014, 2, 18, 12, 12, 10) 193 | docs = [{'last_modified': dt}] 194 | result = sc._prepare_docs(docs) 195 | self.assertEqual(result[0]['last_modified'], "2014-02-18T12:12:10Z") 196 | 197 | def test__prepare_docs_converts_datetime_atomic_update(self): 198 | sc = self._make_one() 199 | dt = datetime.datetime(2014, 2, 18, 12, 12, 10) 200 | docs = [{'last_modified': {'set': dt}}] 201 | result = sc._prepare_docs(docs) 202 | self.assertEqual( 203 | result[0]['last_modified']['set'], 204 | '2014-02-18T12:12:10Z', 205 | ) 206 | -------------------------------------------------------------------------------- /scorched/tests/test_dates.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pytz 3 | import unittest 4 | import scorched.exc 5 | import pytest 6 | 7 | from scorched.dates import (solr_date, datetime_from_w3_datestring, 8 | datetime_factory) 9 | from 
scorched.search import LuceneQuery
10 |
11 | not_utc = pytz.timezone('Etc/GMT-3')
12 |
13 | samples_from_pydatetimes = {
14 | "2009-07-23T03:24:34.000376Z":
15 | [datetime.datetime(2009, 7, 23, 3, 24, 34, 376),
16 | datetime.datetime(2009, 7, 23, 3, 24, 34, 376, pytz.utc)],
17 | "2009-07-23T00:24:34.000376Z":
18 | [not_utc.localize(datetime.datetime(2009, 7, 23, 3, 24, 34, 376)),
19 | datetime.datetime(2009, 7, 23, 0, 24, 34, 376, pytz.utc)],
20 | "2009-07-23T03:24:34Z":
21 | [datetime.datetime(2009, 7, 23, 3, 24, 34),
22 | datetime.datetime(2009, 7, 23, 3, 24, 34, tzinfo=pytz.utc)],
23 | "2009-07-23T00:24:34Z":
24 | [not_utc.localize(datetime.datetime(2009, 7, 23, 3, 24, 34)),
25 | datetime.datetime(2009, 7, 23, 0, 24, 34, tzinfo=pytz.utc)]
26 | }
27 |
28 | samples_from_strings = {
29 | # These will not have been serialized by us, but we should deal with them
30 | "2009-07-23T03:24:34Z":
31 | datetime.datetime(2009, 7, 23, 3, 24, 34, tzinfo=pytz.utc),
32 | "2009-07-23T03:24:34.1Z":
33 | datetime.datetime(2009, 7, 23, 3, 24, 34, 100000, pytz.utc),
34 | "2009-07-23T03:24:34.123Z":
35 | datetime.datetime(2009, 7, 23, 3, 24, 34, 122999, pytz.utc)
36 | }
37 |
38 |
39 | def check_solr_date_from_date(s, date, canonical_date):
40 | from scorched.compat import str
41 | assert str(solr_date(date)) == s, "Unequal representations of %r: %r and %r" % (
42 | date, str(solr_date(date)), s)
43 | check_solr_date_from_string(s, canonical_date)
44 |
45 |
46 | def check_solr_date_from_string(s, date):
47 | assert solr_date(s)._dt_obj == date, "Unequal representations of %r: %r and %r" % (
48 | solr_date(s)._dt_obj, date, s)
49 |
50 |
51 | @pytest.mark.parametrize(
52 | "dt_string,dt_objects", samples_from_pydatetimes.items())
53 | def test_solr_date_from_pydatetimes(dt_string, dt_objects):
54 | check_solr_date_from_date(dt_string, dt_objects[0], dt_objects[1])
55 |
56 |
57 | @pytest.mark.parametrize(
58 | "dt_string,dt_object", samples_from_strings.items())
59 | def test_solr_date_from_strings(dt_string, dt_object):
60 | check_solr_date_from_string(dt_string, dt_object)
61 |
62 |
63 | class TestDates(unittest.TestCase):
64 |
65 | def test_datetime_from_w3_datestring(self):
66 | self.assertRaises(ValueError,
67 | datetime_from_w3_datestring, "")
68 | self.assertEqual(datetime_from_w3_datestring("2009-07-23T03:24:34.123+16:50"),
69 | datetime.datetime(2009, 7, 23, 20, 14, 34, 122999,
70 | tzinfo=pytz.utc))
71 | self.assertEqual(datetime_from_w3_datestring("2009-07-23T03:24:34.123-16:50"),
72 | datetime.datetime(2009, 7, 22, 10, 34, 34, 122999,
73 | tzinfo=pytz.utc))
74 |
75 | def test_datetime_factory(self):
76 | self.assertRaises(ValueError,
77 | datetime_factory, year=1990, month=12,
78 | day=12345)
79 |
80 | def test_solr_date(self):
81 | self.assertRaises(scorched.exc.SolrError, solr_date, None)
82 | s = solr_date("2009-07-23T03:24:34.000376Z")
83 | s_older = solr_date("2007-07-23T03:24:34.000376Z")
84 | self.assertEqual(s.microsecond, 376)
85 | self.assertEqual(s, solr_date(s))
86 | self.assertTrue(s == s)
87 | self.assertTrue(s > s_older)
88 | self.assertTrue(s_older < s)
89 | self.assertRaises(TypeError, s.__lt__, datetime.datetime(2009, 7, 22, 10))
90 | if scorched.compat.is_py2: # pragma: no cover
91 | self.assertRaises(TypeError, s.__eq__, datetime.datetime(2009, 7, 22, 10))
92 | else: # pragma: no cover
93 | self.assertFalse(s == "Foo")
94 | self.assertEqual(s.__repr__(), 'datetime.datetime(2009, 7, 23, 3, 24, 34, 376, tzinfo=<UTC>)')
95 |
96 | def test_solr_date_from_str(self):
97 | # str here is the
original str from python 98 | self.assertTrue("'str'" in repr(str)) 99 | s = solr_date(str("2009-07-23T03:24:34.000376Z")) 100 | self.assertEqual(s, solr_date(s)) 101 | self.assertTrue(s == s) 102 | 103 | def test_solr_date_ranges(self): 104 | query = LuceneQuery() 105 | date = solr_date("2009-07-23T03:24:34.000376Z") 106 | query.Q(**{"last_modified__gt": date}) 107 | -------------------------------------------------------------------------------- /scorched/tests/test_functional.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import pytest 5 | 6 | from scorched import SolrInterface 7 | 8 | 9 | class Book: 10 | def __init__(self, name, author, **other_kwargs): 11 | self.title = name 12 | self.author = author 13 | self.other_kwargs = other_kwargs 14 | 15 | def __repr__(self): 16 | return 'Book("%s", "%s")' % (self.title, self.author) 17 | 18 | 19 | @pytest.fixture(scope="module") 20 | def books(): 21 | file_ = os.path.join(os.path.dirname(__file__), "dumps", "books.json") 22 | with open(file_) as f: 23 | datajson = f.read() 24 | docs = json.loads(datajson) 25 | return docs 26 | 27 | 28 | @pytest.fixture 29 | def si(solr_url): 30 | si_ = SolrInterface(solr_url) 31 | yield si_ 32 | si_.delete_all() 33 | si_.commit() 34 | 35 | 36 | def test_get(si, books): 37 | res = si.get("978-1423103349") 38 | assert len(res) == 0 39 | 40 | si.add(books) 41 | res = si.get("978-1423103349") 42 | assert len(res) == 1 43 | assert res[0]["name"] == "The Sea of Monsters" 44 | 45 | res = si.get(["978-0641723445", "978-1423103349", "nonexist"]) 46 | assert len(res) == 2 47 | assert [x["name"] for x in res] == ["The Lightning Thief", "The Sea of Monsters"] 48 | 49 | si.commit() 50 | 51 | res = si.get(ids="978-1423103349", fields=["author"]) 52 | assert len(res) == 1 53 | assert list(res[0].keys()) == ["author"] 54 | 55 | 56 | def test_query(si, books): 57 | si.add(books) 58 | si.commit() 59 | res = si.query(genre_s="fantasy").execute() 60 | assert res.result.numFound == 3 61 | 62 | res = si.delete_by_ids(res.result.docs[0]["id"]) 63 | assert res.status == 0 64 | res = si.query(genre_s="fantasy").execute() 65 | si.commit() 66 | res = si.query(genre_s="fantasy").execute() 67 | assert res.result.numFound == 2 68 | res = si.query(genre_s="fantasy").execute(constructor=Book) 69 | 70 | # test constructor 71 | assert [x.title for x in res.result.docs] == [ 72 | "The Sea of Monsters", 73 | "Sophie's World : The Greek Philosophers", 74 | ] 75 | 76 | 77 | def test_cursor(si, books): 78 | si.add(books) 79 | si.commit() 80 | cursor = si.query(genre_s="fantasy").sort_by("id").cursor(rows=1) 81 | 82 | # Count how often we hit solr 83 | search_count = [0] 84 | old_search = cursor.search.interface.search 85 | 86 | def search_proxy(*args, **kwargs): 87 | search_count[0] += 1 88 | return old_search(*args, **kwargs) 89 | 90 | cursor.search.interface.search = search_proxy 91 | 92 | list(cursor) 93 | assert search_count[0] == 4 # 3 + 1 to realize we are done 94 | 95 | search_count = [0] 96 | cursor = si.query(genre_s="fantasy").sort_by("id").cursor(constructor=Book, rows=2) 97 | # test constructor 98 | assert [x.title for x in cursor] == [ 99 | "The Lightning Thief", 100 | "The Sea of Monsters", 101 | "Sophie's World : The Greek Philosophers", 102 | ] 103 | 104 | assert search_count[0] == 3 105 | 106 | # empty results 107 | search_count = [0] 108 | cursor = si.query(genre_s="nonexist").sort_by("id").cursor(constructor=Book) 109 | assert list(cursor) == [] 110 | 
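# (Note added for clarity: even an empty result costs one round trip,
#  since the cursor must query Solr once to learn that nothing matches.)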
assert search_count[0] == 1
111 |
112 |
113 | def test_rollback(si, books):
114 | si.add(books)
115 | si.commit()
116 | res = si.query(genre_s="fantasy").execute()
117 | assert res.result.numFound == 3
118 | # delete
119 | res = si.delete_by_ids(res.result.docs[0]["id"])
120 | assert res.status == 0
121 |
122 | # rollback
123 | res = si.rollback()
124 | assert res.status == 0
125 | res = si.query(genre_s="fantasy").execute()
126 | assert res.result.numFound == 3
127 |
128 |
129 | def test_chunked_add(si, books):
130 | assert len(books) == 4
131 | # chunk size = 1, chunks = 4
132 | si.delete_all()
133 | res = si.add(books, chunk=1)
134 | assert len(res) == 4
135 | assert [r.status for r in res] == [0] * 4
136 | si.commit()
137 | res = si.query(genre_s="fantasy").execute()
138 | assert res.result.numFound == 3
139 | # chunk size = 2, chunks = 2
140 | si.delete_all()
141 |
142 | res = si.add(books, chunk=2)
143 | assert len(res) == 2
144 | assert [r.status for r in res] == [0] * 2
145 | si.commit()
146 | res = si.query(genre_s="fantasy").execute()
147 | assert res.result.numFound == 3
148 |
149 |
150 | def test_facet_query(si, books):
151 | res = si.add(books)
152 | assert res[0].status == 0
153 | si.commit()
154 | res = si.query(genre_s="fantasy").facet_by("cat").execute()
155 | assert res.result.numFound == 3
156 | assert [x["name"] for x in res.result.docs] == [
157 | "The Lightning Thief",
158 | "The Sea of Monsters",
159 | "Sophie's World : The Greek Philosophers",
160 | ]
161 |
162 | assert res.facet_counts.__dict__ == {
163 | "facet_fields": {"cat": [("book", 3), ("paperback", 2), ("hardcover", 1)]},
164 | "facet_dates": {},
165 | "facet_queries": {},
166 | "facet_ranges": {},
167 | "facet_pivot": {},
168 | }
169 |
170 |
171 | def test_filter_query(si, books):
172 | si.add(books)
173 | si.commit()
174 | res = (
175 | si.query(si.Q(**{"*": "*"}))
176 | .filter(cat="hardcover")
177 | .filter(genre_s="fantasy")
178 | .execute()
179 | )
180 | assert res.result.numFound == 1
181 | assert [x["name"] for x in res.result.docs] == ["The Lightning Thief"]
182 |
183 |
184 | def test_edismax_query(si, books):
185 | si.add(books)
186 | si.commit()
187 | res = (
188 | si.query(si.Q(**{"*": "*"}))
189 | .filter(cat="hardcover")
190 | .filter(genre_s="fantasy")
191 | .alt_parser("edismax")
192 | .execute()
193 | )
194 | assert res.result.numFound == 1
195 | assert [x["name"] for x in res.result.docs] == ["The Lightning Thief"]
196 |
197 |
198 | def test_mlt_component_query(si, books):
199 | si.add(books)
200 | si.commit()
201 | res = si.query(id="978-0641723445").mlt("genre_s", mintf=1, mindf=1).execute()
202 | # query shows only one
203 | assert res.result.numFound == 1
204 | # but in more like this we get two
205 | assert len(res.more_like_these["978-0641723445"].docs) == 2
206 | assert [x["author"] for x in res.more_like_these["978-0641723445"].docs] == [
207 | "Rick Riordan",
208 | "Jostein Gaarder",
209 | ]
210 |
211 |
212 | def test_encoding(si):
213 | docs = {
214 | "id": "978-0641723445",
215 | "cat": ["book", "hardcover"],
216 | "name": "The Höhlentripp Strauß",
217 | "author": "Röüß Itoa",
218 | "series_t": "Percy Jackson and \N{UMBRELLA}nicode",
219 | "sequence_i": 1,
220 | "genre_s": "fantasy",
221 | "inStock": True,
222 | "price": 12.50,
223 | "pages_i": 384,
224 | }
225 | si.add(docs)
226 | si.commit()
227 | res = si.query(author=u"Röüß").execute()
228 | assert res.result.numFound == 1
229 | for k, v in docs.items():
230 | assert res.result.docs[0][k] == v
231 |
232 |
233 | def 
test_multi_value_dates(si): 234 | docs = { 235 | "id": "978", 236 | "important_dts": [ 237 | "1969-01-01", 238 | "1969-01-02", 239 | ], 240 | } 241 | si.add(docs) 242 | si.commit() 243 | _ = si.query(id=u"978").execute() 244 | 245 | 246 | def test_highlighting(si): 247 | docs = { 248 | "id": "978-0641723445", 249 | "cat": ["book", "hardcover"], 250 | "name": "The Höhlentripp Strauß", 251 | "author": "Röüß Itoa", 252 | "series_t": "Percy Jackson and \N{UMBRELLA}nicode", 253 | "sequence_i": 1, 254 | "genre_s": "fantasy", 255 | "inStock": True, 256 | "price": 12.50, 257 | "pages_i": 384, 258 | } 259 | si.add(docs) 260 | si.commit() 261 | res = si.query(author=u"Röüß").highlight("author").execute() 262 | highlighted_field_result = "Röüß Itoa" 263 | # Does the highlighting attribute work? 264 | assert res.highlighting["978-0641723445"]["author"][0] == highlighted_field_result 265 | 266 | # Does each item have highlighting attributes? 267 | assert ( 268 | res.result.docs[0]["solr_highlights"]["author"][0] == highlighted_field_result 269 | ) 270 | 271 | 272 | def test_count(si): 273 | docs = [ 274 | { 275 | "id": "1", 276 | "genre_s": "fantasy", 277 | }, 278 | { 279 | "id": "2", 280 | "genre_s": "fantasy", 281 | }, 282 | ] 283 | si.add(docs) 284 | si.commit() 285 | ungrouped_count = si.query(genre_s="fantasy").count() 286 | ungrouped_count_expected = 2 287 | assert ungrouped_count == ungrouped_count_expected 288 | grouped_count = si.query(genre_s="fantasy").group_by("genre_s").count() 289 | grouped_count_expected = 1 290 | assert grouped_count == grouped_count_expected 291 | 292 | 293 | def test_debug(si): 294 | docs = { 295 | "id": "978-0641723445", 296 | "cat": ["book", "hardcover"], 297 | "name": "The Höhlentripp Strauß", 298 | "author": "Röüß Itoa", 299 | "series_t": "Percy Jackson and \N{UMBRELLA}nicode", 300 | "sequence_i": 1, 301 | "genre_s": "fantasy", 302 | "inStock": True, 303 | "price": 12.50, 304 | "pages_i": 384, 305 | } 306 | si.add(docs) 307 | si.commit() 308 | res = si.query(author="Röüß").debug().execute() 309 | assert res.result.numFound == 1 310 | for k, v in docs.items(): 311 | assert res.result.docs[0][k] == v 312 | assert "explain" in res.debug 313 | # deactivate 314 | res = si.query(author="Röüß").execute() 315 | assert "explain" not in res.debug 316 | 317 | 318 | def test_spellcheck(si): 319 | opts = si.query(name=u"Monstes").spellcheck().options() 320 | assert {"q": "name:Monstes", "spellcheck": True} == opts 321 | 322 | 323 | def test_extract(si): 324 | pdf = os.path.join(os.path.dirname(__file__), "data", "lipsum.pdf") 325 | with open(pdf, "rb") as f: 326 | data = si.extract(f) 327 | assert 0 == data.status 328 | assert "Lorem ipsum" in data.text 329 | assert ["pdfTeX-1.40.13"] == data.metadata["producer"] 330 | 331 | 332 | def test_mlt(si, books): 333 | si.add(books) 334 | si.commit() 335 | res = ( 336 | si.mlt_query("genre_s", interestingTerms="details", mintf=1, mindf=1) 337 | .query(id="978-0641723445") 338 | .execute() 339 | ) 340 | assert res.result.numFound == 2 341 | assert res.interesting_terms == ["genre_s:fantasy", 1.0] 342 | assert [x["author"] for x in res.result.docs] == ["Rick Riordan", "Jostein Gaarder"] 343 | -------------------------------------------------------------------------------- /scorched/tests/test_response.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os.path 3 | import unittest 4 | 5 | import pytz 6 | 7 | import scorched.response 8 | 9 | 10 | class 
ResultsTestCase(unittest.TestCase): 11 | def setUp(self): 12 | file_path = os.path.join( 13 | os.path.dirname(__file__), "dumps", "request_w_facets.json" 14 | ) 15 | with open(file_path) as f: 16 | self.data = f.read() 17 | # termVector data 18 | file_path = os.path.join( 19 | os.path.dirname(__file__), "dumps", "request_w_termvector.json" 20 | ) 21 | with open(file_path) as f: 22 | self.data_tv = f.read() 23 | # error data 24 | file_path = os.path.join( 25 | os.path.dirname(__file__), "dumps", "request_error.json" 26 | ) 27 | with open(file_path) as f: 28 | self.data_error = f.read() 29 | 30 | file_path = os.path.join(os.path.dirname(__file__), "dumps", "request_hl.json") 31 | with open(file_path) as f: 32 | self.data_hl = f.read() 33 | 34 | file_path = os.path.join( 35 | os.path.dirname(__file__), "dumps", "request_hl_grouped.json" 36 | ) 37 | with open(file_path) as f: 38 | self.data_hl_grouped = f.read() 39 | 40 | def test_response(self): 41 | res = scorched.response.SolrResponse.from_json( 42 | self.data, "id", datefields=("*_dt", "modified") 43 | ) 44 | self.assertEqual(res.status, 0) 45 | self.assertEqual(res.QTime, 1) 46 | self.assertEqual(res.result.numFound, 3) 47 | # iterable 48 | self.assertEqual( 49 | [x["name"] for x in res], 50 | [ 51 | "The Lightning Thief", 52 | "The Sea of Monsters", 53 | "Sophie's World : The Greek Philosophers", 54 | ], 55 | ) 56 | self.assertEqual( 57 | [x["name"] for x in res.result.docs], 58 | [ 59 | "The Lightning Thief", 60 | "The Sea of Monsters", 61 | "Sophie's World : The Greek Philosophers", 62 | ], 63 | ) 64 | self.assertEqual( 65 | [x["created_dt"] for x in res.result.docs if "created_dt" in x], 66 | [datetime.datetime(2009, 7, 23, 3, 24, 34, 376, tzinfo=pytz.utc)], 67 | ) 68 | self.assertEqual( 69 | [x["modified"] for x in res.result.docs if "modified" in x], 70 | [datetime.datetime(2009, 7, 23, 3, 24, 34, 376, tzinfo=pytz.utc)], 71 | ) 72 | self.assertEqual( 73 | res.facet_counts.__dict__, 74 | { 75 | "facet_fields": { 76 | "cat": [("book", 3), ("paperback", 2), ("hardcover", 1)] 77 | }, 78 | "facet_dates": {}, 79 | "facet_queries": {}, 80 | "facet_ranges": { 81 | "created_dt": { 82 | "gap": "+1YEARS", 83 | "start": "2009-01-01T00:00:00Z", 84 | "end": "2012-01-01T00:00:00Z", 85 | "counts": [ 86 | ("2009-01-01T00:00:00Z", 1), 87 | ("2010-01-01T00:00:00Z", 0), 88 | ("2011-01-01T00:00:00Z", 0), 89 | ], 90 | }, 91 | }, 92 | "facet_pivot": {}, 93 | }, 94 | ) 95 | 96 | self.assertRaises(ValueError, res.from_json, self.data_error, "id") 97 | self.assertEqual(res.__str__(), "3 results found, starting at #0") 98 | self.assertEqual(len(res), 3) 99 | 100 | def test_term_vectors(self): 101 | res_tv = scorched.response.SolrResponse.from_json( 102 | self.data_tv, "id", datefields=("date",) 103 | ) 104 | self.assertEqual(res_tv.term_vectors["uniqueKeyFieldName"], "uid") 105 | self.assertEqual(res_tv.term_vectors["warnings"], {"noTermVectors": ["title"]}) 106 | self.assertEqual( 107 | res_tv.term_vectors["ffaa9370-5182-5810-b8a9-54b751ef0606"]["uniqueKey"], 108 | "ffaa9370-5182-5810-b8a9-54b751ef0606", 109 | ) 110 | self.assertEqual( 111 | res_tv.term_vectors["ffaa9370-5182-5810-b8a9-54b751ef0606"][ 112 | "weighted_words" 113 | ]["wirken"], 114 | {"tf": 1, "df": 106}, 115 | ) 116 | self.assertEqual( 117 | res_tv.term_vectors["9ce8ef2d-6e0f-5647-ae4c-2aaaca37b28f"][ 118 | "weighted_words" 119 | ]["anlagen"], 120 | {"tf": 3, "df": 21484}, 121 | ) 122 | 123 | def test_highlighting(self): 124 | res_hl = scorched.response.SolrResponse.from_json(self.data_hl, 
"id") 125 | highlights = {"author": ["John Muir"]} 126 | self.assertEqual(res_hl.highlighting["978"], highlights) 127 | self.assertEqual(res_hl.result.docs[0]["solr_highlights"], highlights) 128 | 129 | def test_highlighting_with_grouping(self): 130 | res_hl_group = scorched.response.SolrResponse.from_json( 131 | self.data_hl_grouped, "id", datefields=("important_dts",) 132 | ) 133 | self.assertEqual(res_hl_group.group_field, "inStock") 134 | self.assertEqual( 135 | getattr(res_hl_group.groups, res_hl_group.group_field)["matches"], 2 136 | ) 137 | ngroups = getattr(res_hl_group.groups, res_hl_group.group_field)["ngroups"] 138 | self.assertEqual(ngroups, 1) 139 | 140 | groups = getattr(res_hl_group.groups, res_hl_group.group_field)["groups"] 141 | self.assertEqual(len(groups), ngroups) 142 | 143 | highlights = {"author": ["John Muir"]} 144 | self.assertEqual(groups[0]["doclist"]["docs"][0]["solr_highlights"], highlights) 145 | self.assertEqual( 146 | type(groups[0]["doclist"]["docs"][0]["important_dts"][0]), datetime.datetime 147 | ) 148 | -------------------------------------------------------------------------------- /scorched/tests/test_search.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from scorched.exc import SolrError 3 | from scorched.search import (SolrSearch, MltSolrSearch, PaginateOptions, 4 | SortOptions, FieldLimitOptions, FacetOptions, 5 | GroupOptions, HighlightOptions, DismaxOptions, 6 | MoreLikeThisOptions, EdismaxOptions, 7 | PostingsHighlightOptions, FacetPivotOptions, 8 | RequestHandlerOption, DebugOptions, 9 | params_from_dict, FacetRangeOptions, 10 | TermVectorOptions, StatOptions, 11 | is_iter) 12 | from scorched.strings import WildcardString 13 | import pytest 14 | 15 | 16 | debug = False 17 | 18 | base_good_query_data = { 19 | "query_by_term": [ 20 | (["hello"], {}, 21 | [("q", b"hello")]), 22 | (["hello"], {"int_field": 3}, 23 | [("q", b"hello AND int_field:3")]), 24 | (["hello", "world"], {}, 25 | [("q", b"hello AND world")]), 26 | # NB this next is not really what we want, 27 | # probably this should warn 28 | (["hello world"], {}, 29 | [("q", b"hello\\ world")]), 30 | ], 31 | 32 | "query_by_phrase": [ 33 | (["hello"], {}, 34 | [("q", b"hello")]), 35 | (["hello"], {"int_field": 3}, 36 | # Non-text data is always taken to be a term, and terms come before 37 | # phrases, so order is reversed 38 | [("q", b"int_field:3 AND hello")]), 39 | (["hello", "world"], {}, 40 | [("q", b"hello AND world")]), 41 | (["hello world"], {}, 42 | [("q", b"hello\\ world")]), 43 | ([], {'string_field': ['hello world', 'goodbye, cruel world']}, 44 | [("q", b"string_field:goodbye,\\ cruel\\ world AND string_field:hello\\ world")]), 45 | ], 46 | 47 | "query": [ 48 | # Basic queries 49 | (["hello"], {}, 50 | [("q", b"hello")]), 51 | (["hello"], {"int_field": 3}, 52 | [("q", b"hello AND int_field:3")]), 53 | (["hello", "world"], {}, 54 | [("q", b"hello AND world")]), 55 | (["hello world"], {}, 56 | [("q", b"hello\\ world")]), 57 | # Test fields 58 | # Boolean fields take any truth-y value 59 | ([], {"boolean_field": True}, 60 | [("q", b"boolean_field:true")]), 61 | ([], {"boolean_field": 'true'}, 62 | [("q", b"boolean_field:true")]), 63 | ([], {"boolean_field": "false"}, 64 | [("q", b"boolean_field:false")]), 65 | ([], {"boolean_field": False}, 66 | [("q", b"boolean_field:false")]), 67 | ([], {"int_field": 3}, 68 | [("q", b"int_field:3")]), 69 | ([], {"sint_field": 3}, 70 | [("q", b"sint_field:3")]), 71 | ([], {"long_field": 2 
** 31}, 72 | [("q", b"long_field:2147483648")]), 73 | ([], {"slong_field": 2 ** 31}, 74 | [("q", b"slong_field:2147483648")]), 75 | ([], {"float_field": 3.0}, 76 | [("q", b"float_field:3.0")]), 77 | ([], {"sfloat_field": 3.0}, 78 | [("q", b"sfloat_field:3.0")]), 79 | ([], {"double_field": 3.0}, 80 | [("q", b"double_field:3.0")]), 81 | ([], {"sdouble_field": 3.0}, 82 | [("q", b"sdouble_field:3.0")]), 83 | ([], {"date_field": datetime.datetime(2009, 1, 1)}, 84 | [("q", b"date_field:2009\\-01\\-01T00\\:00\\:00Z")]), 85 | # Test ranges 86 | ([], {"int_field__any": True}, 87 | [("q", b"int_field:[* TO *]")]), 88 | ([], {"int_field__lt": 3}, 89 | [("q", b"int_field:{* TO 3}")]), 90 | ([], {"int_field__gt": 3}, 91 | [("q", b"int_field:{3 TO *}")]), 92 | ([], {"int_field__rangeexc": (-3, 3)}, 93 | [("q", b"int_field:{\\-3 TO 3}")]), 94 | ([], {"int_field__rangeexc": (3, -3)}, 95 | [("q", b"int_field:{\\-3 TO 3}")]), 96 | ([], {"int_field__lte": 3}, 97 | [("q", b"int_field:[* TO 3]")]), 98 | ([], {"int_field__gte": 3}, 99 | [("q", b"int_field:[3 TO *]")]), 100 | ([], {"int_field__range": (-3, 3)}, 101 | [("q", b"int_field:[\\-3 TO 3]")]), 102 | ([], {"int_field__range": (3, -3)}, 103 | [("q", b"int_field:[\\-3 TO 3]")]), 104 | ([], {"date_field__lt": datetime.datetime(2009, 1, 1)}, 105 | [("q", b"date_field:{* TO 2009\\-01\\-01T00\\:00\\:00Z}")]), 106 | ([], {"date_field__gt": datetime.datetime(2009, 1, 1)}, 107 | [("q", b"date_field:{2009\\-01\\-01T00\\:00\\:00Z TO *}")]), 108 | ([], { 109 | "date_field__rangeexc": (datetime.datetime(2009, 1, 1), datetime.datetime(2009, 1, 2))}, 110 | [("q", b"date_field:{2009\\-01\\-01T00\\:00\\:00Z TO 2009\\-01\\-02T00\\:00\\:00Z}")]), 111 | ([], {"date_field__lte": datetime.datetime(2009, 1, 1)}, 112 | [("q", b"date_field:[* TO 2009\\-01\\-01T00\\:00\\:00Z]")]), 113 | ([], {"date_field__gte": datetime.datetime(2009, 1, 1)}, 114 | [("q", b"date_field:[2009\\-01\\-01T00\\:00\\:00Z TO *]")]), 115 | ([], { 116 | "date_field__range": (datetime.datetime(2009, 1, 1), datetime.datetime(2009, 1, 2))}, 117 | [("q", b"date_field:[2009\\-01\\-01T00\\:00\\:00Z TO 2009\\-01\\-02T00\\:00\\:00Z]")]), 118 | ([], {'string_field': ['hello world', 'goodbye, cruel world']}, 119 | [("q", b"string_field:goodbye,\\ cruel\\ world AND string_field:hello\\ world")]), 120 | # Raw strings 121 | ([], {'string_field': "abc*???"}, 122 | [("q", b"string_field:abc\\*\\?\\?\\?")]), 123 | ], 124 | } 125 | 126 | good_query_data = { 127 | "filter_by_term": [ 128 | (["hello"], {}, 129 | [("fq", b"hello"), ("q", b"*:*")]), 130 | # test multiple fq 131 | (["hello"], {"int_field": 3}, 132 | [("fq", b"hello"), ("fq", b"int_field:3"), ("q", b"*:*")]), 133 | (["hello", "world"], {}, 134 | [("fq", b"hello"), ("fq", b"world"), ("q", b"*:*")]), 135 | # NB this next is not really what we want, 136 | # probably this should warn 137 | (["hello world"], {}, 138 | [("fq", b"hello\\ world"), ("q", b"*:*")]), 139 | ], 140 | 141 | "filter_by_phrase": [ 142 | (["hello"], {}, 143 | [("fq", b"hello"), ("q", b"*:*")]), 144 | # test multiple fq 145 | (["hello"], {"int_field": 3}, 146 | [("fq", b"hello"), ("fq", b"int_field:3"), ("q", b"*:*")]), 147 | (["hello", "world"], {}, 148 | [("fq", b"hello"), ("fq", b"world"), ("q", b"*:*")]), 149 | (["hello world"], {}, 150 | [("fq", b"hello\\ world"), ("q", b"*:*")]), 151 | ], 152 | 153 | "filter": [ 154 | (["hello"], {}, 155 | [("fq", b"hello"), ("q", b"*:*")]), 156 | # test multiple fq 157 | (["hello"], {"int_field": 3}, 158 | [("fq", b"hello"), ("fq", b"int_field:3"), ("q", 
b"*:*")]), 159 | (["hello", "world"], {}, 160 | [("fq", b"hello"), ("fq", b"world"), ("q", b"*:*")]), 161 | (["hello world"], {}, 162 | [("fq", b"hello\\ world"), ("q", b"*:*")]), 163 | ], 164 | } 165 | good_query_data.update(base_good_query_data) 166 | 167 | 168 | def check_query_data(method, args, kwargs, output): 169 | solr_search = SolrSearch(None) 170 | p = getattr(solr_search, method)(*args, **kwargs).params() 171 | assert p == output, "Unequal: %r, %r" % (p, output) 172 | 173 | 174 | def check_mlt_query_data(method, args, kwargs, output): 175 | solr_search = MltSolrSearch(None) 176 | p = getattr(solr_search, method)(*args, **kwargs).params() 177 | assert p == output, "Unequal: %r, %r" % (p, output) 178 | 179 | 180 | good_option_data = { 181 | PaginateOptions: ( 182 | ({"start": 5, "rows": 10}, 183 | {"start": 5, "rows": 10}), 184 | ({"start": 5, "rows": None}, 185 | {"start": 5}), 186 | ({"start": None, "rows": 10}, 187 | {"rows": 10}), 188 | ), 189 | FacetOptions: ( 190 | ({"fields": "int_field"}, 191 | {"facet": True, "facet.field": ["int_field"]}), 192 | ({"fields": ["int_field", "text_field"]}, 193 | {"facet": True, "facet.field": ["int_field", "text_field"]}), 194 | ({"prefix": "abc"}, 195 | {"facet": True, "facet.prefix": "abc"}), 196 | ({"prefix": "abc", "sort": True, "limit": 3, "offset": 25, "mincount": 1, "missing": False, "method": "enum"}, 197 | {"facet": True, "facet.prefix": "abc", "facet.sort": True, "facet.limit": 3, "facet.offset": 25, "facet.mincount": 1, "facet.missing": False, "facet.method": "enum"}), 198 | ({"fields": "int_field", "prefix": "abc"}, 199 | {"facet": True, "facet.field": ["int_field"], "f.int_field.facet.prefix": "abc"}), 200 | ({"fields": "int_field", "prefix": "abc", "limit": 3}, 201 | {"facet": True, "facet.field": ["int_field"], "f.int_field.facet.prefix": "abc", "f.int_field.facet.limit": 3}), 202 | ({"fields": ["int_field", "text_field"], "prefix": "abc", "limit": 3}, 203 | {"facet": True, "facet.field": ["int_field", "text_field"], "f.int_field.facet.prefix": "abc", "f.int_field.facet.limit": 3, "f.text_field.facet.prefix": "abc", "f.text_field.facet.limit": 3, }), 204 | ), 205 | FacetRangeOptions: ( 206 | ({"fields": "field1", "start": 10, "end": 20, "gap": 2, "hardend": False, 207 | "include": "outer", "other": "all", "limit": 10, "mincount": 1}, 208 | {"facet": True, "facet.range": ["field1"], "f.field1.facet.range.start": 10, 209 | "f.field1.facet.range.end": 20, "f.field1.facet.range.gap": 2, 210 | "f.field1.facet.range.hardend": "false", "f.field1.facet.range.include": "outer", 211 | "f.field1.facet.range.other": "all", "f.field1.facet.limit": 1, 212 | "f.field1.facet.mincount": 1}), 213 | ), 214 | FacetPivotOptions: ( 215 | ({"fields": ["text_field"]}, 216 | {"facet": True, "facet.pivot": "text_field"}), 217 | ({"fields": ["int_field", "text_field"]}, 218 | {"facet": True, "facet.pivot": "int_field,text_field"}), 219 | ({"fields": ["int_field", "text_field"], "mincount": 2}, 220 | {"facet": True, "facet.pivot": "int_field,text_field", "facet.pivot.mincount": 2}), 221 | ), 222 | GroupOptions: ( 223 | ({"field": "int_field", "limit": 10}, 224 | {"group": True, "group.limit": 10, "group.field": "int_field"}), 225 | ), 226 | SortOptions: ( 227 | ({"field": "int_field"}, 228 | {"sort": "int_field asc"}), 229 | ({"field": "-int_field"}, 230 | {"sort": "int_field desc"}), 231 | ), 232 | HighlightOptions: ( 233 | ({"fields": "int_field"}, 234 | {"hl": True, "hl.fl": "int_field"}), 235 | ({"fields": ["int_field", "text_field"]}, 236 | {"hl": 
True, "hl.fl": "int_field,text_field"}), 237 | ({"snippets": 3}, 238 | {"hl": True, "hl.snippets": 3}), 239 | ({"snippets": 3, "fragsize": 5, "mergeContinuous": True, "requireFieldMatch": True, "maxAnalyzedChars": 500, "alternateField": "text_field", "maxAlternateFieldLength": 50, "formatter": "simple", "simple.pre": "", "simple.post": "", "fragmenter": "regex", "usePhraseHighlighter": True, "highlightMultiTerm": True, "regex.slop": 0.2, "regex.pattern": "\\w", "regex.maxAnalyzedChars": 100}, 240 | {"hl": True, "hl.snippets": 3, "hl.fragsize": 5, "hl.mergeContinuous": True, "hl.requireFieldMatch": True, "hl.maxAnalyzedChars": 500, "hl.alternateField": "text_field", "hl.maxAlternateFieldLength": 50, "hl.formatter": "simple", "hl.simple.pre": "", "hl.simple.post": "", "hl.fragmenter": "regex", "hl.usePhraseHighlighter": True, "hl.highlightMultiTerm": True, "hl.regex.slop": 0.2, "hl.regex.pattern": "\\w", "hl.regex.maxAnalyzedChars": 100}), 241 | ({"fields": "int_field", "snippets": "3"}, 242 | {"hl": True, "hl.fl": "int_field", "f.int_field.hl.snippets": 3}), 243 | ({"fields": "int_field", "snippets": 3, "fragsize": 5}, 244 | {"hl": True, "hl.fl": "int_field", "f.int_field.hl.snippets": 3, "f.int_field.hl.fragsize": 5}), 245 | ({"fields": ["int_field", "text_field"], "snippets": 3, "fragsize": 5}, 246 | {"hl": True, "hl.fl": "int_field,text_field", "f.int_field.hl.snippets": 3, "f.int_field.hl.fragsize": 5, "f.text_field.hl.snippets": 3, "f.text_field.hl.fragsize": 5}), 247 | ), 248 | PostingsHighlightOptions: ( 249 | ({"fields": "int_field"}, 250 | {"hl": True, "hl.fl": "int_field"}), 251 | ({"fields": ["int_field", "text_field"]}, 252 | {"hl": True, "hl.fl": "int_field,text_field"}), 253 | ({"snippets": 3}, 254 | {"hl": True, "hl.snippets": 3}), 255 | ({"fields": ["int_field", "text_field"], "snippets": 1, 256 | "tag.pre": "<em>", "tag.post": "<em>", 257 | "tag.ellipsis": "...", "defaultSummary": True, "encoder": "simple", 258 | "score.k1": 1.2, "score.b": 0.75, "score.pivot": 87, 259 | "bs.type": "SENTENCE", "maxAnalyzedChars": 10000, }, 260 | {'f.text_field.hl.score.b': 0.75, 'f.int_field.hl.encoder': 'simple', 261 | 'f.int_field.hl.tag.pre': '<em>', 'f.text_field.hl.tag.pre': 262 | '<em>', 'f.text_field.hl.defaultSummary': True, 263 | 'f.text_field.hl.tag.post': '<em>', 'f.text_field.hl.bs.type': 264 | 'SENTENCE', 'f.int_field.hl.tag.ellipsis': '...', 265 | 'f.text_field.hl.score.k1': 1.2, 'f.text_field.hl.tag.ellipsis': 266 | '...', 'f.int_field.hl.score.pivot': 87.0, 267 | 'f.int_field.hl.tag.post': '<em>', 'f.int_field.hl.bs.type': 268 | 'SENTENCE', 'f.int_field.hl.score.b': 0.75, 269 | 'f.text_field.hl.maxAnalyzedChars': '10000', 'hl': True, 270 | 'f.text_field.hl.encoder': 'simple', 'hl.fl': 271 | 'int_field,text_field', 'f.int_field.hl.snippets': 1, 272 | 'f.text_field.hl.snippets': 1, 'f.int_field.hl.maxAnalyzedChars': 273 | '10000', 'f.int_field.hl.score.k1': 1.2, 274 | 'f.int_field.hl.defaultSummary': True, 'f.text_field.hl.score.pivot': 275 | 87.0}), 276 | ), 277 | MoreLikeThisOptions: ( 278 | ({"fields": "int_field"}, 279 | {"mlt": True, "mlt.fl": "int_field"}), 280 | ({"fields": ["int_field", "text_field"]}, 281 | {"mlt": True, "mlt.fl": "int_field,text_field"}), 282 | ({"fields": ["text_field", "string_field"], "query_fields": {"text_field": 0.25, "string_field": 0.75}}, 283 | {"mlt": True, "mlt.fl": "string_field,text_field", "mlt.qf": "text_field^0.25 string_field^0.75"}), 284 | ({"fields": "text_field", "count": 1}, 285 | {"mlt": True, "mlt.fl": "text_field", 
"mlt.count": 1}), 286 | ), 287 | TermVectorOptions: ( 288 | ({}, 289 | {"tv": True}), 290 | ({"offsets": True}, 291 | {"tv": True, "tv.offsets": True}), 292 | ({"fields": "text_field"}, 293 | {"tv": True, "tv.fl": "text_field"}), 294 | ({"fields": ["int_field", "text_field"]}, 295 | {"tv": True, "tv.fl": "int_field, text_field"}), 296 | ({"all": True, "df": 1, "offsets": 0, "positions": False, 297 | "payloads": "true", "tf": False, "tf_idf": True}, 298 | {'tv': True, 'tv.df': True, 'tv.all': True, 'tv.tf_idf': True, 299 | 'tv.tf': False, 'tv.offsets': False, 'tv.payloads': True, 300 | 'tv.positions': False}), 301 | ({"fields": "text_field", "all": True}, 302 | {'tv': True, 'tv.fl': 'text_field', 'f.text_field.tv.all': True}), 303 | ({"fields": ["int_field", "text_field"], "tf": True}, 304 | {'tv': True, 'tv.fl': 'int_field,text_field', 305 | 'f.text_field.tv.tf': True, 'f.int_field.tv.tf': True}), 306 | ), 307 | DismaxOptions: ( 308 | ({"qf": {"text_field": 0.25, "string_field": 0.75}}, 309 | {'defType': 'dismax', 'qf': 'text_field^0.25 string_field^0.75'}), 310 | ({"pf": {"text_field": 0.25, "string_field": 0.75}}, 311 | {'defType': 'dismax', 'pf': 'text_field^0.25 string_field^0.75'}), 312 | ({"qf": {"text_field": 0.25, "string_field": 0.75}, "mm": 2}, 313 | {'mm': 2, 'defType': 'dismax', 'qf': 'text_field^0.25 string_field^0.75'}), 314 | ), 315 | EdismaxOptions: ( 316 | ({"qf": {"text_field": 0.25, "string_field": 0.75}}, 317 | {'defType': 'edismax', 'qf': 'text_field^0.25 string_field^0.75'}), 318 | ({"pf": {"text_field": 0.25, "string_field": 0.75}}, 319 | {'defType': 'edismax', 'pf': 'text_field^0.25 string_field^0.75'}), 320 | ({"qf": {"text_field": 0.25, "string_field": 0.75}, "mm": 2}, 321 | {'mm': 2, 'defType': 'edismax', 'qf': 'text_field^0.25 string_field^0.75'}), 322 | ), 323 | FieldLimitOptions: ( 324 | ({}, 325 | {}), 326 | ({"fields": "int_field"}, 327 | {"fl": "int_field"}), 328 | ({"fields": ["int_field", "text_field"]}, 329 | {"fl": "int_field,text_field"}), 330 | ({"score": True}, 331 | {"fl": "score"}), 332 | ({"all_fields": True, "score": True}, 333 | {"fl": "*,score"}), 334 | ({"fields": "int_field", "score": True}, 335 | {"fl": "int_field,score"}), 336 | ), 337 | RequestHandlerOption: ( 338 | ({"handler": None}, 339 | {}), 340 | ({"handler": "hans"}, 341 | {'qt': 'hans'}), 342 | ), 343 | DebugOptions: ( 344 | ({"debug": None}, 345 | {}), 346 | ({"debug": False}, 347 | {}), 348 | ({"debug": True}, 349 | {'debugQuery': True}), 350 | ), 351 | StatOptions: ( 352 | ({"fields": "int_field"}, 353 | {"stats": True, "stats.field": ['int_field']}), 354 | ({"fields": ["int_field", "float_field"]}, 355 | {"stats": True, "stats.field": ['int_field', 'float_field']}), 356 | ({"fields": ["int_field", "float_field"], "facet": "field0"}, 357 | {"stats": True, "stats.field": ['int_field', 'float_field'], 358 | "stats.facet": "field0"}), 359 | ), 360 | } 361 | 362 | 363 | def check_good_option_data(OptionClass, kwargs, output): 364 | optioner = OptionClass() 365 | optioner.update(**kwargs) 366 | assert set(optioner.options()) == set(output), "Unequal: %r, %r" % ( 367 | optioner.options(), output) 368 | 369 | # All these tests should really nominate which exception they're going to 370 | # throw. 
371 | bad_option_data = { 372 | PaginateOptions: ( 373 | {"start": -1, "rows": None}, # negative start 374 | {"start": None, "rows": -1}, # negative rows 375 | ), 376 | FacetOptions: ( 377 | {"oops": True}, # undefined option 378 | {"limit": "a"}, # invalid type 379 | {"sort": "yes"}, # invalid choice 380 | {"offset": -1}, # invalid value 381 | ), 382 | SortOptions: ( 383 | ), 384 | HighlightOptions: ( 385 | {"oops": True}, # undefined option 386 | {"snippets": "a"}, # invalid type 387 | ), 388 | MoreLikeThisOptions: ( 389 | # string_field in query_fields, not fields 390 | {"fields": "text_field", "query_fields": 391 | {"text_field": 0.25, "string_field": 0.75}}, 392 | # Non-float value for boost 393 | {"fields": "text_field", "query_fields": {"text_field": "a"}}, 394 | {"fields": "text_field", "oops": True}, # undefined option 395 | {"fields": "text_field", "count": "a"} # Invalid value for option 396 | ), 397 | TermVectorOptions: ( 398 | {"foobar": True}, # undefined option 399 | ), 400 | DismaxOptions: ( 401 | # "ss" is not a recognised dismax option 402 | {"ss": {"text_field": 0.25, "string_field": 0.75}}, 403 | # non-float boost value in pf 404 | {"pf": {"text_field": 0.25, "string_field": "ABBS"}}, 405 | ), 406 | StatOptions: ( 407 | {"oops": True}, # undefined option 408 | ) 409 | } 410 | 411 | 412 | def check_bad_option_data(OptionClass, kwargs): 413 | option = OptionClass() 414 | exception_raised = False 415 | try: 416 | option.update(**kwargs) 417 | except SolrError: 418 | exception_raised = True 419 | assert exception_raised 420 | 421 | 422 | complex_boolean_queries = ( 423 | (lambda q: q.query("hello world").filter(q.Q(text_field="tow") | q.Q(boolean_field=False, int_field__gt=3)), 424 | [('fq', b'text_field:tow OR (boolean_field:false AND int_field:{3 TO *})'), ('q', b'hello\\ world')]), 425 | # test multiple fq 426 | (lambda q: q.query("hello world").filter(q.Q(text_field="tow") & q.Q(boolean_field=False, int_field__gt=3)), 427 | [('fq', b'boolean_field:false'), ('fq', b'int_field:{3 TO *}'), ('fq', b'text_field:tow'), ('q', b'hello\\ world')]), 428 | # Test various combinations of NOTs at the top level. 429 | # Sometimes we need to do the *:* trick, sometimes not.
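# (Background: a purely negative Lucene clause matches nothing on its own,
# so whenever a NOT ends up nested inside an OR it has to be rewritten
# against the match-all query, roughly
#     abc OR NOT def   ->   abc OR (*:* AND NOT def)
# while a NOT at the very top level can be emitted as-is.)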
430 | (lambda q: q.query(~q.Q("hello world")), 431 | [('q', b'NOT hello\\ world')]), 432 | (lambda q: q.query(~q.Q("hello world") & ~q.Q(int_field=3)), 433 | [('q', b'NOT hello\\ world AND NOT int_field:3')]), 434 | (lambda q: q.query("hello world", ~q.Q(int_field=3)), 435 | [('q', b'hello\\ world AND NOT int_field:3')]), 436 | (lambda q: q.query("abc", q.Q("def"), ~q.Q(int_field=3)), 437 | [('q', b'abc AND def AND NOT int_field:3')]), 438 | (lambda q: q.query("abc", q.Q("def") & ~q.Q(int_field=3)), 439 | [('q', b'abc AND def AND NOT int_field:3')]), 440 | (lambda q: q.query("abc", q.Q("def") | ~q.Q(int_field=3)), 441 | [('q', b'abc AND (def OR (*:* AND NOT int_field:3))')]), 442 | (lambda q: q.query(q.Q("abc") | ~q.Q("def")), 443 | [('q', b'abc OR (*:* AND NOT def)')]), 444 | (lambda q: q.query(q.Q("abc") | q.Q(~q.Q("def"))), 445 | [('q', b'abc OR (*:* AND NOT def)')]), 446 | # Make sure that ANDs are flattened 447 | (lambda q: q.query("def", q.Q("abc"), q.Q(q.Q("xyz"))), 448 | [('q', b'abc AND def AND xyz')]), 449 | # Make sure that ORs are flattened 450 | (lambda q: q.query(q.Q("def") | q.Q(q.Q("xyz"))), 451 | [('q', b'def OR xyz')]), 452 | # Make sure that empty queries are discarded in ANDs 453 | (lambda q: q.query("def", q.Q("abc"), q.Q(), q.Q(q.Q() & q.Q("xyz"))), 454 | [('q', b'abc AND def AND xyz')]), 455 | # Make sure that empty queries are discarded in ORs 456 | (lambda q: q.query(q.Q() | q.Q("def") | q.Q(q.Q() | q.Q("xyz"))), 457 | [('q', b'def OR xyz')]), 458 | # Test cancellation of NOTs. 459 | (lambda q: q.query(~q.Q(~q.Q("def"))), 460 | [('q', b'def')]), 461 | (lambda q: q.query(~q.Q(~q.Q(~q.Q("def")))), 462 | [('q', b'NOT def')]), 463 | # Test it works through sub-sub-queries 464 | (lambda q: q.query(~q.Q(q.Q(q.Q(~q.Q(~q.Q("def")))))), 465 | [('q', b'NOT def')]), 466 | # Even with empty queries in there 467 | (lambda q: q.query(~q.Q(q.Q(q.Q() & q.Q(q.Q() | ~q.Q(~q.Q("def")))))), 468 | [('q', b'NOT def')]), 469 | # Test escaping of AND, OR, NOT 470 | (lambda q: q.query("AND", "OR", "NOT"), 471 | [('q', b'"AND" AND "NOT" AND "OR"')]), 472 | # Test exclude 473 | (lambda q: q.query("blah").query(~q.Q(q.Q("abc") | q.Q("def") | q.Q("ghi"))), 474 | [('q', b'blah AND NOT (abc OR def OR ghi)')]), 475 | # Try boosts 476 | (lambda q: q.query("blah").query(q.Q("def") ** 1.5), 477 | [('q', b'blah AND def^1.5')]), 478 | (lambda q: q.query("blah").query((q.Q("def") | q.Q("ghi")) ** 1.5), 479 | [('q', b'blah AND (def OR ghi)^1.5')]), 480 | (lambda q: q.query("blah").query(q.Q("def", ~q.Q("pqr") | q.Q("mno")) ** 1.5), 481 | [('q', b'blah AND (def AND ((*:* AND NOT pqr) OR mno))^1.5')]), 482 | # wildcard 483 | (lambda q: q.query("blah").query(q.Q(WildcardString("def*"), 484 | ~q.Q(miu=WildcardString("pqr*")) | q.Q("mno")) ** 1.5), 485 | [('q', b'blah AND (def* AND ((*:* AND NOT miu:pqr*) OR mno))^1.5')]), 486 | (lambda q: q.query("blah").query(q.Q("def*", ~q.Q(miu="pqr*") | q.Q("mno")) ** 1.5), 487 | [('q', b'blah AND (def\\* AND ((*:* AND NOT miu:pqr\\*) OR mno))^1.5')]), 488 | # And boost_relevancy 489 | (lambda q: q.query("blah").boost_relevancy(1.5, int_field=3), 490 | [('q', b'blah OR (blah AND int_field:3^1.5)')]), 491 | (lambda q: q.query("blah").boost_relevancy(1.5, int_field=3).boost_relevancy(2, string_field='def'), 492 | [('q', b'blah OR (blah AND (int_field:3^1.5 OR string_field:def^2))')]), 493 | (lambda q: q.query("blah").query("blah2").boost_relevancy(1.5, int_field=3), 494 | [('q', b'(blah AND blah2) OR (blah AND blah2 AND int_field:3^1.5)')]), 495 | (lambda q: 
q.query(q.Q("blah") | q.Q("blah2")).boost_relevancy(1.5, int_field=3), 496 | [('q', b'blah OR blah2 OR ((blah OR blah2) AND int_field:3^1.5)')]), 497 | # And ranges 498 | (lambda q: q.query(int_field__any=True), 499 | [('q', b'int_field:[* TO *]')]), 500 | (lambda q: q.query("blah", ~q.Q(int_field__any=True)), 501 | [('q', b'blah AND NOT int_field:[* TO *]')]), 502 | # facet 503 | (lambda q: q.query("game").facet_query(price__lt=7).facet_query(price__gte=7), 504 | [('facet', b'true'), ('facet.query', b'price:[7 TO *]'), 505 | ('facet.query', b'price:{* TO 7}'), ('q', b'game')]), 506 | # group 507 | (lambda q: q.query().group_by('major_value', limit=10), 508 | [('group', b'true'), ('group.field', b'major_value'), ('group.limit', b'10'), 509 | ('group.ngroups', b'true'), ('q', b'*:*')]), 510 | # highlight 511 | (lambda q: q.query("hello world").filter(q.Q(text_field="tow")).highlight('title'), 512 | [('fq', b'text_field:tow'), ('hl', b'true'), ('hl.fl', b'title'), ('q', b'hello\\ world')]), 513 | # termVector 514 | (lambda q: q.query("hello world").filter(q.Q(text_field="tow")).term_vector(df=True), 515 | [('fq', b'text_field:tow'), ('tv', b'true'), ('tv.df', b'true'), ('q', b'hello\\ world')]), 516 | # sort 517 | (lambda q: q.query("hello world").filter(q.Q(text_field="tow")).sort_by('title'), 518 | [('fq', b'text_field:tow'), ('q', b'hello\\ world'), ('sort', b'title asc')]), 519 | # dismax 520 | (lambda q: q.query("hello").filter(q.Q(text_field="tow")).alt_parser( 521 | "dismax", qf={"text_field": 0.25, "string_field": 0.75}), 522 | [('defType', b'dismax'), ('fq', b'text_field:tow'), ('q', b'hello'), 523 | ('qf', b'text_field^0.25 string_field^0.75')]), 524 | # edismax 525 | (lambda q: q.query("hello").filter(q.Q(text_field="tow")).alt_parser( 526 | "edismax", qf={"text_field": 0.25, "string_field": 0.75}, 527 | f={'alias1':['field1', 'field2']} 528 | ), 529 | [('defType', b'edismax'), ('fq', b'text_field:tow'), ('q', b'hello'), 530 | ('qf', b'text_field^0.25 string_field^0.75'), 531 | ('f.alias1.qf', b'field1 field2')]), 532 | # field_limit 533 | (lambda q: q.query().field_limit(['name', 'foo']), 534 | [('fl', b'foo,name'), ('q', b'*:*')]), 535 | (lambda q: q.query().field_limit('foo'), 536 | [('fl', b'foo'), ('q', b'*:*')]), 537 | # set_requesthandler 538 | (lambda q: q.query("hello").set_requesthandler("foo"), 539 | [('q', b'hello'), ('qt', b'foo')]), 540 | # debug 541 | (lambda q: q.query("hello").debug(), 542 | [('debugQuery', b'true'), ('q', b'hello')]), 543 | ) 544 | 545 | 546 | def check_complex_boolean_query(solr_search, query, output): 547 | p = query(solr_search).params() 548 | assert set(p) == set(output), "Unequal: %r, %r" % (p, output) 549 | # And check no mutation of the base object 550 | q = query(solr_search).params() 551 | assert p == q, "Unequal: %r, %r" % (p, q) 552 | 553 | 554 | param_encode_data = ( 555 | ({"int": 3, "string": "string", "unicode": "unicode"}, 556 | [("int", b"3"), ("string", b"string"), ("unicode", b"unicode")]), 557 | ({"int": 3, "string": "string", "unicode": "\N{UMBRELLA}nicode"}, 558 | [("int", b"3"), ("string", b"string"), ("unicode", b"\xe2\x98\x82nicode")]), 559 | # python3 needs unicode as keys 560 | ({"int": 3, "string": "string", "\N{UMBRELLA}nicode": "\N{UMBRELLA}nicode"}, 561 | [("int", b"3"), ("string", b"string"), ("\N{UMBRELLA}nicode", b"\xe2\x98\x82nicode")]), 562 | ({"true": True, "false": False}, 563 | [("false", b"false"), ("true", b"true")]), 564 | ({"list": ["first", "second", "third"]}, 565 | [("list", b"first"), ("list", 
b"second"), ("list", b"third")]), 566 | ) 567 | 568 | 569 | def check_url_encode_data(kwargs, output): 570 | p = params_from_dict(**kwargs) 571 | assert p == output, "Unequal: %r, %r" % (p, output) 572 | 573 | mlt_query_options_data = ( 574 | ('text_field', {}, {}, 575 | [('mlt.fl', b'text_field'), ('q', b'*:*')]), 576 | (['string_field', 'text_field'], {'string_field': 3.0}, {}, 577 | [('mlt.fl', b'string_field,text_field'), ('mlt.qf', b'string_field^3.0'), 578 | ('q', b'*:*')]), 579 | ('text_field', {}, {'mindf': 3, 'interestingTerms': 'details'}, 580 | [('mlt.fl', b'text_field'), ('mlt.interestingTerms', b'details'), 581 | ('mlt.mindf', b'3'), ('q', b'*:*')]), 582 | ) 583 | 584 | 585 | def check_mlt_query_options(fields, query_fields, kwargs, output): 586 | q = MltSolrSearch(None, content="This is the posted content.") 587 | q = q.mlt(fields, query_fields=query_fields, **kwargs) 588 | assert q.params() == output 589 | 590 | 591 | def flatten(test_data): 592 | new_data = [] 593 | for method, data in test_data.items(): 594 | for row in data: 595 | if isinstance(row, (list, tuple)): 596 | new_data.append([method, *row]) 597 | else: 598 | new_data.append([method, row]) 599 | return new_data 600 | 601 | 602 | @pytest.mark.parametrize( 603 | "method,args,kwargs,expected", flatten(good_query_data)) 604 | def test_query_data(method, args, kwargs, expected): 605 | check_query_data(method, args, kwargs, expected) 606 | 607 | 608 | @pytest.mark.parametrize( 609 | "method,args,kwargs,expected", flatten(base_good_query_data)) 610 | def test_mlt_query_data(method, args, kwargs, expected): 611 | check_mlt_query_data(method, args, kwargs, expected) 612 | 613 | 614 | @pytest.mark.parametrize( 615 | "option_class,kwargs,expected", flatten(good_option_data)) 616 | def test_good_option_data(option_class, kwargs, expected): 617 | check_good_option_data(option_class, kwargs, expected) 618 | 619 | 620 | @pytest.mark.parametrize("option_class,kwargs", flatten(bad_option_data)) 621 | def test_bad_option_data(option_class, kwargs): 622 | check_bad_option_data(option_class, kwargs) 623 | 624 | 625 | @pytest.mark.parametrize("query,expected", complex_boolean_queries) 626 | def test_complex_boolean_queries(query, expected): 627 | solr_search = SolrSearch(None) 628 | check_complex_boolean_query(solr_search, query, expected) 629 | 630 | 631 | @pytest.mark.parametrize("kwargs, expected", param_encode_data) 632 | def test_url_encode_data(kwargs, expected): 633 | check_url_encode_data(kwargs, expected) 634 | 635 | 636 | @pytest.mark.parametrize( 637 | "fields,query_fields,kwargs,expected", mlt_query_options_data) 638 | def test_mlt_query_options(fields, query_fields, kwargs, expected): 639 | check_mlt_query_options(fields, query_fields, kwargs, expected) 640 | 641 | 642 | def test_is_iter(): 643 | assert is_iter("abc") == False 644 | assert is_iter(1) == False 645 | assert is_iter([1, 2]) == True 646 | assert is_iter((1, 2)) == True 647 | assert is_iter(set([1, 2])) == True 648 | -------------------------------------------------------------------------------- /scorched/tests/test_strings.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from scorched.search import LuceneQuery 4 | from scorched.strings import RawString, WildcardString 5 | 6 | 7 | class TestStrings(unittest.TestCase): 8 | def test_string_escape(self): 9 | """Ensure that string characters are escaped correctly for Solr queries.""" 10 | test_str = '+-&|!(){}[]^"~*?: \t\v\\/' 11 | escaped = 
RawString(test_str).escape_for_lqs_term() 12 | self.assertEqual( 13 | escaped, 14 | '\\+\\-\\&\\|\\!\\(\\)\\{\\}\\[\\]\\^\\"\\~\\*\\?\\:\\ \\\t\\\x0b\\\\\\/', 15 | ) 16 | 17 | def test_wildcard_string(self): 18 | q = LuceneQuery() 19 | q = q.Q(WildcardString("occurrencetype$$pressemitteilung$$*")) 20 | output = {None: "occurrencetype$$pressemitteilung$$*"} 21 | self.assertEqual(q.options(), output, "Unequal: %r, %r" % (q.options(), output)) 22 | # slash 23 | q = q.Q(WildcardString("occu/*/baum")) 24 | output = {None: "occu\\/*\\/baum"} 25 | self.assertEqual(q.options(), output, "Unequal: %r, %r" % (q.options(), output)) 26 | # backslash 27 | q = q.Q(WildcardString("occu\\*baum\\?aus\\")) 28 | output = {None: "occu\\*baum\\?aus\\\\"} 29 | self.assertEqual(q.options(), output, "Unequal: %r, %r" % (q.options(), output)) 30 | # question mark 31 | q = q.Q(WildcardString("occ?/*/baum")) 32 | output = {None: "occ?\\/*\\/baum"} 33 | self.assertEqual(q.options(), output, "Unequal: %r, %r" % (q.options(), output)) 34 | -------------------------------------------------------------------------------- /scorched/tests/test_testing.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import scorched.testing 3 | from unittest import mock 4 | 5 | 6 | class TestTesting(unittest.TestCase): 7 | 8 | def test_solr(self): 9 | self.assertFalse( 10 | scorched.testing.is_solr_available("http://foo")) 11 | 12 | def test_solr_decorator(self): 13 | with mock.patch.object(scorched.testing, "is_solr_available", 14 | return_value=False): 15 | func = lambda x: x 16 | self.assertTrue(hasattr(scorched.testing.skip_unless_solr(func), 17 | '__unittest_skip_why__')) 18 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [nosetests] 2 | match=^test 3 | nocapture=1 4 | cover-package=scorched 5 | with-coverage=1 6 | cover-erase=1 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import os 4 | 5 | from setuptools import find_packages, setup 6 | 7 | version = "1.0.0.0b3.dev0" 8 | 9 | here = os.path.abspath(os.path.dirname(__file__)) 10 | README = open(os.path.join(here, "README.rst")).read() 11 | CHANGES = open(os.path.join(here, "CHANGES.rst")).read() 12 | 13 | 14 | setup( 15 | name="scorched", 16 | version=version, 17 | description="Solr search ORM-like query builder", 18 | long_description=README + "\n\n" + CHANGES, 19 | classifiers=[ 20 | "Environment :: Console", 21 | "Environment :: Web Environment", 22 | "Intended Audience :: Developers", 23 | "License :: OSI Approved :: MIT License", 24 | "Operating System :: MacOS :: MacOS X", 25 | "Operating System :: Microsoft :: Windows", 26 | "Operating System :: POSIX", 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3.7", 29 | "Programming Language :: Python :: 3.8", 30 | "Programming Language :: Python :: 3.9", 31 | "Programming Language :: Python :: 3.10", 32 | ], 33 | keywords="solr tow sunburnt offspring", 34 | author="(Josip Delic) Lugensa GmbH", 35 | author_email="info@lugensa.com", 36 | url="http://www.lugensa.com", 37 | license="MIT", 38 | packages=find_packages(exclude=["ez_setup", "examples", "tests"]), 39 | include_package_data=True, 40 | zip_safe=False, 41 |
python_requires=">=3.7.0", 42 | install_requires=[ 43 | "setuptools", 44 | "requests", 45 | "pytz", 46 | ], 47 | extras_require={ 48 | "test": ["pytest<7.0.0", "coverage", "pytest-docker"], 49 | }, 50 | test_suite="scorched.tests", 51 | ) 52 | -------------------------------------------------------------------------------- /testing-solr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | SOLR_PORT=${SOLR_PORT:-8983} 4 | SOLR_VERSION=${SOLR_VERSION:-4.10.2} 5 | DEBUG=${DEBUG:-false} 6 | SOLR_CORE=${SOLR_CORE:-core0} 7 | 8 | download() { 9 | FILE="$2.tgz" 10 | if [ -f $FILE ]; 11 | then 12 | echo "File $FILE exists." 13 | tar -zxf $FILE 14 | else 15 | echo "File $FILE does not exist. Downloading solr from $1..." 16 | curl -O $1 17 | tar -zxf $FILE 18 | fi 19 | echo "Downloaded!" 20 | } 21 | 22 | is_solr_up(){ 23 | echo "Checking if Solr is up on http://localhost:$SOLR_PORT/solr/admin/cores" 24 | http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$SOLR_PORT/solr/admin/cores") 25 | test "$http_code" = "200" 26 | } 27 | 28 | wait_for_solr(){ 29 | while ! is_solr_up; do 30 | sleep 5 31 | done 32 | } 33 | 34 | run() { 35 | dir_name=$1 36 | solr_port=$2 37 | solr_core=$3 38 | # Run solr 39 | echo "Running with folder $dir_name" 40 | echo "Starting solr on port ${solr_port}..." 41 | 42 | # go to the solr folder 43 | cd "$dir_name/example" 44 | 45 | if [ "$DEBUG" = "true" ] 46 | then 47 | java -Djetty.port=$solr_port -Dsolr.solr.home=multicore -jar start.jar & 48 | else 49 | java -Djetty.port=$solr_port -Dsolr.solr.home=multicore -jar start.jar > /dev/null 2>&1 & 50 | fi 51 | wait_for_solr 52 | cd ../../ 53 | echo "Started" 54 | } 55 | 56 | 57 | download_and_run() { 58 | case $1 in 59 | 3.5.0) 60 | url="http://archive.apache.org/dist/lucene/solr/3.5.0/apache-solr-3.5.0.tgz" 61 | dir_name="apache-solr-3.5.0" 62 | dir_conf="conf/" 63 | ;; 64 | 3.6.0) 65 | url="http://archive.apache.org/dist/lucene/solr/3.6.0/apache-solr-3.6.0.tgz" 66 | dir_name="apache-solr-3.6.0" 67 | dir_conf="conf/" 68 | ;; 69 | 3.6.1) 70 | url="http://archive.apache.org/dist/lucene/solr/3.6.1/apache-solr-3.6.1.tgz" 71 | dir_name="apache-solr-3.6.1" 72 | dir_conf="conf/" 73 | ;; 74 | 3.6.2) 75 | url="http://archive.apache.org/dist/lucene/solr/3.6.2/apache-solr-3.6.2.tgz" 76 | dir_name="apache-solr-3.6.2" 77 | dir_conf="conf/" 78 | ;; 79 | 4.0.0) 80 | url="http://archive.apache.org/dist/lucene/solr/4.0.0/apache-solr-4.0.0.tgz" 81 | dir_name="apache-solr-4.0.0" 82 | dir_conf="collection1/conf/" 83 | ;; 84 | 4.1.0) 85 | url="http://archive.apache.org/dist/lucene/solr/4.1.0/solr-4.1.0.tgz" 86 | dir_name="solr-4.1.0" 87 | dir_conf="collection1/conf/" 88 | ;; 89 | 4.2.0) 90 | url="http://archive.apache.org/dist/lucene/solr/4.2.0/solr-4.2.0.tgz" 91 | dir_name="solr-4.2.0" 92 | dir_conf="collection1/conf/" 93 | ;; 94 | 4.2.1) 95 | url="http://archive.apache.org/dist/lucene/solr/4.2.1/solr-4.2.1.tgz" 96 | dir_name="solr-4.2.1" 97 | dir_conf="collection1/conf/" 98 | ;; 99 | 4.3.1) 100 | url="http://archive.apache.org/dist/lucene/solr/4.3.1/solr-4.3.1.tgz" 101 | dir_name="solr-4.3.1" 102 | dir_conf="collection1/conf/" 103 | ;; 104 | 4.4.0) 105 | url="http://archive.apache.org/dist/lucene/solr/4.4.0/solr-4.4.0.tgz" 106 | dir_name="solr-4.4.0" 107 | dir_conf="collection1/conf/" 108 | ;; 109 | 4.5.0) 110 | url="http://archive.apache.org/dist/lucene/solr/4.5.0/solr-4.5.0.tgz" 111 | dir_name="solr-4.5.0" 112 | dir_conf="collection1/conf/" 113 | ;; 114 | 4.5.1)
115 | url="http://archive.apache.org/dist/lucene/solr/4.5.1/solr-4.5.1.tgz" 116 | dir_name="solr-4.5.1" 117 | dir_conf="collection1/conf/" 118 | ;; 119 | 4.6.0) 120 | url="http://archive.apache.org/dist/lucene/solr/4.6.0/solr-4.6.0.tgz" 121 | dir_name="solr-4.6.0" 122 | dir_conf="collection1/conf/" 123 | ;; 124 | 4.6.1) 125 | url="http://archive.apache.org/dist/lucene/solr/4.6.1/solr-4.6.1.tgz" 126 | dir_name="solr-4.6.1" 127 | dir_conf="collection1/conf/" 128 | ;; 129 | 4.7.0) 130 | url="http://archive.apache.org/dist/lucene/solr/4.7.0/solr-4.7.0.tgz" 131 | dir_name="solr-4.7.0" 132 | dir_conf="collection1/conf/" 133 | ;; 134 | 4.7.1) 135 | url="http://archive.apache.org/dist/lucene/solr/4.7.1/solr-4.7.1.tgz" 136 | dir_name="solr-4.7.1" 137 | dir_conf="collection1/conf/" 138 | ;; 139 | 4.7.2) 140 | url="http://archive.apache.org/dist/lucene/solr/4.7.2/solr-4.7.2.tgz" 141 | dir_name="solr-4.7.2" 142 | dir_conf="collection1/conf/" 143 | ;; 144 | 4.8.0) 145 | url="http://archive.apache.org/dist/lucene/solr/4.8.0/solr-4.8.0.tgz" 146 | dir_name="solr-4.8.0" 147 | dir_conf="collection1/conf/" 148 | ;; 149 | 4.8.1) 150 | url="http://archive.apache.org/dist/lucene/solr/4.8.1/solr-4.8.1.tgz" 151 | dir_name="solr-4.8.1" 152 | dir_conf="collection1/conf/" 153 | ;; 154 | 4.9.0) 155 | url="http://archive.apache.org/dist/lucene/solr/4.9.0/solr-4.9.0.tgz" 156 | dir_name="solr-4.9.0" 157 | dir_conf="collection1/conf/" 158 | ;; 159 | 4.9.1) 160 | url="http://archive.apache.org/dist/lucene/solr/4.9.1/solr-4.9.1.tgz" 161 | dir_name="solr-4.9.1" 162 | dir_conf="collection1/conf/" 163 | ;; 164 | 4.10.2) 165 | url="http://archive.apache.org/dist/lucene/solr/4.10.2/solr-4.10.2.tgz" 166 | dir_name="solr-4.10.2" 167 | dir_conf="collection1/conf/" 168 | ;; 169 | esac 170 | 171 | download $url $dir_name 172 | add_core $dir_name $dir_conf $SOLR_CORE $SOLR_CONFS 173 | run $dir_name $SOLR_PORT $SOLR_CORE 174 | 175 | if [ -z "${SOLR_DOCS}" ] 176 | then 177 | echo "SOLR_DOCS not defined, skipping initial indexing" 178 | else 179 | post_documents $dir_name $SOLR_DOCS $SOLR_CORE $SOLR_PORT 180 | fi 181 | } 182 | 183 | add_core() { 184 | dir_name=$1 185 | dir_conf=$2 186 | solr_core=$3 187 | solr_confs=$4 188 | # prepare our folders 189 | [[ -d "${dir_name}/example/multicore/${solr_core}" ]] || mkdir $dir_name/example/multicore/$solr_core 190 | [[ -d "${dir_name}/example/multicore/${solr_core}/conf" ]] || mkdir $dir_name/example/multicore/$solr_core/conf 191 | 192 | # copy full solr example first 193 | cp -R $dir_name/example/solr/$dir_conf/* $dir_name/example/multicore/$solr_core/conf 194 | 195 | # overwrite with custom configurations 196 | if [ -d "${solr_confs}" ] ; then 197 | cp -R $solr_confs/* $dir_name/example/multicore/$solr_core/conf/ 198 | else 199 | for file in $solr_confs 200 | do 201 | if [ -f "${file}" ]; then 202 | cp $file $dir_name/example/multicore/$solr_core/conf 203 | echo "Copied $file into solr conf directory."
204 | else 205 | echo "${file} is not a valid file" 206 | exit 1 207 | fi 208 | done 209 | fi 210 | } 211 | 212 | post_documents() { 213 | dir_name=$1 214 | solr_docs=$2 215 | solr_core=$3 216 | solr_port=$4 217 | # Post documents 218 | if [ -z "${solr_docs}" ] 219 | then 220 | echo "SOLR_DOCS not defined, skipping initial indexing" 221 | else 222 | echo "Indexing $solr_docs" 223 | java -Dtype=application/json -Durl=http://localhost:$solr_port/solr/$solr_core/update/json -jar $dir_name/example/exampledocs/post.jar $solr_docs 224 | fi 225 | } 226 | 227 | check_version() { 228 | case $1 in 229 | 3.5.0|3.6.0|3.6.1|3.6.2|4.0.0|4.1.0|4.2.0|4.2.1|4.3.1|4.4.0|4.5.0|4.5.1|4.6.0|4.6.1|4.7.0|4.7.1|4.7.2|4.8.0|4.8.1|4.9.0|4.9.1|4.10.2);; 230 | *) 231 | echo "Sorry, $1 is not a supported or valid version." 232 | exit 1 233 | ;; 234 | esac 235 | } 236 | 237 | check_version $SOLR_VERSION 238 | download_and_run $SOLR_VERSION 239 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py37,py38,py39,py310 3 | 4 | [testenv] 5 | recreate = true 6 | setenv = 7 | TEST_DIR=scorched/tests/ 8 | PACKAGE_DIR=scorched 9 | extras = test 10 | commands = 11 | py.test {posargs: --junitxml junit-{envname}.xml --cov {env:PACKAGE_DIR} --cov-report xml:coverage-{envname}.xml {env:TEST_DIR}} 12 | usedevelop = True 13 | deps = 14 | pytest < 7.0.0 15 | pytest-cov 16 | pytest-docker 17 | --------------------------------------------------------------------------------
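As a reading aid for the parameter tables above, the following sketch reproduces the round trip the tests pin down. It is illustrative only: it uses nothing beyond calls that already appear in the test files (SolrSearch(None), Q, filter, sort_by, field_limit, params, MltSolrSearch), and the exact ordering of the emitted parameter lists is inferred from the expected values in complex_boolean_queries rather than guaranteed.

from scorched.search import MltSolrSearch, SolrSearch

# Build a query without a live connection; params() shows the wire format.
search = SolrSearch(None)
q = (
    search.query("hello world")            # spaces get escaped: hello\ world
    .filter(search.Q(text_field="tow"))    # becomes an fq parameter
    .sort_by("-int_field")                 # a leading "-" means descending
    .field_limit(["name", "foo"])          # fl is emitted comma-joined, sorted
)
print(q.params())
# By analogy with the cases above, something like:
# [('fl', b'foo,name'), ('fq', b'text_field:tow'),
#  ('q', b'hello\\ world'), ('sort', b'int_field desc')]

# More-like-this searches seed the query with posted content instead.
mlt = MltSolrSearch(None, content="This is the posted content.")
mlt = mlt.mlt("text_field", query_fields={}, mindf=3, interestingTerms="details")
print(mlt.params())
# [('mlt.fl', b'text_field'), ('mlt.interestingTerms', b'details'),
#  ('mlt.mindf', b'3'), ('q', b'*:*')]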