├── .coveragerc ├── .flake8 ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGES.rst ├── CONTRIBUTORS.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── bench.py ├── docs ├── Makefile ├── api.rst ├── conf.py ├── index.rst ├── make.bat ├── mlt.rst ├── query.rst └── usage.rst ├── pyproject.toml ├── scorched ├── __init__.py ├── compat.py ├── connection.py ├── dates.py ├── exc.py ├── response.py ├── search.py ├── strings.py ├── testing.py └── tests │ ├── __init__.py │ ├── conftest.py │ ├── data │ └── lipsum.pdf │ ├── docker-compose.yml │ ├── dumps │ ├── books.json │ ├── request_error.json │ ├── request_hl.json │ ├── request_hl_grouped.json │ ├── request_w_facets.json │ └── request_w_termvector.json │ ├── schema.py │ ├── solrconfig.xml │ ├── solrconfig_8.11.xml │ ├── test_connection.py │ ├── test_dates.py │ ├── test_functional.py │ ├── test_response.py │ ├── test_search.py │ ├── test_strings.py │ └── test_testing.py ├── setup.cfg ├── setup.py ├── testing-solr.sh └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | omit = 3 | */python?.?/* 4 | */pypy/* 5 | */site-packages/nose/* 6 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | #ignore = E203, E266, E501, W503, F403, F401 3 | max-line-length = 89 4 | max-complexity = 18 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | *eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | env 22 | _build 23 | 24 | # Installer logs 25 | pip-log.txt 26 | 27 | # Unit test / coverage reports 28 | .coverage 29 | .tox 30 | nosetests.xml 31 | 32 | # Translations 33 | *.mo 34 | 35 | # Mr Developer 36 | .mr.developer.cfg 37 | .project 38 | .pydevproject 39 | 40 | # vim 41 | *.swp 42 | 43 | # potential solr downloads 44 | solr-* 45 | 46 | man 47 | pyvenv.cfg 48 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v3.4.0 5 | hooks: 6 | - id: trailing-whitespace 7 | - id: end-of-file-fixer 8 | - id: fix-encoding-pragma 9 | args: [--remove] 10 | - id: check-yaml 11 | - id: debug-statements 12 | language_version: python3 13 | - id: check-xml 14 | types: [file] 15 | files: \.zcml$|\.xml$ 16 | - repo: https://github.com/timothycrosley/isort 17 | rev: 5.7.0 18 | hooks: 19 | - id: isort 20 | files: \.py$ 21 | - repo: https://github.com/psf/black 22 | rev: 21.6b0 23 | hooks: 24 | - id: black 25 | # - repo: https://github.com/pre-commit/mirrors-mypy 26 | # rev: 'v0.910' # Use the sha / tag you want to point at 27 | # hooks: 28 | # - id: mypy 29 | # additional_dependencies: [types-requests, types-setuptools] 30 | - repo: https://gitlab.com/pycqa/flake8 31 | rev: 3.8.4 32 | hooks: 33 | - id: flake8 34 | language_version: python3 35 | additional_dependencies: [flake8-typing-imports==1.9.0] 36 | - repo: https://github.com/mgedmin/check-manifest 37 | rev: "0.46" 38 | hooks: 39 | - id: check-manifest 40 | 
-------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | CHANGES 2 | ======= 3 | 4 | 1.0.0.0b3 (unreleased) 5 | ---------------------- 6 | 7 | - Nothing changed yet. 8 | 9 | 10 | 1.0.0.0b2 (2022-03-21) 11 | ---------------------- 12 | 13 | - Fix last version: PEP 440 does not support semantic versioning 14 | as proposed by https://semver.org (neither 1.0 nor 2.0). 15 | OpenStack suggests a modified semver as described here: 16 | https://docs.openstack.org/pbr/latest/user/semver.html 17 | 18 | 19 | 1.0.0-beta.1 (2022-03-21) 20 | ------------------------- 21 | 22 | Backwards incompatible changes 23 | ++++++++++++++++++++++++++++++ 24 | 25 | - In Response.facet_counts the default value for 26 | `facet_counts.facet_pivot` is now `{}` instead of `()` if 27 | `facet_pivot` was not in the Solr response JSON 28 | 29 | - Remove support for Python 2.7 30 | 31 | Bug fixes 32 | +++++++++ 33 | 34 | - Added missing mlt parameter (mlissner) 35 | 36 | Features 37 | ++++++++ 38 | 39 | - Interpret fields of type `date` and/or `pdate` as 40 | Solr date fields 41 | 42 | - Python 3.9 is now the baseline. 43 | 44 | 45 | 0.13.0 (2020-01-25) 46 | ------------------- 47 | 48 | - Add support for Python 3.6, Python 3.7 and Python 3.8 (ale-rt) 49 | 50 | 51 | 0.12 (2017-03-16) 52 | ----------------- 53 | 54 | - Add extract handler support (mamico) 55 | 56 | - Allow user to pass an http_connection to SolrInterface ctor (quinot) 57 | 58 | - ``BaseSearch`` now has a ``count`` function (mlissner) 59 | 60 | - Support atomic updates (ale-rt) 61 | 62 | 63 | 0.11.0 (2016-10-11) 64 | ------------------- 65 | 66 | - Highlighting is now available in the result documents as the 67 | ``solr_highlights`` field (mlissner) 68 | 69 | - Smaller documentation cleanup 70 | 71 | 72 | 0.10.2 (2016-09-27) 73 | ------------------- 74 | 75 | - Fix is_iter implementation #30 (mamico) 76 | 77 | - Multi-value date fields work (mlissner) 78 | 79 | - Fixes error in the readme so that DEBUG mode works as documented (mlissner) 80 | 81 | 82 | 0.10.1 (2016-06-15) 83 | ------------------- 84 | 85 | - Fixing setup.py classifier. 86 | 87 | 88 | 0.10 (2016-06-15) 89 | ----------------- 90 | 91 | - Return response for update actions (mamico) 92 | 93 | - Add support for Solr cursors (Chronial) 94 | 95 | - Added stats option (rlskoeser) 96 | 97 | 98 | 0.9 (2015-11-09) 99 | ---------------- 100 | 101 | - Better check of datetime dynamicfields (mamico) 102 | 103 | - RealTime Get (Chronial) 104 | 105 | - TermVector support (Chronial) 106 | 107 | 108 | 0.8 (2015-08-26) 109 | ---------------- 110 | 111 | - Use compat.basestring over compat.str in date convert (mamico) 112 | 113 | - Remove test from core requirements (mamico) 114 | 115 | - Added search_timeout parameter to SolrConnection (mamico) 116 | 117 | - Fix: do not alter documents while adding new documents 118 | 119 | 120 | 0.7 (2015-04-17) 121 | ---------------- 122 | 123 | - Test against Solr 4.10.2 and added Python 3.4 to travis. 124 | 125 | - Added support for dismax queries. 126 | 127 | - Added support for edismax field aliases. 128 | 129 | - Added support for facet ranges. 130 | 131 | 132 | 0.6 (2014-06-23) 133 | ---------------- 134 | 135 | - Add spellchecking for scorched queries. (#9707) 136 | 137 | 138 | 0.5 (2014-06-05) 139 | ---------------- 140 | 141 | - Add `debugQuery` parameter to search. (#9903) 142 | 143 | - Add possibility to specify the request handler to use per query.
(#9704) 144 | 145 | 146 | 0.4.1 (2014-04-16) 147 | ------------------ 148 | 149 | - Fixed fields in field_limiter (again). 150 | 151 | 152 | 0.4 (2014-04-16) 153 | ---------------- 154 | 155 | - Fixed conversion of fields to arrays. 156 | 157 | - Added FacetPivotOptions. 158 | 159 | - Added PostingsHighlightOptions. 160 | 161 | - Added boundaryScanner to HighlightOptions. 162 | 163 | 164 | 0.3 (2014-04-03) 165 | ---------------- 166 | 167 | - Makes SolrResponse iterable. 168 | 169 | 170 | 0.2 (2014-03-24) 171 | ---------------- 172 | 173 | - Added more tests 174 | 175 | - Added description in setup.py 176 | 177 | 178 | 0.1 (2014-03-20) 179 | ---------------- 180 | 181 | - Python 3 182 | 183 | - Cleaner api: moved redundant functions 184 | 185 | - Cleaner api: removed filter_exclude; use ~si.Q() 186 | 187 | - Cleaner api: removed exclude; use ~si.Q() 188 | 189 | - Fixed mlt_search (mlt component and handler) 190 | 191 | - Removed mx.DateTime 192 | 193 | - Removed redundant more_like_this 194 | 195 | - Offspring of sunburnt is born 196 | -------------------------------------------------------------------------------- /CONTRIBUTORS.rst: -------------------------------------------------------------------------------- 1 | Contributors 2 | ============ 3 | 4 | - Alessandro Pisa 5 | 6 | - Mauro Amico 7 | 8 | - Josip Delic 9 | 10 | - Janko Hauser 11 | 12 | - Mark E. Haase 13 | 14 | - Mike Lissner 15 | 16 | - Thomas Quinot 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, 2010, 2011 Toby White 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE.
20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.py 2 | include *.sh 3 | include *.rst 4 | 5 | recursive-include docs *.bat 6 | recursive-include docs *.py 7 | recursive-include docs *.rst 8 | recursive-include docs Makefile 9 | recursive-include scorched *.json 10 | recursive-include scorched *.pdf 11 | recursive-include scorched *.xml 12 | recursive-include scorched *.yml 13 | 14 | exclude .coveragerc 15 | exclude tox.ini 16 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Scorched 2 | ======== 3 | 4 | Scorched is a sunburnt offspring and like all offspring it tries to make 5 | things better or at least different. 6 | 7 | Git Repository and issue tracker: https://github.com/lugensa/scorched 8 | 9 | Documentation: http://scorched.readthedocs.org/en/latest/ 10 | 11 | .. |travisci| image:: https://travis-ci.org/lugensa/scorched.png 12 | .. _travisci: https://travis-ci.org/lugensa/scorched 13 | 14 | .. image:: https://coveralls.io/repos/lugensa/scorched/badge.png 15 | :target: https://coveralls.io/r/lugensa/scorched 16 | 17 | |travisci|_ 18 | 19 | .. _Solr : http://lucene.apache.org/solr/ 20 | .. _Lucene : http://lucene.apache.org/java/docs/index.html 21 | 22 | 23 | Some of the major differences: 24 | 25 | - No validation of queries in client code (makes the code much more lightweight) 26 | 27 | - Send and receive as JSON (faster: adding 20k docs dropped from 6.5s to 1.3s) 28 | 29 | - The API is more lightweight, e.g. ``add`` now consumes only dicts. 30 | 31 | - Wildcard search strings need to be explicitly set. 32 | 33 | - Python 3 34 | 35 | - Drops support for Solr < 4.3.0 36 | 37 | - ... 38 | 39 | 40 | Local testing 41 | ============= 42 | 43 | We switched to pytest and pytest-docker to run 44 | the tests. 45 | 46 | The OS account under which you run the tests 47 | must have permission to start Docker containers. 48 | 49 | First check out the sources:: 50 | 51 | git clone https://github.com/lugensa/scorched.git 52 | 53 | Now use tox for testing:: 54 | 55 | cd scorched 56 | tox 57 | 58 | Alternatively, use pytest directly:: 59 | 60 | cd scorched 61 | python3.10 -mvenv . 62 | ./bin/pip install -e .[test] 63 | ./bin/pytest ./scorched 64 | 65 | Running the tests will start Solr 8.11.1 in Docker 66 | (see scorched/tests/docker-compose.yml). 67 | -------------------------------------------------------------------------------- /bench.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import unicode_literals 3 | import scorched 4 | import time 5 | import datetime 6 | 7 | from matplotlib import pyplot 8 | from scorched.compat import is_py2 9 | 10 | if is_py2: 11 | import sunburnt 12 | 13 | 14 | def build(n): 15 | docs = [] 16 | for i in range(n): 17 | doc = {'author_t': 'George R.R.
Martin', 18 | 'cat': 'book', 19 | 'date_dt': datetime.datetime(2014, 3, 11, 10, 49, 0, 747991), 20 | 'genre_s': 'fantasy', 21 | 'id': '%s' % i, 22 | 'inStock': True, 23 | 'name': 'A fisch of Thrones', 24 | 'price': 7.99, 25 | 'sequence_i': 1, 26 | 'series_t': 'A Song of Ice and Fire'} 27 | docs.append(doc) 28 | return docs 29 | 30 | 31 | def run(n, interface): 32 | docs = build(n) 33 | si = interface("http://localhost:8983/solr/") 34 | start = time.perf_counter()  # time.clock() was removed in Python 3.8 35 | si.add(docs) 36 | si.commit() 37 | elapsed = time.perf_counter() - start 38 | print("%s docs took %ss" % (len(docs), elapsed)) 39 | query = si.query(name='fisch') 40 | res = si.search(**query.options()) 41 | print("found %s" % res.result.numFound) 42 | si.delete_all() 43 | si.commit() 44 | return {'x': n, 'y': elapsed} 45 | 46 | count = 21 47 | if is_py2: 48 | data_sunburnt = [] 49 | for i in [x*1000 for x in range(1, count)]: 50 | data_sunburnt.append(run(i, sunburnt.SolrInterface)) 51 | 52 | data_scorched = [] 53 | for i in [x*1000 for x in range(1, count)]: 54 | data_scorched.append(run(i, scorched.SolrInterface)) 55 | 56 | if is_py2: 57 | pyplot.plot( 58 | [x['x'] for x in data_sunburnt], [y['y'] for y in data_sunburnt], '-') 59 | pyplot.plot( 60 | [x['x'] for x in data_scorched], [y['y'] for y in data_scorched], '-') 61 | pyplot.title('Plotting adding speed') 62 | pyplot.xlabel('Number of documents') 63 | pyplot.ylabel('Time in seconds (less is better)') 64 | if is_py2: 65 | pyplot.legend(['sunburnt', 'scorched']) 66 | else: 67 | pyplot.legend(['scorched']) 68 | pyplot.savefig('bench.png') 69 | pyplot.show() 70 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = ../env/bin/sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/scorched.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/scorched.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/scorched" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/scorched" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | Scorched API 4 | ============ 5 | 6 | API 7 | --- 8 | 9 | .. automodule:: scorched.connection 10 | :members: grouper 11 | 12 | .. autoclass:: SolrConnection 13 | :members: 14 | 15 | .. automethod:: __init__ 16 | 17 | .. autoclass:: SolrInterface 18 | :members: 19 | 20 | .. 
automethod:: __init__ 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # scorched documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Mar 12 21:48:32 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.todo', 34 | 'sphinx.ext.viewcode', 35 | ] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ['_templates'] 39 | 40 | # The suffix of source filenames. 41 | source_suffix = '.rst' 42 | 43 | # The encoding of source files. 44 | #source_encoding = 'utf-8-sig' 45 | 46 | # The master toctree document. 47 | master_doc = 'index' 48 | 49 | # General information about the project. 50 | project = u'scorched' 51 | copyright = u'2014, Josip Delic' 52 | 53 | # The version info for the project you're documenting, acts as replacement for 54 | # |version| and |release|, also used in various other places throughout the 55 | # built documents. 56 | # 57 | # The short X.Y version. 58 | version = '0.1' 59 | # The full version, including alpha/beta/rc tags. 60 | release = '0.1' 61 | 62 | # The language for content autogenerated by Sphinx. Refer to documentation 63 | # for a list of supported languages. 64 | #language = None 65 | 66 | # There are two options for replacing |today|: either, you set today to some 67 | # non-false value, then it is used: 68 | #today = '' 69 | # Else, today_fmt is used as the format for a strftime call. 70 | #today_fmt = '%B %d, %Y' 71 | 72 | # List of patterns, relative to source directory, that match files and 73 | # directories to ignore when looking for source files. 74 | exclude_patterns = ['_build'] 75 | 76 | # The reST default role (used for this markup: `text`) to use for all 77 | # documents. 78 | #default_role = None 79 | 80 | # If true, '()' will be appended to :func: etc. cross-reference text. 81 | #add_function_parentheses = True 82 | 83 | # If true, the current module name will be prepended to all description 84 | # unit titles (such as .. function::). 85 | #add_module_names = True 86 | 87 | # If true, sectionauthor and moduleauthor directives will be shown in the 88 | # output. They are ignored by default. 89 | #show_authors = False 90 | 91 | # The name of the Pygments (syntax highlighting) style to use. 
92 | pygments_style = 'sphinx' 93 | 94 | # A list of ignored prefixes for module index sorting. 95 | #modindex_common_prefix = [] 96 | 97 | # If true, keep warnings as "system message" paragraphs in the built documents. 98 | #keep_warnings = False 99 | 100 | 101 | # -- Options for HTML output ---------------------------------------------- 102 | 103 | # The theme to use for HTML and HTML Help pages. See the documentation for 104 | # a list of builtin themes. 105 | html_theme = 'default' 106 | 107 | # Theme options are theme-specific and customize the look and feel of a theme 108 | # further. For a list of options available for each theme, see the 109 | # documentation. 110 | #html_theme_options = {} 111 | 112 | # Add any paths that contain custom themes here, relative to this directory. 113 | #html_theme_path = [] 114 | 115 | # The name for this set of Sphinx documents. If None, it defaults to 116 | # "<project> v<release> documentation". 117 | #html_title = None 118 | 119 | # A shorter title for the navigation bar. Default is the same as html_title. 120 | #html_short_title = None 121 | 122 | # The name of an image file (relative to this directory) to place at the top 123 | # of the sidebar. 124 | #html_logo = None 125 | 126 | # The name of an image file (within the static path) to use as favicon of the 127 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 128 | # pixels large. 129 | #html_favicon = None 130 | 131 | # Add any paths that contain custom static files (such as style sheets) here, 132 | # relative to this directory. They are copied after the builtin static files, 133 | # so a file named "default.css" will overwrite the builtin "default.css". 134 | html_static_path = ['_static'] 135 | 136 | # Add any extra paths that contain custom files (such as robots.txt or 137 | # .htaccess) here, relative to this directory. These files are copied 138 | # directly to the root of the documentation. 139 | #html_extra_path = [] 140 | 141 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 142 | # using the given strftime format. 143 | #html_last_updated_fmt = '%b %d, %Y' 144 | 145 | # If true, SmartyPants will be used to convert quotes and dashes to 146 | # typographically correct entities. 147 | #html_use_smartypants = True 148 | 149 | # Custom sidebar templates, maps document names to template names. 150 | #html_sidebars = {} 151 | 152 | # Additional templates that should be rendered to pages, maps page names to 153 | # template names. 154 | #html_additional_pages = {} 155 | 156 | # If false, no module index is generated. 157 | #html_domain_indices = True 158 | 159 | # If false, no index is generated. 160 | #html_use_index = True 161 | 162 | # If true, the index is split into individual pages for each letter. 163 | #html_split_index = False 164 | 165 | # If true, links to the reST sources are added to the pages. 166 | #html_show_sourcelink = True 167 | 168 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 169 | #html_show_sphinx = True 170 | 171 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 172 | #html_show_copyright = True 173 | 174 | # If true, an OpenSearch description file will be output, and all pages will 175 | # contain a <link> tag referring to it. The value of this option must be the 176 | # base URL from which the finished HTML is served. 177 | #html_use_opensearch = '' 178 | 179 | # This is the file name suffix for HTML files (e.g. ".xhtml").
180 | #html_file_suffix = None 181 | 182 | # Output file base name for HTML help builder. 183 | htmlhelp_basename = 'scorcheddoc' 184 | 185 | 186 | # -- Options for LaTeX output --------------------------------------------- 187 | 188 | latex_elements = { 189 | # The paper size ('letterpaper' or 'a4paper'). 190 | #'papersize': 'letterpaper', 191 | 192 | # The font size ('10pt', '11pt' or '12pt'). 193 | #'pointsize': '10pt', 194 | 195 | # Additional stuff for the LaTeX preamble. 196 | #'preamble': '', 197 | } 198 | 199 | # Grouping the document tree into LaTeX files. List of tuples 200 | # (source start file, target name, title, 201 | # author, documentclass [howto, manual, or own class]). 202 | latex_documents = [ 203 | ('index', 'scorched.tex', u'scorched Documentation', 204 | u'Josip Delic', 'manual'), 205 | ] 206 | 207 | # The name of an image file (relative to this directory) to place at the top of 208 | # the title page. 209 | #latex_logo = None 210 | 211 | # For "manual" documents, if this is true, then toplevel headings are parts, 212 | # not chapters. 213 | #latex_use_parts = False 214 | 215 | # If true, show page references after internal links. 216 | #latex_show_pagerefs = False 217 | 218 | # If true, show URL addresses after external links. 219 | #latex_show_urls = False 220 | 221 | # Documents to append as an appendix to all manuals. 222 | #latex_appendices = [] 223 | 224 | # If false, no module index is generated. 225 | #latex_domain_indices = True 226 | 227 | 228 | # -- Options for manual page output --------------------------------------- 229 | 230 | # One entry per manual page. List of tuples 231 | # (source start file, name, description, authors, manual section). 232 | man_pages = [ 233 | ('index', 'scorched', u'scorched Documentation', 234 | [u'Josip Delic'], 1) 235 | ] 236 | 237 | # If true, show URL addresses after external links. 238 | #man_show_urls = False 239 | 240 | 241 | # -- Options for Texinfo output ------------------------------------------- 242 | 243 | # Grouping the document tree into Texinfo files. List of tuples 244 | # (source start file, target name, title, author, 245 | # dir menu entry, description, category) 246 | texinfo_documents = [ 247 | ('index', 'scorched', u'scorched Documentation', 248 | u'Josip Delic', 'scorched', 'One line description of project.', 249 | 'Miscellaneous'), 250 | ] 251 | 252 | # Documents to append as an appendix to all manuals. 253 | #texinfo_appendices = [] 254 | 255 | # If false, no module index is generated. 256 | #texinfo_domain_indices = True 257 | 258 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 259 | #texinfo_show_urls = 'footnote' 260 | 261 | # If true, do not generate a @detailmenu in the "Top" node's menu. 262 | #texinfo_no_detailmenu = False 263 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. scorched documentation master file, created by 2 | sphinx-quickstart on Wed Mar 12 21:48:32 2014. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to scorched's documentation! 7 | ==================================== 8 | 9 | Contents: 10 | 11 | .. 
toctree:: 12 | :maxdepth: 2 13 | 14 | usage.rst 15 | query.rst 16 | mlt.rst 17 | api.rst 18 | 19 | 20 | Indices and tables 21 | ================== 22 | 23 | * :ref:`genindex` 24 | * :ref:`modindex` 25 | * :ref:`search` 26 | 27 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use \`make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files.
92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\scorched.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\scorched.qhc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo.
212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/mlt.rst: -------------------------------------------------------------------------------- 1 | .. _mlt: 2 | 3 | More Like This queries 4 | ====================== 5 | 6 | More Like This (MLT) is a feature of Solr which provides for comparisons of 7 | documents; you can ask Solr to tell you about any More documents it has that 8 | are Like This one. 9 | 10 | An MLT query can be part of a standard query (see 11 | :ref:`standard-query-more-like-this`), in which case you're asking Solr to 12 | tell you not only about immediate query results, but also about any other 13 | results which are similar to the results you've got. 14 | 15 | Alternatively, you can feed Solr an entire document that is not already in its 16 | index, and ask it to do an MLT query on that document. 17 | 18 | The first case is covered in :ref:`standard-query-more-like-this`; the 19 | second case we'll show here. 20 | 21 | Basic MLT query 22 | --------------- 23 | 24 | Instead of calling the ``query`` method on the interface, we call the 25 | ``mlt_query`` method. 26 | 27 | :: 28 | 29 | >>> si.mlt_query(fields="name", content=open("localfile").read()) 30 | 31 | We give the MLT handler some content (sourced in this case from a local file); 32 | the MLT query will take this text, analyze it, and retrieve documents that are 33 | similar according to the results of its analysis. 34 | 35 | The results are returned in the same format as illustrated in the ``mlt()`` 36 | method. 37 | 38 | Further MLT query options 39 | ------------------------- 40 | 41 | If we want similarity to be calculated with respect to a different field or 42 | fields: 43 | 44 | :: 45 | 46 | >>> si.mlt_query(content=open("localfile").read(), 47 | ... fields=["name", "author_t"]) 48 | 49 | We can understand a little more about why we get the results we do by asking 50 | for the result of the MLT document analysis. 51 | 52 | :: 53 | 54 | >>> si.mlt_query(fields="name", content=open("localfile").read(), 55 | ... interestingTerms="list") 56 | >>> si.mlt_query(fields="name", content=open("localfile").read(), 57 | ... interestingTerms="details") 58 | 59 | "list" will return a list of the interesting terms extracted; "details" will 60 | also provide details of the boost used for each term.
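For example, a minimal sketch of executing such a query and reading back the extracted terms (the file name and the terms shown here are illustrative, not real output):

::

    >>> res = si.mlt_query(fields="name", content=open("localfile").read(),
    ...                    interestingTerms="list").execute()
    >>> res.interesting_terms  # e.g. [u'name:thrones', u'name:fire']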
61 | 62 | If the document you're supplying is not encoded in UTF-8 (or equivalently 63 | ASCII) format, then you need to specify the charset in use (using the list 64 | available at http://docs.python.org/library/codecs.html#standard-encodings): 65 | 66 | :: 67 | 68 | >>> si.mlt_query(fields="name", content=open("localfile").read(), 69 | ... content_charset="iso-8859-1") 70 | 71 | Sourcing content from the web 72 | ----------------------------- 73 | 74 | You can also choose to tell Solr to source the document from the web, by giving 75 | the URL for the content rather than supplying it yourself: 76 | 77 | :: 78 | 79 | >>> si.mlt_query(fields="name", url="http://example.com/document") 80 | 81 | All the other options above still apply to URL-sourced content, except for 82 | "content_charset"; that's up to the webserver where the content is stored. 83 | 84 | In all the cases above, you can also specify any of the other options shown in 85 | ``mlt()``, apart from "count". 86 | 87 | MLT queries on indexed content 88 | ------------------------------ 89 | 90 | You can perform an MLT query on indexed content in the following way: 91 | 92 | :: 93 | 94 | >>> res = si.mlt_query("genre_s", interestingTerms="details", 95 | ... mintf=1, mindf=1).query( 96 | ... id="978-0641723445").execute() 97 | >>> res.result.docs 98 | [{u'_version_': 1462917302263480320, 99 | u'author': u'Rick Riordan', 100 | u'author_s': u'Rick Riordan', 101 | u'cat': [u'book', u'paperback'], 102 | u'genre_s': u'fantasy', 103 | u'id': u'978-1423103349', 104 | u'inStock': True, 105 | u'name': u'The Sea of Monsters', 106 | u'pages_i': 304, 107 | u'price': 6.49, 108 | u'price_c': u'6.49,USD', 109 | u'sequence_i': 2, 110 | u'series_t': u'Percy Jackson and the Olympians'}, 111 | {u'_version_': 1462917302263480321, 112 | u'author': u'Jostein Gaarder', 113 | u'author_s': u'Jostein Gaarder', 114 | u'cat': [u'book', u'paperback'], 115 | u'genre_s': u'fantasy', 116 | u'id': u'978-1857995879', 117 | u'inStock': True, 118 | u'name': u"Sophie's World : The Greek Philosophers", 119 | u'pages_i': 64, 120 | u'price': 3.07, 121 | u'price_c': u'3.07,USD', 122 | u'sequence_i': 1}] 123 | >>> res.interesting_terms 124 | [u'genre_s:fantasy', 1.0] 125 | 126 | i.e. initialize an otherwise empty mlt_query object, and then run queries on it 127 | as you would run normal queries. The full range of query operations is 128 | supported when composing the query for indexed content: 129 | 130 | :: 131 | 132 | >>> si.mlt_query("name").query(title='Whale').query(~si.Q( 133 | ... author='Melville').query(si.Q('Moby') | si.Q('Dick')) 134 | 135 | Chaining MLT queries 136 | -------------------- 137 | 138 | The ``mlt_query()`` method is chainable in the same way as the ``query`` 139 | method. There are a few differences to note. 140 | 141 | * You can't chain a ``query()`` onto an ``mlt_query()`` call 142 | if the MLT query is based on supplied ``content`` or ``url``. 143 | * You can't chain multiple ``mlt_query()`` methods together - only one content 144 | source can be considered at a time. 145 | 146 | The ``mlt_query()`` method takes all of the ``mlt()`` options except "count". 147 | -------------------------------------------------------------------------------- /docs/query.rst: -------------------------------------------------------------------------------- 1 | .. _querying: 2 | 3 | Querying 4 | ======== 5 | 6 | For the examples in this chapter, I'll be assuming that you've loaded your 7 | server up with the books data supplied with the example Solr setup.
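Throughout this chapter, ``si`` is a ``SolrInterface`` instance pointing at that server; a minimal sketch (the URL assumes the stock example setup on localhost, as also used in ``bench.py`` - adjust it to your own core):

::

    >>> import scorched
    >>> si = scorched.SolrInterface("http://localhost:8983/solr/")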
8 | 9 | The data itself you can see at 10 | ``$SOLR_SOURCE_DIR/example/exampledocs/books.json``. To load it into a server 11 | running with the example schema: 12 | 13 | :: 14 | 15 | $ cd example/exampledocs 16 | $ curl 'http://localhost:8983/solr/update/json?commit=true' --data-binary \ 17 | @books.json -H 'Content-type:application/json' 18 | 19 | Searching Solr 20 | -------------- 21 | 22 | Scorched uses a chaining API, and will hopefully look quite familiar to anyone 23 | who has used the Django ORM. 24 | 25 | The ``books.json`` data looked like this:: 26 | 27 | [ 28 | { 29 | "id" : "978-0641723445", 30 | "cat" : ["book","hardcover"], 31 | "name" : "The Lightning Thief", 32 | "author" : "Rick Riordan", 33 | "series_t" : "Percy Jackson and the Olympians", 34 | "sequence_i" : 1, 35 | "genre_s" : "fantasy", 36 | "inStock" : true, 37 | "price" : 12.50, 38 | "pages_i" : 384 39 | } 40 | ... 41 | ] 42 | 43 | .. note:: Dynamic fields. 44 | 45 | Dynamic fields are named with a suffix (*_i, *_t, *_s). 46 | 47 | A simple search for one word, in the default search field. 48 | 49 | :: 50 | 51 | >>> si.query("thief") 52 | 53 | Maybe you want to search in the (non-default) field author for authors called 54 | Rick. 55 | 56 | :: 57 | 58 | >>> si.query(author="rick") 59 | 60 | Maybe you want to search for books with "thief" in their title, by an author 61 | called "rick". 62 | 63 | :: 64 | 65 | >>> si.query(name="thief", author="rick") 66 | 67 | Perhaps your initial, default, search is more complex, and has more than one 68 | word in it: 69 | 70 | :: 71 | 72 | >>> si.query(name="lightning").query(name="thief") 73 | 74 | An easy way to see what scorched is producing is to call ``options()``:: 75 | 76 | >>> si.query(name="lightning").query(name="thief").options() 77 | {'q': u'name:lightning AND name:thief'} 78 | 79 | Executing queries 80 | ----------------- 81 | 82 | Scorched is lazy in constructing queries. The examples in the previous section 83 | don’t actually perform the query - they just create a "query object" with the 84 | correct parameters. To actually get the results of the query, you’ll need to 85 | execute it: 86 | 87 | :: 88 | 89 | >>> response = si.query("thief").execute() 90 | 91 | This will return a ``SolrResponse`` object. If you treat this object as a list, 92 | then each member of the list will be a document, in the form of a Python 93 | dictionary containing the relevant fields: 94 | 95 | For example, if you run the first example query above, you should see a 96 | response like this: 97 | 98 | :: 99 | 100 | >>> for result in si.query("thief").execute(): 101 | ... print(result) 102 | { 103 | u'name': u'The Lightning Thief', 104 | u'author': u'Rick Riordan', 105 | u'series_t': u'Percy Jackson and the Olympians', 106 | u'pages_i': 384, 107 | u'genre_s': u'fantasy', 108 | u'author_s': u'Rick Riordan', 109 | u'price': 12.5, 110 | u'price_c': u'12.5,USD', 111 | u'sequence_i': 1, 112 | u'inStock': True, 113 | u'_version_': 1462820023761371136, 114 | u'cat': [u'book', u'hardcover'], 115 | u'id': u'978-0641723445' 116 | } 117 | 118 | Of course, often you don’t want your results in the form of a dictionary; 119 | you want an object. Perhaps you have the following class defined in your code: 120 | 121 | :: 122 | 123 | >>> class Book: 124 | ... def __init__(self, name, author, **other_kwargs): 125 | ... self.title = name 126 | ... self.author = author 127 | ... self.other_kwargs = other_kwargs 128 | ... 129 | ... def __repr__(self): 130 | ... 
return 'Book("%s", "%s")' % (self.title, self.author) 131 | 132 | 133 | You can tell scorched to give you ``Book`` instances back by telling 134 | ``execute()`` to use the class as a constructor. 135 | 136 | :: 137 | 138 | >>> for result in si.query("game").execute(constructor=Book): 139 | ... print(result) 140 | Book("The Lightning Thief", "Rick Riordan") 141 | 142 | The ``constructor`` argument will most often be a class, but it can be any 143 | callable; it will always be called as ``constructor(**response_dict)``. 144 | 145 | 146 | You can extract more information from the response than simply the list of 147 | results. The SolrResponse object has the following attributes: 148 | 149 | * ``response.status`` : status of the query (status != 0 means something went wrong). 150 | * ``response.QTime`` : how long the query took, in milliseconds. 151 | * ``response.params`` : the params that were used in the query. 152 | 153 | and the results themselves are in the following attributes: 154 | 155 | * ``response.result`` : the results of your main query. 156 | * ``response.result.groups`` : see `Result grouping`_ below. 157 | * ``response.facet_counts`` : see `Faceting`_ below. 158 | * ``response.highlighting`` : see `Highlighting`_ below. 159 | * ``response.more_like_these`` : see `More Like This`_ below. 160 | 161 | Finally, ``response.result`` itself has the following attributes: 162 | 163 | * ``response.result.numFound`` : total number of docs found in the index. 164 | * ``response.result.docs`` : the actual results themselves. 165 | * ``response.result.start`` : if the number of docs is less than numFound, 166 | then this is the pagination offset. 167 | 168 | Pagination 169 | ---------- 170 | 171 | By default, Solr will only return the first 10 results (this is configurable in 172 | ``solrconfig.xml``). To get at more results, you need to tell Solr to paginate 173 | further through the results. You do this by applying the ``paginate()`` method, 174 | which takes two parameters, ``start`` and ``rows``: 175 | 176 | :: 177 | 178 | >>> si.query("black").paginate(start=10, rows=30) 179 | 180 | Cursors 181 | ------- 182 | If you want to fetch all results, or a very large number of them, you should use 183 | cursors to get the results in smaller chunks. Due to the way this is implemented in Solr, your 184 | sort needs to include your uniqueKey field. The ``cursor()`` method returns a 185 | cursor that you can iterate over. Like ``execute()``, ``cursor()`` takes an 186 | optional ``constructor`` parameter. In addition you can pass ``rows`` to define 187 | how many results should be fetched from Solr at once. 188 | 189 | :: 190 | 191 | >>> for item in si.query("black").sort_by('id').cursor(rows=100): ... 192 | 193 | Returning different fields 194 | -------------------------- 195 | 196 | By default, Solr will return all stored fields in the results. You might only 197 | be interested in a subset of those fields. To restrict the fields Solr returns, 198 | you apply the ``field_limit()`` method. 199 | 200 | :: 201 | 202 | >>> si.query("game").field_limit("id") 203 | >>> si.query("game").field_limit(["id", "name"]) 204 | 205 | You can use the same option to get hold of the relevancy score that Solr 206 | has calculated for each document in the query: 207 | 208 | :: 209 | 210 | >>> si.query("game").field_limit(score=True) # Return the score alongside each document 211 | >>> si.query("game").field_limit("id", score=True) # return just the id and score.
212 | 213 | The results appear just like the normal dictionary responses, but with a different 214 | selection of fields. 215 | 216 | :: 217 | 218 | >>> for result in si.query("thief").field_limit("id", score=True).execute(): 219 | ... print(result) 220 | {u'score': 0.6349302, u'id': u'978-0641723445'} 221 | 222 | More complex queries 223 | -------------------- 224 | 225 | In our books example, there are two numerical fields - the ``price`` (which is 226 | a float) and ``sequence_i`` (which is an integer). Numerical fields can be 227 | queried: 228 | 229 | * exactly 230 | * by comparison (``<`` / ``<=`` / ``>=`` / ``>``) 231 | * by range (between two values) 232 | 233 | Exact queries 234 | ~~~~~~~~~~~~~ 235 | 236 | Don't try to query floats exactly unless you really know what you're doing 237 | (http://download.oracle.com/docs/cd/E19957-01/806-3568/ncg_goldberg.html). Solr 238 | will let you, but you almost certainly don't want to. Querying integers exactly 239 | is fine though. 240 | 241 | :: 242 | 243 | >>> si.query(sequence_i=1) 244 | 245 | Comparison queries 246 | ~~~~~~~~~~~~~~~~~~ 247 | 248 | These use a new syntax: 249 | 250 | :: 251 | 252 | >>> si.query(price__lt=7) 253 | 254 | Notice the double-underscore separating "price" from "lt". It will search for 255 | all books whose price is less than 7. You can do similar searches on any float 256 | or integer field, and you can use: 257 | 258 | * ``gt`` : greater than, ``>`` 259 | * ``gte`` : greater than or equal to, ``>=`` 260 | * ``lt`` : less than, ``<`` 261 | * ``lte`` : less than or equal to, ``<=`` 262 | 263 | Range queries 264 | ~~~~~~~~~~~~~ 265 | 266 | As an extension of a comparison query, you can query for values that are within 267 | a range, i.e. between two different numbers. 268 | 269 | :: 270 | 271 | >>> si.query(price__range=(5, 7)) # all books with prices between 5 and 7. 272 | 273 | This range query is *inclusive* - it will return books which are 274 | priced at exactly 5 or exactly 7. You can also make an *exclusive* search: 275 | 276 | :: 277 | 278 | >>> si.query(price__rangeexc=(5, 7)) 279 | 280 | Which will exclude books priced at exactly 5 or 7. 281 | 282 | Finally, you can also do a completely open range search: 283 | 284 | :: 285 | 286 | >>> si.query(price__any=True) 287 | 288 | Will search for a book which has *any* price. Why would you do this? Well, if 289 | you had a schema where price was *optional*, then this search would return all 290 | books which had a price - and exclude any books which didn’t have a price. 291 | 292 | Date queries 293 | ~~~~~~~~~~~~ 294 | 295 | You can query on dates the same way as you can query on numbers: exactly, by 296 | comparison, or by range. 297 | 298 | Be warned, though, that exact searching on date suffers from similar problems 299 | to exact searching on floating point numbers. Solr stores all dates to 300 | millisecond precision; exact searching will fail unless the date requested is 301 | also correct to millisecond precision. 302 | 303 | :: 304 | 305 | >>> si.query(date_dt=datetime.datetime(2006, 2, 13)) 306 | 307 | Will search for items whose date is *exactly* zero milliseconds 308 | after midnight on the 13th February, 2006. 309 | 310 | More likely you'll want to search by comparison or by range: 311 | 312 | :: 313 | 314 | # all items after the 1st January 2006 315 | >>> si.query(date_dt__gt=datetime.datetime(2006, 1, 1)) 316 | 317 | # all items in Q1 2006.
318 |     >>> si.query(date_dt__range=(datetime.datetime(2006, 1, 1), datetime.datetime(2006, 4, 1)))
319 | 
320 | The argument to a date query can be any object that looks roughly like a Python
321 | ``datetime`` object, or a string in W3C Datetime notation
322 | (http://www.w3.org/TR/NOTE-datetime).
323 | 
324 | ::
325 | 
326 |     >>> si.query(date_dt__gte="2006")
327 |     >>> si.query(date_dt__lt="2009-04-13")
328 |     >>> si.query(date_dt__range=("2010-03-04 00:34:21", "2011-02-17 09:21:44"))
329 | 
330 | Boolean fields
331 | ~~~~~~~~~~~~~~
332 | 
333 | Boolean fields are flags on a document. In the example book data, documents
334 | carry an ``inStock`` field. We can select on that by doing:
335 | 
336 | ::
337 | 
338 |     >>> si.query("thief", inStock=True)
339 | 
340 | 
341 | Sorting results
342 | ---------------
343 | 
344 | Solr will return results in "relevancy" order. How Solr determines relevancy is
345 | a complex question, and can depend highly on your specific setup. However, it's
346 | possible to override this and sort query results by another field. This field
347 | must be sortable, so most likely you'd use a numerical or date field.
348 | 
349 | ::
350 | 
351 |     >>> si.query("thief").sort_by("price")   # ascending price
352 |     >>> si.query("thief").sort_by("-price")  # descending price
353 | 
354 | You can also sort on multiple factors:
355 | 
356 | ::
357 | 
358 |     >>> si.query("thief").sort_by("-price").sort_by("score")
359 | 
360 | This query will sort first by descending price, and then by increasing "score"
361 | (which is what Solr calls relevancy).
362 | 
363 | 
364 | Complex queries
365 | ---------------
366 | 
367 | Scorched queries can be chained together in all sorts of ways, with each
368 | chained call contributing query terms. Say we want all results that mention either "thief" or "sea".
369 | 
370 | What we do is construct two *query objects*, one for each condition, and ``OR``
371 | them together.
372 | 
373 | ::
374 | 
375 |     >>> si.query(si.Q("thief") | si.Q("sea"))
376 | 
377 | The ``Q`` object can contain an arbitrary query, and can be combined using
378 | Boolean logic (here, using ``|``, the OR operator). The result can then be
379 | passed to a normal ``si.query()`` call for execution.
380 | 
381 | ``Q`` objects can be combined using any of the Boolean operators, including
382 | ``&`` (``AND``) and ``~`` (``NOT``), and can be nested within each
383 | other.
384 | 
385 | A moderately complex query could be written:
386 | 
387 | ::
388 | 
389 |     >>> query = si.query(si.Q(si.Q("thief") & ~si.Q(author="ostein"))
390 |     ...                  | si.Q(si.Q("foo") & ~si.Q(author="bui")))
391 | 
392 | This produces the following query:
393 | 
394 | ::
395 | 
396 |     >>> query.options()
397 |     {'q': '(thief AND (*:* AND NOT author:ostein)) OR (foo AND (*:* AND NOT author:bui))'}
398 | 
399 | 
400 | Excluding results from queries
401 | ------------------------------
402 | 
403 | If we want to *exclude* results by some criteria, we use ``~si.Q()``.
404 | 
405 | ::
406 | 
407 |     >>> si.query(~si.Q(author="Rick Riordan"))
408 | 
409 | 
410 | Wildcard searching
411 | ------------------
412 | 
413 | You can use asterisks and question marks in the normal way, except that you may
414 | not use leading wildcards - i.e. no wildcards at the beginning of a term.
415 | 
416 | To search for a book with "thie" in the name:
417 | 
418 | ::
419 | 
420 |     >>> si.query(name=scorched.strings.WildcardString("thie*"))
421 | 
422 | If, for some reason, you want to search exactly for a string with an asterisk
423 | or a question mark in it, then you need to tell Solr to special-case it:
424 | 
425 | ::
426 | 
427 |     >>> si.query(id=scorched.strings.RawString("055323933?*"))
428 | 
429 | This will search for a document whose id contains *exactly* the string given,
430 | including the question mark and asterisk.
431 | 
432 | 
433 | Filter queries
434 | --------------
435 | 
436 | Solr implements several internal caching layers, and to some extent you can
437 | control when and how they're used.
438 | 
439 | Often, you find that you can partition your query; one part is run many times
440 | without change, or with very limited change, and another part varies much more.
441 | (See http://wiki.apache.org/solr/FilterQueryGuidance for more guidance.)
442 | 
443 | If you are taking search input from the user, you would write:
444 | 
445 | ::
446 | 
447 |     >>> si.query(name=user_input).filter(price__lt=7.5)
448 |     >>> si.query(name=user_input).filter(price__gte=7.5)
449 | 
450 | Adding multiple filters::
451 | 
452 |     >>> si.query(name="bla").filter(price__lt=7.5).filter(author="hans").options()
453 |     {'fq': ['author:hans', 'price:{* TO 7.5}'], 'q': 'name:bla'}
454 | 
455 | 
456 | You can filter any sort of query, simply by using ``filter()`` instead of
457 | ``query()``. And if your filtering involves an exclusion, then simply use
458 | ``~si.Q(author="lloyd")``.
459 | 
460 | ::
461 | 
462 |     >>> si.query(title="black").filter(~si.Q(author="lloyd")).options()
463 |     {'fq': 'NOT author:lloyd', 'q': 'title:black'}
464 | 
465 | It's possible to mix and match ``query()`` and ``filter()`` calls as much as
466 | you like while chaining. The resulting filter queries will be combined and
467 | cached together. The argument to a ``filter()`` call can be any combination of
468 | ``si.Q`` objects.
469 | 
470 | ::
471 | 
472 |     >>> si.query(title="black").filter(
473 |     ...     si.Q(si.Q(name="thief") & ~si.Q(author="ostein"))
474 |     ... ).filter(si.Q(si.Q(title="foo") & ~si.Q(author="bui"))
475 |     ... ).options()
476 |     {'fq': ['name:thief', 'title:foo', 'NOT author:ostein', 'NOT author:bui'],
477 |      'q': 'title:black'}
478 | 
479 | Boosting
480 | ---------
481 | 
482 | Solr provides a mechanism for "boosting" results according to the values of
483 | various fields (see
484 | http://wiki.apache.org/solr/SolrRelevancyCookbook#Boosting_Ranking_Terms for a
485 | full explanation).
486 | 
487 | 
488 | The following example boosts the importance of the author field by 3:
489 | 
490 | ::
491 | 
492 |     >>> si.query(si.Q("black") | si.Q(author="lloyd")**3).options()
493 |     {'q': 'black OR author:lloyd^3'}
494 | 
495 | 
496 | A more common pattern is that you want all books with "black" in the title *and
497 | you have a preference for those authored by Lloyd Alexander*. This is different
498 | from the last query; the last query would return books by Lloyd Alexander which
499 | did not have "black" in the title. Achieving this in Solr is possible, but a
500 | little awkward; scorched provides a shortcut for this pattern.
501 | 
502 | ::
503 | 
504 |     >>> si.query("black").boost_relevancy(3, author_t="lloyd").options()
505 |     {'q': 'black OR (black AND author_t:lloyd^3)'}
506 | 
507 | This is fully chainable, and ``boost_relevancy`` can take an arbitrary
508 | collection of query objects.
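
Since ``boost_relevancy()`` returns a query object like any other, it chains
with the rest of the query API. A minimal sketch (reusing the book fields from
the examples above; the exact ``options()`` output is not shown here):

::

    >>> query = si.query("black").boost_relevancy(3, author_t="lloyd")
    >>> query = query.filter(inStock=True).paginate(rows=20)
    >>> response = query.execute()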
509 | 
510 | Faceting
511 | --------
512 | 
513 | For background, see http://wiki.apache.org/solr/SimpleFacetParameters.
514 | 
515 | Scorched lets you apply faceting to any query, with the ``facet_by()`` method,
516 | chainable on a query object. The ``facet_by()`` method needs, at least, a field
517 | (or list of fields) to facet on:
518 | 
519 | ::
520 | 
521 |     >>> facet_query = si.query("thief").facet_by("sequence_i").paginate(rows=0)
522 | 
523 | The above fragment will search for books matching "thief", and facet the
524 | results according to the value of ``sequence_i``. It will also return zero
525 | rows of results - just the facet output.
526 | 
527 | ::
528 | 
529 |     >>> print(facet_query.execute().facet_counts.facet_fields)
530 |     {'sequence_i': [('1', 1), ('2', 0)]}
531 | 
532 | The ``facet_counts`` object contains several sets of results - here, we're
533 | only interested in the ``facet_fields`` object. This contains a dictionary of
534 | results, keyed by each field where faceting was requested. The dictionary value
535 | is a list of two-tuples, mapping each value of the faceted field to the number of matching documents.
536 | 
537 | You can facet on more than one field at a time:
538 | 
539 | ::
540 | 
541 |     >>> si.query(...).facet_by(fields=["field1", "field2", ...])
542 | 
543 | The ``facet_fields`` dictionary will then have more than one key.
544 | 
545 | Solr supports a number of parameters to the faceting operation. All of the
546 | basic options are exposed through scorched:
547 | 
548 | ::
549 | 
550 |     fields, prefix, sort, limit, offset, mincount, missing, method,
551 |     enum.cache.minDf
552 | 
553 | All of these can be used as keyword arguments to the ``facet_by()`` call,
554 | except of course the last one since it contains periods. To pass keyword
555 | arguments with periods in them, you can use ``**`` syntax (see the example below).
556 | 
557 | You can also facet by ranges. The following query will return range facets over
558 | ``field1``: 0-10, 11-20, 21-30, etc. The ``mincount`` parameter can be used to
559 | return only those facets which contain a minimum number of results.
560 | 
561 | ::
562 | 
563 |     >>> si.query(...).facet_range(fields='field1', start=0, gap=10, end=100,
564 |     ...                           limit=10, mincount=1)
565 | 
566 | Alternatively, you can create ranges of dates using Solr's `date math` syntax.
567 | This next example creates a facet for each of the last 12 months.
568 | 
569 | ::
570 | 
571 |     >>> si.query(...).facet_range(fields='field1', start='NOW-12MONTHS/MONTH',
572 |     ...                           gap='+1MONTHS', end='NOW/MONTH')
573 | 
574 | See
575 | https://cwiki.apache.org/confluence/display/solr/Working+with+Dates#WorkingwithDates-DateMath
576 | for more details on `date math` syntax.
577 | 
578 | To pass a parameter containing periods, such as ``enum.cache.minDf``, use ``**`` syntax::
579 | 
580 |     >>> si.query(...).facet_by(**{"enum.cache.minDf": 25})
581 | 
582 | You can also facet on the result of one or more queries, using the
583 | ``facet_query()`` method. For example:
584 | 
585 | ::
586 | 
587 |     >>> fquery = si.query("game").facet_query(price__lt=7).facet_query(price__gte=7)
588 |     >>> print(fquery.execute().facet_counts.facet_queries)
589 |     [('price:[7.0 TO *]', 1), ('price:{* TO 7.0}', 1)]
590 | 
591 | This will facet the results according to the two queries specified, so you can
592 | see how many of the results cost less than 7, and how many cost 7 or more.
593 | 
594 | The results come back this time in the ``facet_queries`` object, but have the
595 | same form as before. The facets are shown as a list of tuples, mapping query
596 | to number of results.
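
Because facet counts come back as plain lists of ``(value, count)`` tuples,
postprocessing them is ordinary Python. A small sketch (the field name is the
one used in the grouping examples below):

::

    >>> resp = si.query("thief").facet_by("genre_s", mincount=1).paginate(rows=0).execute()
    >>> for value, count in resp.facet_counts.facet_fields["genre_s"]:
    ...     print("%s: %s" % (value, count))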
597 | 
598 | Facet pivot TODO https://wiki.apache.org/solr/HierarchicalFaceting#Pivot_Facets
599 | 
600 | Result grouping
601 | ---------------
602 | 
603 | For background, see http://wiki.apache.org/solr/FieldCollapsing.
604 | 
605 | Solr 3.3 added support for result grouping.
606 | 
607 | An example call looks like this:
608 | 
609 | ::
610 | 
611 |     >>> resp = si.query().group_by('genre_s', limit=10).execute()
612 |     >>> for g in resp.groups.genre_s['groups']:
613 |     ...     print("%s #%s" % (g['groupValue'], len(g['doclist']['docs'])))
614 |     ...     for d in g['doclist']['docs']:
615 |     ...         print("\t%s" % d['name'])
616 |     fantasy #3
617 |         The Lightning Thief
618 |         The Sea of Monsters
619 |         Sophie's World : The Greek Philosophers
620 |     IT #1
621 |         Lucene in Action, Second Edition
622 | 
623 | Highlighting
624 | ------------
625 | 
626 | For background, see http://wiki.apache.org/solr/HighlightingParameters.
627 | 
628 | Alongside the normal search results, you can ask Solr to return fragments of
629 | the documents, with relevant search terms highlighted. You do this with the
630 | chainable ``highlight()`` method.
631 | 
632 | Specify which field we would like to see highlighted:
633 | 
634 | ::
635 | 
636 |     >>> resp = si.query('thief').highlight('name').execute()
637 |     >>> resp.highlighting
638 |     {'978-0641723445': {'name': ['The Lightning <em>Thief</em>']}}
639 | 
640 | It is also possible to specify an array of fields::
641 | 
642 |     >>> si.query('thief').highlight(['name', 'title']).options()
643 |     {'hl': True, 'hl.fl': 'name,title', 'q': 'thief'}
644 | 
645 | Highlighting values will also be included in ``response.result.docs`` and in
646 | grouped results as a ``solr_highlights`` key, so that they can be accessed
647 | during result iteration.
648 | 
649 | PostingsHighlighter
650 | -------------------
651 | 
652 | For background, see https://wiki.apache.org/solr/PostingsHighlighter.
653 | 
654 | PostingsHighlighter is a highlighter, added in Solr 4.3, that produces
655 | summaries of the matching documents. You use it with the
656 | chainable ``postings_highlight()`` method.
657 | 
658 | Specify which field we would like to see highlighted:
659 | 
660 | ::
661 | 
662 |     >>> resp = si.query('thief').postings_highlight('name').execute()
663 |     >>> resp.highlighting
664 |     {'978-0641723445': {'name': ['The Lightning <em>Thief</em>']}}
665 | 
666 | It is also possible to specify an array of fields::
667 | 
668 |     >>> si.query('thief').postings_highlight(['name', 'title']).options()
669 |     {'hl': True, 'hl.fl': 'name,title', 'q': 'thief'}
670 | 
671 | 
672 | Term Vectors
673 | ------------
674 | 
675 | For background, see https://wiki.apache.org/solr/TermVectorComponent.
676 | 
677 | Alongside the normal search results, you can ask Solr to return the term
678 | vector, the term frequency, inverse document frequency, and position and offset
679 | information for the documents.
680 | You do this with the chainable ``term_vector()`` method.
681 | 
682 | ::
683 | 
684 |     >>> resp = si.query('thief').term_vector(all=True).execute()
685 | 
686 | You can also specify for which fields you would like to get information:
687 | 
688 | ::
689 | 
690 |     >>> resp = si.query('thief').term_vector('name').execute()
691 | 
692 | It is also possible to specify an array of fields::
693 | 
694 |     >>> si.query('thief').term_vector(['name', 'title'], all=True).execute()
695 | 
696 | 
697 | More Like This
698 | --------------
699 | 
700 | For background, see http://wiki.apache.org/solr/MoreLikeThis. Alongside a set
701 | of search results, Solr can suggest other documents that are similar to each of
702 | the documents in the search result.
703 | 
704 | More-like-this searches are accomplished with the ``mlt()`` chainable option.
705 | Solr needs to know which fields to consider when deciding similarity.
706 | 
707 | ::
708 | 
709 |     >>> resp = si.query(id="978-0641723445").mlt("genre_s", mintf=1, mindf=1).execute()
710 |     >>> resp.more_like_these
711 |     {'978-0641723445': <scorched.response.SolrResult object at 0x...>}
712 | 
713 |     >>> resp.more_like_these['978-0641723445'].docs
714 |     [{'_version_': 1462820023772905472,
715 |       'author': 'Rick Riordan',
716 |       'author_s': 'Rick Riordan',
717 |       'cat': ['book', 'paperback'],
718 |       'genre_s': 'fantasy',
719 |       'id': '978-1423103349',
720 |       'inStock': True,
721 |       'name': 'The Sea of Monsters',
722 |       'pages_i': 304,
723 |       'price': 6.49,
724 |       'price_c': '6.49,USD',
725 |       'sequence_i': 2,
726 |       'series_t': 'Percy Jackson and the Olympians'},
727 |      {'_version_': 1462820023776051200,
728 |       'author': 'Jostein Gaarder',
729 |       'author_s': 'Jostein Gaarder',
730 |       'cat': ['book', 'paperback'],
731 |       'genre_s': 'fantasy',
732 |       'id': '978-1857995879',
733 |       'inStock': True,
734 |       'name': "Sophie's World : The Greek Philosophers",
735 |       'pages_i': 64,
736 |       'price': 3.07,
737 |       'price_c': '3.07,USD',
738 |       'sequence_i': 1}]
739 | 
740 | Here we used ``mlt()`` options to alter the default behaviour (because our
741 | corpus is so small that Solr wouldn't find any similar documents with the
742 | standard behaviour).
743 | 
744 | The ``SolrResponse`` object has a ``more_like_these`` attribute. This is a
745 | dictionary of ``SolrResult`` objects, one dictionary entry for each result of
746 | the main query. Here, the query only produced one result (because we searched
747 | on the ``uniqueKey``). Inspecting the ``SolrResult`` object, we find that it
748 | contains two documents.
749 | 
750 | We can read the above result as saying that under the ``mlt()`` parameters
751 | requested, there were two documents similar to the search result.
752 | 
753 | ``mlt()`` also takes a list of options (see the Solr documentation for a full
754 | explanation):
755 | 
756 | 
757 | ::
758 | 
759 |     fields, count, mintf, mindf, minwl, maxwl, maxqt, maxntp, boost
760 | 
761 | 
762 | Alternative parser
763 | ------------------
764 | 
765 | Scorched supports the `dismax` and `edismax` parsers. These can be selected by
766 | simply calling ``alt_parser()``.
767 | 
768 | Example::
769 | 
770 |     >>> si.query().alt_parser('edismax', mm=2).options()
771 |     {'defType': 'edismax', 'mm': 2, 'q': '*:*'}
772 | 
773 | The `edismax` parser also supports field aliases. Here is an example where
774 | ``foo`` is aliased to the fields ``bar`` and ``baz``.
775 | 
776 | Example::
777 | 
778 |     >>> si.query().alt_parser('edismax', f={'foo': ['bar', 'baz']}).options()
779 |     {'defType': 'edismax', 'q': '*:*', 'f.foo.qf': 'bar baz'}
780 | 
781 | 
782 | Set request handler
783 | -------------------
784 | 
785 | For background, see https://wiki.apache.org/solr/SolrRequestHandler.
786 | It is possible to choose the request handler on a per-query basis. To use a
787 | different request handler, call ``set_requesthandler()``.
788 | 
789 | Example::
790 | 
791 |     >>> si.query().set_requesthandler('foo').options()
792 |     {'q': '*:*', 'qt': 'foo'}
793 | 
794 | Set debug
795 | ---------
796 | 
797 | For background, see https://wiki.apache.org/solr/CommonQueryParameters#Debugging.
798 | Sometimes we need more information about what Solr is doing with our query. To
799 | get this additional information, we set ``debug``.
800 | 
801 | Example::
802 | 
803 |     >>> si.query().debug().options()
804 |     {'debugQuery': True, 'q': '*:*'}
805 |     >>> si.query().debug().execute().debug
806 |     {'QParser': 'LuceneQParser',
807 |      'explain': {'978-1423103349': '\n1.0 = (MATCH) MatchAllDocsQuery, product of:\n 1.0 = queryNorm\n',
808 |                  '978-1857995879': '\n1.0 = (MATCH) MatchAllDocsQuery, product of:\n 1.0 = queryNorm\n',
809 |                  '978-1933988177': '\n1.0 = (MATCH) MatchAllDocsQuery, product of:\n 1.0 = queryNorm\n'},
810 |      'parsedquery': 'MatchAllDocsQuery(*:*)',
811 |      'parsedquery_toString': '*:*',
812 |      'querystring': '*:*',
813 |      'rawquerystring': '*:*',
814 |      'timing': {'prepare': {'debug': {'time': 0.0},
815 |                             'facet': {'time': 0.0},
816 |                             'highlight': {'time': 0.0},
817 |                             'mlt': {'time': 0.0},
818 |                             'query': {'time': 0.0},
819 |                             'stats': {'time': 0.0},
820 |                             'time': 0.0},
821 |                 'process': {'debug': {'time': 0.0},
822 |                             'facet': {'time': 0.0},
823 |                             'highlight': {'time': 0.0},
824 |                             'mlt': {'time': 0.0},
825 |                             'query': {'time': 1.0},
826 |                             'stats': {'time': 0.0},
827 |                             'time': 1.0},
828 |                 'time': 1.0}}
829 | 
830 | 
831 | Enable spellchecking
832 | --------------------
833 | 
834 | For background, see http://wiki.apache.org/solr/SpellCheckComponent.
835 | It is possible to activate spellchecking for your query. To do that, use
836 | ``spellcheck()``.
837 | 
838 | 
839 | Example::
840 | 
841 |     >>> si.query().spellcheck().options()
842 |     {'q': '*:*', 'spellcheck': 'true'}
843 | 
844 | Realtime Get
845 | ------------
846 | 
847 | For background, see https://wiki.apache.org/solr/RealTimeGet.
848 | 
849 | Solr 4.0 added support for retrieval of documents that are not yet committed.
850 | The retrieval can only be done by id::
851 | 
852 |     >>> resp = si.get("978-1423103349")
853 | 
854 | You can also pass multiple ids::
855 | 
856 |     >>> resp = si.get(["978-0641723445", "978-1423103349"])
857 | 
858 | The return value is the same as for a normal search.
859 | 
860 | Stats
861 | -----
862 | 
863 | For background, see https://wiki.apache.org/solr/StatsComponent.
864 | 
865 | Solr can return simple statistics for indexed numeric fields::
866 | 
867 |     >>> resp = si.query().stats('int_field')
868 | 
869 | You can also pass multiple fields::
870 | 
871 |     >>> resp = si.query().stats(['int_field', 'float_field'])
872 | 
873 | The resulting statistics are available on the response at
874 | ``resp.stats.stats_fields``.
875 | 
876 | 
877 | 
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | .. _usage:
2 | 
3 | First steps
4 | ===========
5 | 
6 | Installing scorched
7 | -------------------
8 | 
9 | You can install scorched via setuptools or pip.
10 | 
11 | To use scorched, you'll need an Apache Solr installation. Scorched
12 | currently requires at least version 3.6.1 of Apache Solr.
13 | 
14 | Using pip
15 | ~~~~~~~~~
16 | 
17 | If you have `pip <https://pip.pypa.io/>`_ installed, just type:
18 | 
19 | ::
20 | 
21 |     $ pip install scorched
22 | 
23 | If you've got an old version of scorched installed, and want to
24 | upgrade, then type:
25 | 
26 | ::
27 | 
28 |     $ pip install -U scorched
29 | 
30 | That's all you need to do; all dependencies will be pulled in automatically.
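
As a quick sanity check that the install worked (nothing scorched-specific is
assumed here beyond the package itself), you can import it from the command
line:

::

    $ python -c "import scorched; print(scorched.SolrInterface)"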
31 | 
32 | 
33 | Configuring a connection
34 | ------------------------
35 | 
36 | Whether you're querying or updating a Solr server, you need to set up a
37 | connection first. Pass the URL of the Solr server to a
38 | ``SolrInterface`` object.
39 | 
40 | ::
41 | 
42 |     >>> import scorched
43 |     >>> si = scorched.SolrInterface("http://localhost:8983/solr/")
44 | 
45 | 
46 | .. note:: Optional arguments to connection:
47 |     :class:`scorched.connection.SolrConnection`
48 | 
49 | 
50 | Adding documents
51 | ----------------
52 | 
53 | To add data to the scorched instance, use a Python dictionary.
54 | 
55 | ::
56 | 
57 |     >>> document = {"id": "0553573403",
58 |     ...             "cat": "book",
59 |     ...             "name": "A Game of Thrones",
60 |     ...             "price": 7.99,
61 |     ...             "inStock": True,
62 |     ...             "author_t":
63 |     ...                 "George R.R. Martin",
64 |     ...             "series_t": "A Song of Ice and Fire",
65 |     ...             "sequence_i": 1,
66 |     ...             "genre_s": "fantasy"}
67 |     >>> si.add(document)
68 | 
69 | You can add lists of dictionaries in the same way. Given the example
70 | "books.json" file, you could feed it to scorched like so:
71 | 
72 | ::
73 | 
74 |     >>> import json, os
75 |     >>> file = os.path.join(os.path.dirname(__file__), "dumps", "books.json")
76 |     >>> with open(file) as f:
77 |     ...     datajson = f.read()
78 |     ...     docs = json.loads(datajson)
79 |     >>> si.add(docs)
80 |     >>> si.commit()
81 | 
82 | .. note:: Optional arguments to add:
83 | 
84 |     See http://wiki.apache.org/solr/UpdateXmlMessages for details. Or the api
85 |     documentation: TODO link
86 | 
87 | Deleting documents
88 | ------------------
89 | 
90 | You can delete documents individually, or delete all documents resulting from a
91 | query.
92 | 
93 | To delete documents individually, you need to pass a list of the document ids
94 | to scorched.
95 | 
96 | ::
97 | 
98 |     >>> si.delete_by_ids([obj.id])
99 |     >>> si.delete_by_ids([x.id for x in objs])
100 | 
101 | To delete documents by query, you construct one or more queries from `Q`
102 | objects, in the same way that you construct a query as explained in
103 | :ref:`optional-terms`. You then pass those queries into the
104 | ``delete_by_query()`` method:
105 | 
106 | ::
107 | 
108 |     >>> si.delete_by_query(query=si.Q("game"))
109 | 
110 | To clear the entire index, there is a shortcut which simply deletes every
111 | document in the index.
112 | 
113 | ::
114 | 
115 |     >>> si.delete_all()
116 | 
117 | Deletions, like additions, only take effect after a commit (or autocommit).
118 | 
119 | .. note:: Optional arguments to delete:
120 | 
121 |     See http://wiki.apache.org/solr/UpdateXmlMessages for details. Or the api
122 |     documentation: TODO link
123 | 
124 | Optimizing
125 | ----------
126 | 
127 | After updating an index with new data, it becomes fragmented and performance
128 | suffers. This means that you need to optimize the index. When and how often you
129 | do this is something you need to decide on a case-by-case basis. If you only
130 | add data infrequently, you should optimize after every new update; if you
131 | trickle in data on a frequent basis, you need to think more about it. See
132 | http://wiki.apache.org/solr/SolrPerformanceFactors#Optimization_Considerations.
133 | 
134 | Either way, to optimize an index, simply call:
135 | 
136 | ::
137 | 
138 |     >>> si.optimize()
139 | 
140 | A Solr optimize also performs a commit, so if you're about to ``optimize()``
141 | anyway, you can leave off the preceding ``commit()``. It doesn't particularly
142 | hurt to do both though.
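
Putting the update operations together, a typical bulk load might look like
this (a sketch reusing the ``docs`` list from `Adding documents`_ above):

::

    >>> si.add(docs)       # send the documents to Solr
    >>> si.commit()        # make them visible to searches
    >>> si.optimize()      # optionally merge segments after a large load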
143 | 144 | Rollback 145 | -------- 146 | 147 | If you haven’t yet added/deleted documents since the last commit, you can issue 148 | a rollback to revert the index state to that of the last commit. 149 | 150 | :: 151 | 152 | >>> si.rollback() 153 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.isort] 2 | profile = "black" 3 | 4 | [tool.black] 5 | py37 = true 6 | 7 | [tool.check-manifest] 8 | ignore = [".flake8", ".pre-commit-config.yaml", "pyproject.toml"] 9 | -------------------------------------------------------------------------------- /scorched/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from scorched.connection import SolrInterface 3 | 4 | __all__ = ['SolrInterface'] 5 | -------------------------------------------------------------------------------- /scorched/compat.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | _ver = sys.version_info 4 | is_py2 = (_ver[0] == 2) 5 | is_py3 = (_ver[0] == 3) 6 | 7 | 8 | if is_py2: # pragma: no cover 9 | from urllib import (quote, unquote, quote_plus, unquote_plus, urlencode, 10 | getproxies, proxy_bypass) 11 | from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag 12 | from urllib2 import parse_http_list 13 | import cookielib 14 | from Cookie import Morsel 15 | from StringIO import StringIO 16 | from httplib import IncompleteRead 17 | 18 | builtin_str = str 19 | bytes = str 20 | str = unicode 21 | basestring = basestring 22 | numeric_types = (int, long, float) 23 | 24 | 25 | elif is_py3: # pragma: no cover 26 | from urllib.parse import (urlparse, urlunparse, urljoin, urlsplit, 27 | urlencode, quote, unquote, quote_plus, 28 | unquote_plus, urldefrag) 29 | from urllib.request import parse_http_list, getproxies, proxy_bypass 30 | from http import cookiejar as cookielib 31 | from http.cookies import Morsel 32 | from io import StringIO 33 | from http.client import IncompleteRead 34 | 35 | builtin_str = str 36 | str = str 37 | bytes = bytes 38 | basestring = (str, bytes) 39 | numeric_types = (int, float) 40 | 41 | 42 | def python_2_unicode_compatible(cls): 43 | """ 44 | A decorator that defines __unicode__ and __str__ methods under Python 45 | 2. Under Python 3 it does nothing. 46 | 47 | To support Python 2 and 3 with a single code base, define a __str__ 48 | method returning unicode text and apply this decorator to the class. 49 | 50 | The implementation comes from django.utils.encoding. 51 | """ 52 | if not is_py3: # pragma: no cover 53 | cls.__unicode__ = cls.__str__ 54 | cls.__str__ = lambda self: self.__unicode__().encode('utf-8') 55 | return cls 56 | -------------------------------------------------------------------------------- /scorched/connection.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import itertools 4 | import json 5 | import time 6 | import warnings 7 | 8 | import requests 9 | 10 | import scorched.compat 11 | import scorched.dates 12 | import scorched.exc 13 | import scorched.response 14 | import scorched.search 15 | from scorched.compat import str 16 | 17 | MAX_LENGTH_GET_URL = 2048 18 | # Jetty default is 4096; Tomcat default is 8192; picking 2048 to be 19 | # conservative. 
20 | 21 | 22 | def is_iter(val): 23 | return isinstance(val, (tuple, list)) 24 | 25 | 26 | class SolrConnection(object): 27 | readable = True 28 | writeable = True 29 | 30 | def __init__( 31 | self, 32 | url, 33 | http_connection, 34 | mode, 35 | retry_timeout, 36 | max_length_get_url, 37 | search_timeout=(), 38 | ): 39 | """ 40 | :param url: url to Solr 41 | :type url: str 42 | :param http_connection: existing requests.Session object, or None to 43 | create a new one. 44 | :type http_connection: requests connection 45 | :param mode: mode (readable, writable) Solr 46 | :type mode: str 47 | :param retry_timeout: timeout until retry 48 | :type retry_timeout: int 49 | :param max_length_get_url: max length until switch to post 50 | :type max_length_get_url: int 51 | :param search_timeout: (optional) How long to wait for the server to 52 | send data before giving up, as a float, or a 53 | (connect timeout, read timeout) tuple. 54 | :type search_timeout: float or tuple 55 | """ 56 | self.http_connection = http_connection or requests.Session() 57 | if mode == "r": 58 | self.writeable = False 59 | elif mode == "w": 60 | self.readable = False 61 | self.url = url.rstrip("/") + "/" 62 | self.update_url = self.url + "update/json" 63 | self.select_url = self.url + "select/" 64 | self.mlt_url = self.url + "mlt/" 65 | self.get_url = self.url + "get/" 66 | self.retry_timeout = retry_timeout 67 | self.max_length_get_url = max_length_get_url 68 | self.search_timeout = search_timeout 69 | 70 | def request(self, *args, **kwargs): 71 | """ 72 | :param args: arguments 73 | :type args: tuple 74 | :param kwargs: key word arguments 75 | :type kwargs: dict 76 | 77 | .. todo:: 78 | Make this api more explicit! 79 | """ 80 | try: 81 | return self.http_connection.request(*args, **kwargs) 82 | except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): 83 | if self.retry_timeout < 0: 84 | raise 85 | time.sleep(self.retry_timeout) 86 | return self.http_connection.request(*args, **kwargs) 87 | 88 | def get(self, ids, fl=None): 89 | """ 90 | Perform a RealTime Get 91 | """ 92 | # We always send the ids parameter to force the standart output format, 93 | # but use the id parameter for our actual data as `ids` can no handle 94 | # ids with commas 95 | params = [ 96 | ("ids", ""), 97 | ("wt", "json"), 98 | ] 99 | if is_iter(ids): 100 | for id in ids: 101 | params.append(("id", id)) 102 | else: 103 | params.append(("id", ids)) 104 | if fl: 105 | params.append(("fl", ",".join(fl))) 106 | 107 | qs = scorched.compat.urlencode(params) 108 | url = "%s?%s" % (self.get_url, qs) 109 | 110 | response = self.request("GET", url) 111 | if response.status_code != 200: 112 | raise scorched.exc.SolrError(response) 113 | return response.text 114 | 115 | def update(self, update_doc, **kwargs): 116 | """ 117 | :param update_doc: data send to Solr 118 | :type update_doc: json data 119 | :returns: json -- json string 120 | 121 | Send json to Solr 122 | """ 123 | if not self.writeable: 124 | raise TypeError("This Solr instance is only for reading") 125 | body = update_doc 126 | if body: 127 | headers = {"Content-Type": "application/json; charset=utf-8"} 128 | else: 129 | headers = {} 130 | url = self.url_for_update(**kwargs) 131 | response = self.request("POST", url, data=body, headers=headers) 132 | if response.status_code != 200: 133 | raise scorched.exc.SolrError(response) 134 | return response.text 135 | 136 | def url_for_update( 137 | self, 138 | commit=None, 139 | commitWithin=None, 140 | softCommit=None, 141 | optimize=None, 
142 | waitSearcher=None, 143 | expungeDeletes=None, 144 | maxSegments=None, 145 | ): 146 | """ 147 | :param commit: optional -- commit actions 148 | :type commit: bool 149 | :param commitWithin: optional -- document will be added within that 150 | time 151 | :type commitWithin: int 152 | :param softCommit: optional -- performant commit without "on-disk" 153 | guarantee 154 | :type softCommit: bool 155 | :param optimize: optional -- optimize forces all of the index segments 156 | to be merged into a single segment first. 157 | :type optimze: bool 158 | :param waitSearcher: optional -- block until a new searcher is opened 159 | and registered as the main query searcher, 160 | :type waitSearcher: bool 161 | :param expungeDeletes: optional -- merge segments with deletes away 162 | :type expungeDeletes: bool 163 | :param maxSegments: optional -- optimizes down to at most this number 164 | of segments 165 | :type maxSegments: int 166 | :returns: str -- url with all extra paramters set 167 | 168 | This functions sets all extra parameters for the ``optimize`` and 169 | ``commit`` function. 170 | """ 171 | extra_params = {} 172 | if commit is not None: 173 | extra_params["commit"] = "true" if commit else "false" 174 | if commitWithin is not None: 175 | try: 176 | extra_params["commitWithin"] = int(commitWithin) 177 | except (TypeError, ValueError): 178 | raise ValueError("commitWithin should be a number in milliseconds") 179 | if extra_params["commitWithin"] < 0: 180 | raise ValueError("commitWithin should be a number in milliseconds") 181 | extra_params["commitWithin"] = str(extra_params["commitWithin"]) 182 | if softCommit is not None: 183 | extra_params["softCommit"] = "true" if softCommit else "false" 184 | if optimize is not None: 185 | extra_params["optimize"] = "true" if optimize else "false" 186 | if waitSearcher is not None: 187 | extra_params["waitSearcher"] = "true" if waitSearcher else "false" 188 | if expungeDeletes is not None: 189 | extra_params["expungeDeletes"] = "true" if expungeDeletes else "false" 190 | if maxSegments is not None: 191 | try: 192 | extra_params["maxSegments"] = int(maxSegments) 193 | except (TypeError, ValueError): 194 | raise ValueError("maxSegments") 195 | if extra_params["maxSegments"] <= 0: 196 | raise ValueError("maxSegments should be a positive number") 197 | extra_params["maxSegments"] = str(extra_params["maxSegments"]) 198 | if "expungeDeletes" in extra_params and "commit" not in extra_params: 199 | raise ValueError("Can't do expungeDeletes without commit") 200 | if "maxSegments" in extra_params and "optimize" not in extra_params: 201 | raise ValueError("Can't do maxSegments without optimize") 202 | if extra_params: 203 | return "%s?%s" % ( 204 | self.update_url, 205 | scorched.compat.urlencode(sorted(extra_params.items())), 206 | ) 207 | else: 208 | return self.update_url 209 | 210 | def select(self, params): 211 | """ 212 | :param params: LuceneQuery converted to a dictionary with search 213 | queries 214 | :type params: dict 215 | :returns: json -- json string 216 | 217 | We perform here a search on the `select` handler of Solr. 218 | """ 219 | if not self.readable: 220 | raise TypeError("This Solr instance is only for writing") 221 | params.append(("wt", "json")) 222 | qs = scorched.compat.urlencode(params) 223 | url = "%s?%s" % (self.select_url, qs) 224 | if len(url) > self.max_length_get_url: 225 | warnings.warn( 226 | "Long query URL encountered - POSTing instead of " 227 | "GETting. 
This query will not be cached at the HTTP layer" 228 | ) 229 | url = self.select_url 230 | method = "POST" 231 | kwargs = { 232 | "data": qs, 233 | "headers": {"Content-Type": "application/x-www-form-urlencoded"}, 234 | } 235 | else: 236 | method = "GET" 237 | kwargs = {} 238 | if self.search_timeout != (): 239 | kwargs["timeout"] = self.search_timeout 240 | response = self.request(method, url, **kwargs) 241 | if response.status_code != 200: 242 | raise scorched.exc.SolrError(response) 243 | return response.text 244 | 245 | def mlt(self, params, content=None): 246 | """ 247 | :param params: LuceneQuery converted to a dictionary with search 248 | queries 249 | :type params: dict 250 | :returns: json -- json string 251 | 252 | Perform a MoreLikeThis query using the content specified 253 | There may be no content if stream.url is specified in the params. 254 | """ 255 | if not self.readable: 256 | raise TypeError("This Solr instance is only for writing") 257 | params.append(("wt", "json")) 258 | qs = scorched.compat.urlencode(params) 259 | base_url = "%s?%s" % (self.mlt_url, qs) 260 | method = "GET" 261 | kwargs = {} 262 | if content is None: 263 | url = base_url 264 | else: 265 | get_url = "%s&stream.body=%s" % ( 266 | base_url, 267 | scorched.compat.quote_plus(content), 268 | ) 269 | if len(get_url) <= self.max_length_get_url: 270 | url = get_url 271 | else: 272 | url = base_url 273 | method = "POST" 274 | kwargs = { 275 | "data": content, 276 | "headers": {"Content-Type": "text/plain; charset=utf-8"}, 277 | } 278 | response = self.request(method, url, **kwargs) 279 | if response.status_code != 200: 280 | raise scorched.exc.SolrError(response.content) 281 | return response.text 282 | 283 | 284 | class SolrInterface(object): 285 | remote_schema_file = "schema?wt=json" 286 | 287 | def __init__( 288 | self, 289 | url, 290 | http_connection=None, 291 | mode="", 292 | retry_timeout=-1, 293 | max_length_get_url=MAX_LENGTH_GET_URL, 294 | search_timeout=(), 295 | ): 296 | """ 297 | :param url: url to Solr 298 | :type url: str 299 | :param http_connection: optional -- already existing connection 300 | :type http_connection: requests connection 301 | :param mode: optional -- mode (readable, writable) Solr 302 | :type mode: str 303 | :param retry_timeout: optional -- timeout until retry 304 | :type retry_timeout: int 305 | :param max_length_get_url: optional -- max length until switch to post 306 | :type max_length_get_url: int 307 | :param search_timeout: (optional) How long to wait for the server to 308 | send data before giving up, as a float, or a 309 | (connect timeout, read timeout) tuple. 310 | :type search_timeout: float or tuple 311 | """ 312 | 313 | self.conn = SolrConnection( 314 | url, http_connection, mode, retry_timeout, max_length_get_url 315 | ) 316 | self.schema = self.init_schema() 317 | self._datefields = self._extract_datefields(self.schema) 318 | 319 | def init_schema(self): 320 | response = self.conn.request( 321 | "GET", scorched.compat.urljoin(self.conn.url, self.remote_schema_file) 322 | ) 323 | if response.status_code != 200: 324 | raise EnvironmentError( 325 | "Couldn't retrieve schema document - status code %s\n%s" 326 | % (response.status_code, response.content) 327 | ) 328 | return response.json()["schema"] 329 | 330 | def _extract_datefields(self, schema): 331 | # attn: in modern solr (>=8.x) date fields are declared 332 | # as 334 | # instead of 336 | # This schema parsing is determining the fields by name 337 | # and not by java class type. 
Therefore this is error-prone. 338 | ret = [x["name"] for x in schema["fields"] if x["type"] in ["pdate", "date"]] 339 | ret.extend( 340 | [ 341 | x["name"] 342 | for x in schema["dynamicFields"] 343 | if x["type"] in ["pdate", "date"] 344 | ] 345 | ) 346 | return ret 347 | 348 | def _should_skip_value(self, value): 349 | if value is None: 350 | return True 351 | if isinstance(value, dict) and "set" in value and value["set"] is None: 352 | return True 353 | return False 354 | 355 | def _prepare_date(self, value): 356 | """Prepare a value of type date""" 357 | if is_iter(value): 358 | value = [str(scorched.dates.solr_date(v)) for v in value] 359 | else: 360 | value = str(scorched.dates.solr_date(value)) 361 | return value 362 | 363 | def _prepare_docs(self, docs): 364 | prepared_docs = [] 365 | for doc in docs: 366 | new_doc = {} 367 | for name, value in list(doc.items()): 368 | # XXX remove all None fields this is needed for adding date 369 | # fields 370 | if self._should_skip_value(value): 371 | continue 372 | if scorched.dates.is_datetime_field(name, self._datefields): 373 | if isinstance(value, dict) and "set" in value: 374 | value["set"] = self._prepare_date(value["set"]) 375 | else: 376 | value = self._prepare_date(value) 377 | new_doc[name] = value 378 | prepared_docs.append(new_doc) 379 | return prepared_docs 380 | 381 | def add(self, docs, chunk=100, **kwargs): 382 | """ 383 | :param docs: documents to be added 384 | :type docs: dict 385 | :param chunk: optional -- size of chunks in which the add command 386 | should be split 387 | :type chunk: int 388 | :param kwargs: optinal -- additional arguments 389 | :type kwargs: dict 390 | :returns: list of SolrUpdateResponse -- A Solr response object. 391 | 392 | Add a document or a list of document to Solr. 393 | """ 394 | if hasattr(docs, "items") or not is_iter(docs): 395 | docs = [docs] 396 | # to avoid making messages too large, we break the message every 397 | # chunk docs. 398 | ret = [] 399 | for doc_chunk in grouper(docs, chunk): 400 | update_message = json.dumps(self._prepare_docs(doc_chunk)) 401 | ret.append( 402 | scorched.response.SolrUpdateResponse.from_json( 403 | self.conn.update(update_message, **kwargs) 404 | ) 405 | ) 406 | return ret 407 | 408 | def delete_by_query(self, query, **kwargs): 409 | """ 410 | :param query: criteria how witch entries should be deleted 411 | :type query: LuceneQuery 412 | :returns: SolrUpdateResponse -- A Solr response object. 413 | 414 | Delete entries by a given query 415 | """ 416 | delete_message = json.dumps({"delete": {"query": str(query)}}) 417 | ret = scorched.response.SolrUpdateResponse.from_json( 418 | self.conn.update(delete_message, **kwargs) 419 | ) 420 | return ret 421 | 422 | def delete_by_ids(self, ids, **kwargs): 423 | """ 424 | :param ids: ids of entries that should be deleted 425 | :type ids: list 426 | :returns: SolrUpdateResponse -- A Solr response object. 
427 | 428 | Delete entries by a given id 429 | """ 430 | delete_message = json.dumps({"delete": ids}) 431 | ret = scorched.response.SolrUpdateResponse.from_json( 432 | self.conn.update(delete_message, **kwargs) 433 | ) 434 | return ret 435 | 436 | def commit(self, waitSearcher=None, expungeDeletes=None, softCommit=None): 437 | """ 438 | :param waitSearcher: optional -- block until a new searcher is opened 439 | and registered as the main query searcher, making 440 | the changes visible 441 | :type waitSearcher: bool 442 | :param expungeDeletes: optional -- merge segments with deletes away 443 | :type expungeDeletes: bool 444 | :param softCommit: optional -- perform a soft commit - this will 445 | refresh the 'view' of the index in a more performant 446 | manner, but without "on-disk" guarantees. 447 | :type softCommit: bool 448 | :returns: SolrUpdateResponse -- A Solr response object. 449 | 450 | A commit operation makes index changes visible to new search requests. 451 | """ 452 | ret = scorched.response.SolrUpdateResponse.from_json( 453 | self.conn.update( 454 | '{"commit": {}}', 455 | commit=True, 456 | waitSearcher=waitSearcher, 457 | expungeDeletes=expungeDeletes, 458 | softCommit=softCommit, 459 | ) 460 | ) 461 | return ret 462 | 463 | def optimize(self, waitSearcher=None, maxSegments=None): 464 | """ 465 | :param waitSearcher: optional -- block until a new searcher is opened 466 | and registered as the main query searcher, making 467 | the changes visible 468 | :type waitSearcher: bool 469 | :param maxSegments: optional -- optimizes down to at most this number 470 | of segments 471 | :type maxSegments: int 472 | :returns: SolrUpdateResponse -- A Solr response object. 473 | 474 | An optimize is like a hard commit except that it forces all of the 475 | index segments to be merged into a single segment first. 476 | """ 477 | ret = scorched.response.SolrUpdateResponse.from_json( 478 | self.conn.update( 479 | '{"optimize": {}}', 480 | optimize=True, 481 | waitSearcher=waitSearcher, 482 | maxSegments=maxSegments, 483 | ) 484 | ) 485 | return ret 486 | 487 | def rollback(self): 488 | """ 489 | :returns: SolrUpdateResponse -- A Solr response object. 490 | 491 | The rollback command rollbacks all add/deletes made to the index since 492 | the last commit 493 | """ 494 | ret = scorched.response.SolrUpdateResponse.from_json( 495 | self.conn.update('{"rollback": {}}') 496 | ) 497 | return ret 498 | 499 | def delete_all(self): 500 | """ 501 | :returns: SolrUpdateResponse -- A Solr response object. 502 | 503 | Delete everything 504 | """ 505 | return self.delete_by_query(self.Q(**{"*": "*"})) 506 | 507 | def get(self, ids, fields=None): 508 | """ 509 | RealTime Get document(s) by id(s) 510 | 511 | :param ids: id(s) of the document(s) 512 | :type ids: list, string or int 513 | :param fields: optional -- list of fields to return 514 | :type fileds: list of strings 515 | """ 516 | ret = scorched.response.SolrResponse.from_get_json( 517 | self.conn.get(ids, fields), self._datefields 518 | ) 519 | return ret 520 | 521 | def search(self, **kwargs): 522 | """ 523 | :returns: SolrResponse -- A Solr response object. 524 | 525 | Search solr 526 | """ 527 | params = scorched.search.params_from_dict(**kwargs) 528 | ret = scorched.response.SolrResponse.from_json( 529 | self.conn.select(params), 530 | self.schema["uniqueKey"], 531 | self._datefields, 532 | ) 533 | return ret 534 | 535 | def query(self, *args, **kwargs): 536 | """ 537 | :returns: SolrSearch -- A solrsearch. 
538 | 539 | Build a Solr query 540 | """ 541 | q = scorched.search.SolrSearch(self) 542 | if len(args) + len(kwargs) > 0: 543 | return q.query(*args, **kwargs) 544 | else: 545 | return q 546 | 547 | def mlt_search(self, content=None, **kwargs): 548 | """ 549 | :returns: SolrResponse -- A Solr response object. 550 | 551 | More like this search Solr 552 | """ 553 | params = scorched.search.params_from_dict(**kwargs) 554 | ret = scorched.response.SolrResponse.from_json( 555 | self.conn.mlt(params, content=content), 556 | self.schema["uniqueKey"], 557 | self._datefields, 558 | ) 559 | return ret 560 | 561 | def mlt_query( 562 | self, 563 | fields, 564 | content=None, 565 | content_charset=None, 566 | url=None, 567 | query_fields=None, 568 | **kwargs 569 | ): 570 | """ 571 | :param fields: field names to compute similarity upon 572 | :type fields: list 573 | :param content: optional -- string on witch to find similar documents 574 | :type content: str 575 | :param content_charset: optional -- charset e.g. (iso-8859-1) 576 | :type content_charset: str 577 | :param url: optional -- like content but retrive directly from url 578 | :type url: str 579 | :param query_fields: optional -- adjust boosting values for ``fields`` 580 | :type query_fields: dict e.g. ({"a": 0.25, "b": 0.75}) 581 | :returns: MltSolrSearch 582 | 583 | Perform a similarity query on MoreLikeThisHandler 584 | 585 | The MoreLikeThisHandler is expected to be registered at the '/mlt' 586 | endpoint in the solrconfig.xml file of the server. 587 | 588 | Other MoreLikeThis specific parameters can be passed as kwargs without 589 | the 'mlt.' prefix. 590 | """ 591 | q = scorched.search.MltSolrSearch( 592 | self, content=content, content_charset=content_charset, url=url 593 | ) 594 | return q.mlt(fields=fields, query_fields=query_fields, **kwargs) 595 | 596 | def extract(self, fh, extractOnly=True, extractFormat="text"): 597 | """ 598 | :param fh: binary file (PDF, MSWord, ODF, ...) 599 | :type fh: open file handle 600 | :returns: SolrExtract 601 | 602 | Extract text and metadatada from binary file. 603 | 604 | The ExtractingRequestHandler is expected to be registered at the 605 | '/update/extract' endpoint in the solrconfig.xml file of the server. 
606 | """ 607 | url = self.conn.url + "update/extract" 608 | params = {"wt": "json"} 609 | if extractOnly: 610 | params["extractOnly"] = "true" 611 | params["extractFormat"] = extractFormat 612 | files = {"file": fh} 613 | response = self.conn.request("POST", url, params=params, files=files) 614 | if response.status_code != 200: 615 | raise scorched.exc.SolrError(response) 616 | return scorched.response.SolrExtract.from_json(response.json()) 617 | 618 | def Q(self, *args, **kwargs): 619 | q = scorched.search.LuceneQuery() 620 | q.add(args, kwargs) 621 | return q 622 | 623 | 624 | def grouper(iterable, n): 625 | """ 626 | grouper('ABCDEFG', 3) --> [['ABC'], ['DEF'], ['G']] 627 | """ 628 | i = iter(iterable) 629 | g = list(itertools.islice(i, 0, n)) 630 | while g: 631 | yield g 632 | g = list(itertools.islice(i, 0, n)) 633 | -------------------------------------------------------------------------------- /scorched/dates.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import fnmatch 3 | import math 4 | import re 5 | 6 | import pytz 7 | 8 | import scorched.exc 9 | 10 | year = r"[+/-]?\d+" 11 | tzd = r"Z|((?P[-+])(?P\d\d):(?P\d\d))" 12 | extended_iso_template = ( 13 | r"(?P" 14 | + year 15 | + r""") 16 | (-(?P\d\d) 17 | (-(?P\d\d) 18 | ([T%s](?P\d\d) 19 | :(?P\d\d) 20 | (:(?P\d\d) 21 | (.(?P\d+))?)? 22 | (""" 23 | + tzd 24 | + """)?)? 25 | )?)?""" 26 | ) 27 | extended_iso = extended_iso_template % " " 28 | extended_iso_re = re.compile("^" + extended_iso + "$", re.X) 29 | 30 | 31 | def datetime_from_w3_datestring(s): 32 | """We need to extend ISO syntax (as permitted by the standard) to allow 33 | for dates before 0AD and after 9999AD. This is how to parse such a string 34 | """ 35 | m = extended_iso_re.match(s) 36 | if not m: 37 | raise ValueError 38 | d = m.groupdict() 39 | d["year"] = int(d["year"]) 40 | d["month"] = int(d["month"] or 1) 41 | d["day"] = int(d["day"] or 1) 42 | d["hour"] = int(d["hour"] or 0) 43 | d["minute"] = int(d["minute"] or 0) 44 | d["fraction"] = d["fraction"] or "0" 45 | d["second"] = float("%s.%s" % ((d["second"] or "0"), d["fraction"])) 46 | del d["fraction"] 47 | if d["tzd_sign"]: 48 | if d["tzd_sign"] == "+": 49 | tzd_sign = 1 50 | elif d["tzd_sign"] == "-": 51 | tzd_sign = -1 52 | tz_delta = datetime_delta_factory( 53 | tzd_sign * int(d["tzd_hour"]), tzd_sign * int(d["tzd_minute"]) 54 | ) 55 | else: 56 | tz_delta = datetime_delta_factory(0, 0) 57 | del d["tzd_sign"] 58 | del d["tzd_hour"] 59 | del d["tzd_minute"] 60 | d["tzinfo"] = pytz.utc 61 | dt = datetime_factory(**d) + tz_delta 62 | return dt 63 | 64 | 65 | class DateTimeRangeError(ValueError): 66 | pass 67 | 68 | 69 | def datetime_factory(**kwargs): 70 | second = kwargs.get("second") 71 | if second is not None: 72 | f, i = math.modf(second) 73 | kwargs["second"] = int(i) 74 | kwargs["microsecond"] = int(f * 1000000) 75 | try: 76 | return datetime.datetime(**kwargs) 77 | except ValueError as e: 78 | raise DateTimeRangeError(e.args[0]) 79 | 80 | 81 | def datetime_delta_factory(hours, minutes): 82 | return datetime.timedelta(hours=hours, minutes=minutes) 83 | 84 | 85 | class solr_date(object): 86 | """ 87 | This class can be initialized from native python datetime 88 | objects and will serialize to a format appropriate for Solr 89 | """ 90 | 91 | def __init__(self, v): 92 | if isinstance(v, solr_date): 93 | self._dt_obj = v._dt_obj 94 | elif isinstance(v, str): 95 | self._dt_obj = datetime_from_w3_datestring(v) 96 | elif hasattr(v, "strftime"): 97 | 
self._dt_obj = self.from_date(v) 98 | else: 99 | raise scorched.exc.SolrError( 100 | "Cannot initialize solr_date from %s object" % type(v) 101 | ) 102 | 103 | def __hash__(self): 104 | return self._dt_obj.__hash__() 105 | 106 | @staticmethod 107 | def from_date(dt_obj): 108 | # Python datetime objects may include timezone information 109 | if hasattr(dt_obj, "tzinfo") and dt_obj.tzinfo: 110 | # but Solr requires UTC times. 111 | return dt_obj.astimezone(pytz.utc).replace(tzinfo=None) 112 | else: 113 | return dt_obj 114 | 115 | @property 116 | def microsecond(self): 117 | return self._dt_obj.microsecond 118 | 119 | def __repr__(self): 120 | return repr(self._dt_obj) 121 | 122 | def __str__(self): 123 | """Serialize a datetime object in the format required 124 | by Solr. See http://wiki.apache.org/solr/IndexingDates 125 | """ 126 | dt_obj = self._dt_obj 127 | if hasattr(dt_obj, "tzinfo") and dt_obj.tzinfo: 128 | # but Solr requires UTC times. 129 | dt_obj = dt_obj.astimezone(pytz.utc).replace(tzinfo=None) 130 | return "%sZ" % (dt_obj.isoformat(),) 131 | 132 | def __lt__(self, other): 133 | try: 134 | other = other._dt_obj 135 | except AttributeError: 136 | pass 137 | return self._dt_obj < other 138 | 139 | def __eq__(self, other): 140 | try: 141 | other = other._dt_obj 142 | except AttributeError: 143 | pass 144 | return self._dt_obj == other 145 | 146 | 147 | def is_datetime_field(name, datefields): 148 | if name in datefields: 149 | return True 150 | for fieldpattern in [d for d in datefields if "*" in d]: 151 | # XXX: there is better than fnmatch ? 152 | if fnmatch.fnmatch(name, fieldpattern): 153 | return True 154 | return False 155 | -------------------------------------------------------------------------------- /scorched/exc.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | 4 | class SolrError(Exception): 5 | pass 6 | -------------------------------------------------------------------------------- /scorched/response.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import json 4 | from collections.abc import Sequence 5 | 6 | import scorched.dates 7 | from scorched.compat import str 8 | from scorched.search import is_iter 9 | 10 | 11 | class SolrFacetCounts(object): 12 | members = ( 13 | "facet_dates", 14 | "facet_fields", 15 | "facet_queries", 16 | "facet_ranges", 17 | "facet_pivot", 18 | ) 19 | 20 | def __init__(self, **kwargs): 21 | for member in self.members: 22 | setattr(self, member, kwargs.get(member, {})) 23 | self.facet_fields = dict(self.facet_fields) 24 | 25 | @classmethod 26 | def from_json(cls, response): 27 | try: 28 | facet_counts = response["facet_counts"] 29 | except KeyError: 30 | return SolrFacetCounts() 31 | facet_fields = {} 32 | for facet_field, facet_values in list(facet_counts["facet_fields"].items()): 33 | facets = [] 34 | # Change each facet list from [a, 1, b, 2, c, 3 ...] to 35 | # [(a, 1), (b, 2), (c, 3) ...] 36 | for n, value in enumerate(facet_values): 37 | if n & 1 == 0: 38 | name = value 39 | else: 40 | facets.append((name, value)) 41 | facet_fields[facet_field] = facets 42 | facet_counts["facet_fields"] = facet_fields 43 | for facet_field in list(facet_counts["facet_ranges"].keys()): 44 | counts = [] 45 | count_list = facet_counts["facet_ranges"][facet_field]["counts"] 46 | # Change each facet list from [a, 1, b, 2, c, 3 ...] to 47 | # [(a, 1), (b, 2), (c, 3) ...] 
48 | for n, value in enumerate(count_list): 49 | if n & 1 == 0: 50 | name = value 51 | else: 52 | counts.append((name, value)) 53 | facet_counts["facet_ranges"][facet_field]["counts"] = counts 54 | return SolrFacetCounts(**facet_counts) 55 | 56 | 57 | class SolrExtract(object): 58 | @classmethod 59 | def from_json(cls, doc, filename=None): 60 | self = cls() 61 | if filename is None: 62 | for attrname in doc: 63 | if attrname.endswith("_metadata"): 64 | filename = attrname[:-9] 65 | self.text = doc[filename] 66 | metadata = doc[filename + "_metadata"] 67 | self.metadata = dict(zip(metadata[0::2], metadata[1::2])) 68 | for attr in ["QTime", "status"]: 69 | setattr(self, attr, doc["responseHeader"].get(attr)) 70 | return self 71 | 72 | 73 | class SolrStats(object): 74 | members = ( 75 | "stats_fields", 76 | "facet", 77 | ) 78 | 79 | def __init__(self, **kwargs): 80 | for member in self.members: 81 | setattr(self, member, kwargs.get(member, ())) 82 | self.stats_fields = dict(self.stats_fields) 83 | 84 | @classmethod 85 | def from_json(cls, response): 86 | try: 87 | stats_response = response["stats"] 88 | except KeyError: 89 | return SolrStats() 90 | stats = {"stats_fields": {}} 91 | # faceted stats, if present, are included within the field 92 | for field, values in list(stats_response["stats_fields"].items()): 93 | stats["stats_fields"][field] = values 94 | 95 | return SolrStats(**stats) 96 | 97 | 98 | class SolrUpdateResponse(object): 99 | @classmethod 100 | def from_json(cls, jsonmsg): 101 | self = cls() 102 | self.original_json = jsonmsg 103 | doc = json.loads(jsonmsg) 104 | details = doc["responseHeader"] 105 | for attr in ["QTime", "params", "status"]: 106 | setattr(self, attr, details.get(attr)) 107 | if self.status != 0: 108 | raise ValueError("Response indicates an error") 109 | return self 110 | 111 | 112 | class SolrResponse(Sequence): 113 | @classmethod 114 | def from_json(cls, jsonmsg, unique_key, datefields=()): 115 | self = cls() 116 | self.original_json = jsonmsg 117 | doc = json.loads(jsonmsg) 118 | details = doc["responseHeader"] 119 | for attr in ["QTime", "params", "status"]: 120 | setattr(self, attr, details.get(attr)) 121 | if self.status != 0: 122 | raise ValueError("Response indicates an error") 123 | self.result = SolrResult() 124 | if doc.get("response"): 125 | self.result = SolrResult.from_json(doc["response"], datefields) 126 | # TODO mlt/ returns match what should we do with it ? 127 | # if doc.get('match'): 128 | # self.result = SolrResult.from_json(doc['match'], datefields) 129 | self.facet_counts = SolrFacetCounts.from_json(doc) 130 | self.spellcheck = doc.get("spellcheck", {}) 131 | if self.params is not None: 132 | self.group_field = self.params.get("group.field") 133 | else: 134 | self.group_field = None 135 | self.groups = {} 136 | if self.group_field is not None: 137 | self.groups = SolrGroupResult.from_json( 138 | doc["grouped"], self.group_field, datefields 139 | ) 140 | self.highlighting = doc.get("highlighting", {}) 141 | if self.highlighting: 142 | # Add highlighting info to the individual documents. 
143 | if doc.get("response"): 144 | for d in self.result.docs: 145 | k = str(d[unique_key]) 146 | if k in self.highlighting: 147 | d["solr_highlights"] = self.highlighting[k] 148 | elif doc.get("grouped"): 149 | for group in getattr(self.groups, self.group_field)["groups"]: 150 | for d in group["doclist"]["docs"]: 151 | k = str(d[unique_key]) 152 | if k in self.highlighting: 153 | d["solr_highlights"] = self.highlighting[k] 154 | 155 | self.debug = doc.get("debug", {}) 156 | self.next_cursor_mark = doc.get("nextCursorMark") 157 | self.more_like_these = dict( 158 | (k, SolrResult.from_json(v, datefields)) 159 | for (k, v) in list(doc.get("moreLikeThis", {}).items()) 160 | ) 161 | self.term_vectors = self.parse_term_vectors(doc.get("termVectors", [])) 162 | # can be computed by MoreLikeThisHandler 163 | self.interesting_terms = doc.get("interestingTerms", None) 164 | self.stats = SolrStats.from_json(doc) 165 | return self 166 | 167 | @classmethod 168 | def from_get_json(cls, jsonmsg, datefields=()): 169 | """Generate instance from the response of a RealTime Get""" 170 | self = cls() 171 | self.groups = {} 172 | self.original_json = jsonmsg 173 | doc = json.loads(jsonmsg) 174 | self.result = SolrResult.from_json(doc["response"], datefields) 175 | return self 176 | 177 | @classmethod 178 | def parse_term_vectors(cls, lst, path=""): 179 | """Transform a solr list to dict 180 | 181 | Turns [a, x, b, y, c, z ...] into {a: x, b: y, c: z ...} 182 | If the values are lists themselves, this is done recursively 183 | """ 184 | dct = dict() 185 | for i in range(0, len(lst), 2): 186 | k = lst[i] 187 | v = lst[i + 1] 188 | # Do not recurse too deep into warnings list 189 | if path != ".warnings" and isinstance(v, list): 190 | v = cls.parse_term_vectors(v, path + "." 
+ k) 191 | dct[k] = v 192 | return dct 193 | 194 | def __str__(self): 195 | return str(self.result) 196 | 197 | def __len__(self): 198 | if self.groups: 199 | return len(getattr(self.groups, self.group_field)["groups"]) 200 | else: 201 | return len(self.result.docs) 202 | 203 | def __getitem__(self, key): 204 | if self.groups: 205 | return getattr(self.groups, self.group_field)["groups"][key] 206 | else: 207 | return self.result.docs[key] 208 | 209 | 210 | class SolrResult(object): 211 | @classmethod 212 | def from_json(cls, node, datefields=()): 213 | self = cls() 214 | self.name = "response" 215 | self.numFound = int(node["numFound"]) 216 | self.start = int(node["start"]) 217 | docs = node["docs"] 218 | self.docs = self._prepare_docs(docs, datefields) 219 | return self 220 | 221 | @staticmethod 222 | def _prepare_docs(docs, datefields): 223 | for doc in docs: 224 | for name, value in list(doc.items()): 225 | if scorched.dates.is_datetime_field(name, datefields): 226 | if is_iter(value): 227 | doc[name] = [scorched.dates.solr_date(v)._dt_obj for v in value] 228 | else: 229 | doc[name] = scorched.dates.solr_date(value)._dt_obj 230 | return docs 231 | 232 | def __str__(self): 233 | return "{numFound} results found, starting at #{start}".format( 234 | numFound=self.numFound, start=self.start 235 | ) 236 | 237 | 238 | class SolrGroupResult(object): 239 | @classmethod 240 | def from_json(cls, node, group_field, datefields=()): 241 | self = cls() 242 | self.name = "response" 243 | self.group_field = group_field 244 | groups = node[group_field]["groups"] 245 | setattr( 246 | self, 247 | group_field, 248 | { 249 | "matches": node[group_field]["matches"], 250 | "ngroups": node[group_field]["ngroups"], 251 | "groups": self._prepare_groups(groups, datefields), 252 | }, 253 | ) 254 | return self 255 | 256 | @staticmethod 257 | def _prepare_groups(groups, datefields): 258 | """Iterate over the docs and the groups and cast fields appropriately""" 259 | for group in groups: 260 | for doc in group["doclist"]["docs"]: 261 | for name, value in doc.items(): 262 | if scorched.dates.is_datetime_field(name, datefields): 263 | if is_iter(value): 264 | doc[name] = [ 265 | scorched.dates.solr_date(v)._dt_obj for v in value 266 | ] 267 | else: 268 | doc[name] = scorched.dates.solr_date(value)._dt_obj 269 | return groups 270 | 271 | def __str__(self): 272 | return "{ngroups} groups with {matches} matches found".format( 273 | ngroups=getattr(self, self.group_field)["ngroups"], 274 | matches=getattr(self, self.group_field)["matches"], 275 | ) 276 | -------------------------------------------------------------------------------- /scorched/strings.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | from scorched.compat import python_2_unicode_compatible, str 4 | 5 | 6 | class SolrString(str): 7 | # The behaviour below is only really relevant for String fields rather 8 | # than Text fields - most queryparsers will strip these characters out 9 | # for a text field anyway. 
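# (Illustrative example, assuming the subclasses defined below:
#  RawString("a:b*").escape_for_lqs_term() yields a\:b\* with every special
#  character escaped, while WildcardString("a:b*").escape_for_lqs_term()
#  yields a\:b*, keeping the asterisk live as a wildcard.)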
10 | lucene_special_chars = '+-&|!(){}[]^"~*?: \t\v\\/'
11 |
12 | def escape_for_lqs_term(self):
13 | if self in ["AND", "OR", "NOT", ""]:
14 | return '"%s"' % self
15 | chars = []
16 | for c in self.chars:
17 | if isinstance(c, str) and c in self.lucene_special_chars:
18 | chars.append("\\%s" % c)
19 | else:
20 | chars.append("%s" % c)
21 | return "".join(chars)
22 |
23 |
24 | class RawString(SolrString):
25 | def __init__(self, s):
26 | self.chars = self
27 |
28 |
29 | class WildcardString(SolrString):
30 | def __init__(self, s):
31 | self.chars = self.get_wildcards(s)
32 |
33 | class SpecialChar(object):
34 | @python_2_unicode_compatible
35 | def __str__(self):
36 | return str(self.char)
37 |
38 | class Asterisk(SpecialChar):
39 | char = "*"
40 |
41 | class QuestionMark(SpecialChar):
42 | char = "?"
43 |
44 | def get_wildcards(self, s):
45 | backslash = False
46 | i = 0
47 | chars = []
48 | for c in s:
49 | if backslash:
50 | backslash = False
51 | chars.append(c)
52 | continue
53 | i += 1
54 | if c == "\\":
55 | backslash = True
56 | elif c == "*":
57 | chars.append(self.Asterisk())
58 | elif c == "?":
59 | chars.append(self.QuestionMark())
60 | else:
61 | chars.append(c)
62 | if backslash:
63 | chars.append("\\")
64 | return chars
65 |
66 |
67 | class DismaxString(str):
68 | """A dismax query string that should not be escaped by the client."""
--------------------------------------------------------------------------------
/scorched/testing.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from __future__ import unicode_literals
3 | import requests
4 | import os
5 | import unittest
6 | if not hasattr(unittest, "skip"):
7 | try:
8 | import unittest2 as unittest
9 | except ImportError:
10 | pass
11 | import warnings
12 |
13 | from scorched.compat import str
14 |
15 |
16 | def is_solr_available(dsn=None):
17 | if not dsn:
18 | dsn = os.environ.get("SOLR_URL",
19 | "http://localhost:8983/solr")
20 | if dsn is not None:
21 | try:
22 | requests.get(dsn, timeout=1)
23 | return True
24 | except Exception as e:
25 | print("Connection error: %s" % str(e))
26 | return False
27 |
28 |
29 | def skip_unless_solr(func):
30 | """
31 | Use this decorator to skip tests which need a functional Solr connection.
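Example (an illustrative sketch, not part of the original docstring):

    @skip_unless_solr
    def test_roundtrip(self):
        ...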
32 | The connection is given by the environment variable SOLR_URL.
33 | """
34 |
35 | if is_solr_available():
36 | return func
37 | msg = "Test needs a running Solr connection (SOLR_URL)"
38 | warnings.warn("%s: %s" % (msg, func))
39 | return unittest.skip(msg)(func)
--------------------------------------------------------------------------------
/scorched/tests/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 |
--------------------------------------------------------------------------------
/scorched/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import socket
3 |
4 | import pytest
5 | import requests
6 | from requests.exceptions import ConnectionError
7 |
8 |
9 | def get_unused_port():
10 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
11 | s.bind(("localhost", 0))
12 | addr, port = s.getsockname()
13 | s.close()
14 | return port
15 |
16 |
17 | def is_responsive(url):
18 | ping_url = f"{url}/admin/ping"
19 | try:
20 | response = requests.get(ping_url)
21 | if response.status_code == 200:
22 | return True
23 | except ConnectionError:
24 | return False
25 |
26 |
27 | @pytest.fixture(scope="session")
28 | def docker_compose_file(pytestconfig):
29 | # This is hackish. `docker_compose_file` is
30 | # called before the fixture `docker_services` is
31 | # executed and this is the only point where
32 | # we could inject our own custom port into the environment.
33 | # By the time the `docker_services` fixture runs, the
34 | # container has already been started, so later changes to
35 | # the environment have no effect.
36 | #
37 | # Ensure that we use an unused custom port to allow
38 | # multiple instances to run simultaneously.
39 | port = get_unused_port()
40 | os.environ["SCORCHED_TEST_SOLR_PORT"] = str(port)
41 | return os.path.join(
42 | str(pytestconfig.rootdir), "scorched", "tests", "docker-compose.yml"
43 | )
44 |
45 |
46 | @pytest.fixture(scope="session")
47 | def solr_url(docker_ip, docker_services):
48 | """Ensure that the HTTP service is up and responsive."""
49 | # `port_for` takes a container port and returns the corresponding host port
50 | port = docker_services.port_for("solr", 8983)
51 | solr_url = "http://{}:{}/solr/core0".format(docker_ip, port)
52 | docker_services.wait_until_responsive(
53 | timeout=30.0, pause=1.0, check=lambda: is_responsive(solr_url)
54 | )
55 | os.environ["SOLR_URL"] = solr_url
56 | return solr_url
--------------------------------------------------------------------------------
/scorched/tests/data/lipsum.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lugensa/scorched/a1ca2970085c01bcde2177cee6e67b9dc40b86c6/scorched/tests/data/lipsum.pdf
--------------------------------------------------------------------------------
/scorched/tests/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | solr:
4 | image: solr:8
5 | ports:
6 | - "${SCORCHED_TEST_SOLR_PORT:-44177}:8983"
7 | volumes:
8 | - ./:/tests
9 | command: |
10 | bash -c 'precreate-core core0 &&
11 | cp -a /opt/solr/server/solr/configsets/sample_techproducts_configs/conf/* /var/solr/data/core0/conf/ &&
12 | cp /tests/solrconfig_8.11.xml /var/solr/data/core0/conf/solrconfig.xml &&
13 | solr-foreground'
14 |
15 | # docker run -d --rm -p 44177:8983 \
16 | # -v $PWD/scorched/tests:/tests \
17 | # --name my_solr solr:8 bash -c \
18 | # 
"precreate-core core0 && "\ 19 | # "cp -a /opt/solr/server/solr/configsets/sample_techproducts_configs/conf/* /var/solr/data/core0/conf/ &&"\ 20 | # "cp /tests/solrconfig_8.11.xml /var/solr/data/core0/conf/solrconfig.xml && solr-foreground" 21 | -------------------------------------------------------------------------------- /scorched/tests/dumps/books.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id" : "978-0641723445", 4 | "cat" : ["book","hardcover"], 5 | "name" : "The Lightning Thief", 6 | "author" : "Rick Riordan", 7 | "series_t" : "Percy Jackson and the Olympians", 8 | "sequence_i" : 1, 9 | "genre_s" : "fantasy", 10 | "inStock" : true, 11 | "price" : 12.50, 12 | "pages_i" : 384 13 | } 14 | , 15 | { 16 | "id" : "978-1423103349", 17 | "cat" : ["book","paperback"], 18 | "name" : "The Sea of Monsters", 19 | "author" : "Rick Riordan", 20 | "series_t" : "Percy Jackson and the Olympians", 21 | "sequence_i" : 2, 22 | "genre_s" : "fantasy", 23 | "inStock" : true, 24 | "price" : 6.49, 25 | "pages_i" : 304 26 | } 27 | , 28 | { 29 | "id" : "978-1857995879", 30 | "cat" : ["book","paperback"], 31 | "name" : "Sophie's World : The Greek Philosophers", 32 | "author" : "Jostein Gaarder", 33 | "sequence_i" : 1, 34 | "genre_s" : "fantasy", 35 | "inStock" : true, 36 | "price" : 3.07, 37 | "pages_i" : 64 38 | } 39 | , 40 | { 41 | "id" : "978-1933988177", 42 | "cat" : ["book","paperback"], 43 | "name" : "Lucene in Action, Second Edition", 44 | "author" : "Michael McCandless", 45 | "sequence_i" : 1, 46 | "genre_s" : "IT", 47 | "inStock" : true, 48 | "price" : 30.50, 49 | "pages_i" : 475 50 | } 51 | ] 52 | -------------------------------------------------------------------------------- /scorched/tests/dumps/request_error.json: -------------------------------------------------------------------------------- 1 | { 2 | "responseHeader": { 3 | "status": 400, 4 | "QTime": 1, 5 | "params": { 6 | "facet": "true", 7 | "indent": "true", 8 | "q": "genre_s:fantasy", 9 | "_": "1394706864646", 10 | "facet.field": "cat", 11 | "wt": "json" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /scorched/tests/dumps/request_hl.json: -------------------------------------------------------------------------------- 1 | { 2 | "responseHeader": { 3 | "status": 0, 4 | "QTime": 1, 5 | "params": { 6 | "q": "author:John", 7 | "hl": "true", 8 | "hl.fl": "author", 9 | "wt": "json" 10 | } 11 | }, 12 | "response": { 13 | "numFound": 1, 14 | "start": 0, 15 | "docs": [ 16 | { 17 | "name": "The Höhlentripp Strauß", 18 | "author": "John Muir", 19 | "author_s": "John Muir", 20 | "series_t": "Percy Jackson and ☂nicode", 21 | "pages_i": 384, 22 | "genre_s": "fantasy", 23 | "id": "978", 24 | "sequence_i": 1, 25 | "inStock": true, 26 | "cat": [ 27 | "book", 28 | "hardcover" 29 | ], 30 | "price": 12.5, 31 | "price_c": "12.5,USD", 32 | "_version_": 1547482048566919168 33 | } 34 | ] 35 | }, 36 | "highlighting": { 37 | "978": { 38 | "author": [ 39 | "John Muir" 40 | ] 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /scorched/tests/dumps/request_hl_grouped.json: -------------------------------------------------------------------------------- 1 | { 2 | "responseHeader": { 3 | "status": 0, 4 | "QTime": 1, 5 | "params": { 6 | "q": "author:Muir", 7 | "hl": "true", 8 | "hl.fl": "author", 9 | "group.ngroups": "true", 10 | "wt": "json", 11 | "group.field": "inStock", 12 | "group": "true" 13 
| } 14 | }, 15 | "grouped": { 16 | "inStock": { 17 | "matches": 2, 18 | "ngroups": 1, 19 | "groups": [ 20 | { 21 | "groupValue": true, 22 | "doclist": { 23 | "numFound": 2, 24 | "start": 0, 25 | "docs": [ 26 | { 27 | "name": "The Yosemite", 28 | "author": "John Muir", 29 | "author_s": "John Muir", 30 | "price": 12.5, 31 | "price_c": "12.5,USD", 32 | "important_dts": [ 33 | "1969-01-01T00:00:00Z", 34 | "1969-01-02T00:00:00Z" 35 | ], 36 | "inStock": true, 37 | "id": "978", 38 | "_version_": 1547485322340728832 39 | } 40 | ] 41 | } 42 | } 43 | ] 44 | } 45 | }, 46 | "highlighting": { 47 | "978": { 48 | "author": [ 49 | "John Muir" 50 | ] 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /scorched/tests/dumps/request_w_facets.json: -------------------------------------------------------------------------------- 1 | { 2 | "responseHeader": { 3 | "status": 0, 4 | "QTime": 1, 5 | "params": { 6 | "facet": "true", 7 | "indent": "true", 8 | "q": "genre_s:fantasy", 9 | "_": "1394706864646", 10 | "facet.field": "cat", 11 | "wt": "json" 12 | } 13 | }, 14 | "response": { 15 | "numFound": 3, 16 | "start": 0, 17 | "docs": [ 18 | { 19 | "id": "978-0641723445", 20 | "cat": [ 21 | "book", 22 | "hardcover" 23 | ], 24 | "name": "The Lightning Thief", 25 | "author": "Rick Riordan", 26 | "author_s": "Rick Riordan", 27 | "series_t": "Percy Jackson and the Olympians", 28 | "sequence_i": 1, 29 | "genre_s": "fantasy", 30 | "inStock": true, 31 | "price": 12.5, 32 | "price_c": "12.5,USD", 33 | "created_dt": "2009-07-23T03:24:34.000376Z", 34 | "modified": "2009-07-23T03:24:34.000376Z", 35 | "not_a_datetime_field_modified": "name of this field ends with modified but is not a datetime", 36 | "pages_i": 384, 37 | "_version_": 1462456002687271000 38 | }, 39 | { 40 | "id": "978-1423103349", 41 | "cat": [ 42 | "book", 43 | "paperback" 44 | ], 45 | "name": "The Sea of Monsters", 46 | "author": "Rick Riordan", 47 | "author_s": "Rick Riordan", 48 | "series_t": "Percy Jackson and the Olympians", 49 | "sequence_i": 2, 50 | "genre_s": "fantasy", 51 | "inStock": true, 52 | "price": 6.49, 53 | "price_c": "6.49,USD", 54 | "pages_i": 304, 55 | "_version_": 1462456002688319500 56 | }, 57 | { 58 | "id": "978-1857995879", 59 | "cat": [ 60 | "book", 61 | "paperback" 62 | ], 63 | "name": "Sophie's World : The Greek Philosophers", 64 | "author": "Jostein Gaarder", 65 | "author_s": "Jostein Gaarder", 66 | "sequence_i": 1, 67 | "genre_s": "fantasy", 68 | "inStock": true, 69 | "price": 3.07, 70 | "price_c": "3.07,USD", 71 | "pages_i": 64, 72 | "_version_": 1462456002689368000 73 | } 74 | ] 75 | }, 76 | "facet_counts": { 77 | "facet_queries": {}, 78 | "facet_fields": { 79 | "cat": [ 80 | "book", 81 | 3, 82 | "paperback", 83 | 2, 84 | "hardcover", 85 | 1 86 | ] 87 | }, 88 | "facet_dates": {}, 89 | "facet_ranges": { 90 | "created_dt": { 91 | "counts": [ 92 | "2009-01-01T00:00:00Z", 93 | 1, 94 | "2010-01-01T00:00:00Z", 95 | 0, 96 | "2011-01-01T00:00:00Z", 97 | 0 98 | ], 99 | "gap":"+1YEARS", 100 | "start":"2009-01-01T00:00:00Z", 101 | "end":"2012-01-01T00:00:00Z" 102 | } 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /scorched/tests/dumps/request_w_termvector.json: -------------------------------------------------------------------------------- 1 | { 2 | "responseHeader": { 3 | "status": 0, 4 | "QTime": 24, 5 | "params": { 6 | "tv.tf": "true", 7 | "indent": "true", 8 | "q": "*:*", 9 | "tv": "true", 10 | "_": "1441938054458", 11 | 
"tv.fl": "weighted_words,title", 12 | "tv.df": "true", 13 | "wt": "json", 14 | "rows": "2" 15 | } 16 | }, 17 | "response": { 18 | "numFound": 333940, 19 | "start": 0, 20 | "docs": [ 21 | { 22 | "title": "Medizinprodukteberater", 23 | "uid": "ffaa9370-5182-5810-b8a9-54b751ef0606", 24 | "date": "2015-09-09T16:42:20.735Z", 25 | "cuid": "ffaa9370-5182-5810-b8a9-54b751ef0606", 26 | "geohex": [ 27 | "378,70" 28 | ], 29 | "_version_": 1511854640452337700 30 | }, 31 | { 32 | "title": "Automatisierungstechniker m/w", 33 | "uid": "9ce8ef2d-6e0f-5647-ae4c-2aaaca37b28f", 34 | "uri": "http://meega.de/1431501-automatisierungstechniker-m-w.html", 35 | "cuid": "9ce8ef2d-6e0f-5647-ae4c-2aaaca37b28f", 36 | "geohex": [ 37 | "357,61" 38 | ], 39 | "_version_": 1511857045532311600 40 | } 41 | ] 42 | }, 43 | "termVectors": [ 44 | "uniqueKeyFieldName", 45 | "uid", 46 | "warnings", 47 | [ 48 | "noTermVectors", 49 | [ 50 | "title" 51 | ] 52 | ], 53 | "ffaa9370-5182-5810-b8a9-54b751ef0606", 54 | [ 55 | "uniqueKey", 56 | "ffaa9370-5182-5810-b8a9-54b751ef0606", 57 | "weighted_words", 58 | [ 59 | "denken", 60 | [ 61 | "tf", 62 | 1, 63 | "df", 64 | 10409 65 | ], 66 | "dienstfahrtzeug", 67 | [ 68 | "tf", 69 | 1, 70 | "df", 71 | 1 72 | ], 73 | "dokumentation", 74 | [ 75 | "tf", 76 | 1, 77 | "df", 78 | 19774 79 | ], 80 | "eigeninitiative", 81 | [ 82 | "tf", 83 | 1, 84 | "df", 85 | 11369 86 | ], 87 | "wirken", 88 | [ 89 | "tf", 90 | 1, 91 | "df", 92 | 106 93 | ] 94 | ] 95 | ], 96 | "9ce8ef2d-6e0f-5647-ae4c-2aaaca37b28f", 97 | [ 98 | "uniqueKey", 99 | "9ce8ef2d-6e0f-5647-ae4c-2aaaca37b28f", 100 | "weighted_words", 101 | [ 102 | "anlagen", 103 | [ 104 | "tf", 105 | 3, 106 | "df", 107 | 21484 108 | ], 109 | "instandhaltung", 110 | [ 111 | "tf", 112 | 2, 113 | "df", 114 | 11717 115 | ], 116 | "kontakte", 117 | [ 118 | "tf", 119 | 1, 120 | "df", 121 | 9893 122 | ], 123 | "wert", 124 | [ 125 | "tf", 126 | 1, 127 | "df", 128 | 8572 129 | ] 130 | ] 131 | ] 132 | ] 133 | } 134 | -------------------------------------------------------------------------------- /scorched/tests/test_connection.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import os 4 | import requests 5 | import scorched.connection 6 | import unittest 7 | 8 | from unittest import mock 9 | 10 | 11 | HTTPBIN = os.environ.get('HTTPBIN_URL', 'https://httpbin.org/') 12 | # Issue #1483: Make sure the URL always has a trailing slash 13 | HTTPBIN = HTTPBIN.rstrip('/') + '/' 14 | 15 | 16 | def httpbin(*suffix): 17 | """Returns url for HTTPBIN resource.""" 18 | return requests.compat.urljoin(HTTPBIN, '/'.join(suffix)) 19 | 20 | 21 | class TestConnection(unittest.TestCase): 22 | 23 | def _make_connection(self, url="http://localhost:8983/solr", 24 | http_connection=None, mode="r", retry_timeout=-1, 25 | max_length_get_url=2048): 26 | 27 | sc = scorched.connection.SolrConnection( 28 | url=url, 29 | http_connection=http_connection, 30 | mode=mode, 31 | retry_timeout=retry_timeout, 32 | max_length_get_url=max_length_get_url) 33 | 34 | return sc 35 | 36 | def test_readable(self): 37 | sc = self._make_connection() 38 | self.assertRaises(TypeError, sc.update, {}) 39 | 40 | def test_writeable(self): 41 | sc = self._make_connection(mode="w") 42 | self.assertRaises(TypeError, sc.mlt, []) 43 | self.assertRaises(TypeError, sc.select, {}) 44 | 45 | def test_mlt(self): 46 | sc = self._make_connection(mode="") 47 | with mock.patch.object(requests.Session, 'request', 48 | return_value=mock.Mock(status_code=500)): 49 | 
self.assertRaises(scorched.exc.SolrError, sc.mlt, [])
50 | # test content
51 | with mock.patch.object(requests.Session, 'request',
52 | return_value=mock.Mock(status_code=500)):
53 | self.assertRaises(scorched.exc.SolrError, sc.mlt, [],
54 | content="fooo")
55 | # test post building
56 | sc = self._make_connection(max_length_get_url=0)
57 | with mock.patch.object(requests.Session, 'request',
58 | return_value=mock.Mock(status_code=500)):
59 | self.assertRaises(scorched.exc.SolrError, sc.mlt, [],
60 | content="fooo")
61 |
62 | def test_select(self):
63 | sc = self._make_connection(max_length_get_url=0)
64 | with mock.patch.object(requests.Session, 'request',
65 | return_value=mock.Mock(status_code=500)):
66 | self.assertRaises(scorched.exc.SolrError, sc.select, [])
67 |
68 | def test_no_body_response_error(self):
69 | sc = self._make_connection(mode="")
70 | with mock.patch.object(requests.Session, 'request',
71 | return_value=mock.Mock(status_code=500)):
72 | self.assertRaises(scorched.exc.SolrError, sc.update, {"foo": 2})
73 | self.assertRaises(scorched.exc.SolrError, sc.update, {})
74 |
75 | def test_request(self):
76 | sc = self._make_connection(url="http://localhost:1234/none", mode="")
77 | self.assertRaises(Exception, sc.request, (), {})
78 |
79 | def test_url_for_update(self):
80 | dsn = "http://localhost:1234/none"
81 | sc = self._make_connection(url=dsn)
82 | ret = sc.url_for_update()
83 |
84 | def dsn_url(path):
85 | return "%s%s" % (dsn, path)
86 |
87 | self.assertEqual(ret, dsn_url("/update/json"))
88 | # commitWithin
89 | ret = sc.url_for_update(commitWithin=2)
90 | self.assertEqual(ret, dsn_url("/update/json?commitWithin=2"))
91 | self.assertRaises(ValueError, sc.url_for_update, commitWithin="a")
92 | self.assertRaises(ValueError, sc.url_for_update, commitWithin=-1)
93 | # softCommit
94 | ret = sc.url_for_update(softCommit=True)
95 | self.assertEqual(ret, dsn_url("/update/json?softCommit=true"))
96 | ret = sc.url_for_update(softCommit=False)
97 | self.assertEqual(ret, dsn_url("/update/json?softCommit=false"))
98 | # optimize
99 | ret = sc.url_for_update(optimize=True)
100 | self.assertEqual(ret, dsn_url("/update/json?optimize=true"))
101 | ret = sc.url_for_update(optimize=False)
102 | self.assertEqual(ret, dsn_url("/update/json?optimize=false"))
103 | # waitSearcher
104 | ret = sc.url_for_update(waitSearcher=True)
105 | self.assertEqual(ret, dsn_url("/update/json?waitSearcher=true"))
106 | ret = sc.url_for_update(waitSearcher=False)
107 | self.assertEqual(ret, dsn_url("/update/json?waitSearcher=false"))
108 | # expungeDeletes
109 | ret = sc.url_for_update(commit=True, expungeDeletes=True)
110 | self.assertEqual(
111 | ret, dsn_url("/update/json?commit=true&expungeDeletes=true"))
112 | ret = sc.url_for_update(commit=True, expungeDeletes=False)
113 | self.assertEqual(
114 | ret, dsn_url("/update/json?commit=true&expungeDeletes=false"))
115 | self.assertRaises(ValueError, sc.url_for_update, expungeDeletes=True)
116 | # maxSegments
117 | ret = sc.url_for_update(optimize=True, maxSegments=2)
118 | self.assertEqual(
119 | ret, dsn_url("/update/json?maxSegments=2&optimize=true"))
120 | self.assertRaises(
121 | ValueError, sc.url_for_update, optimize=True, maxSegments="a")
122 | self.assertRaises(
123 | ValueError, sc.url_for_update, optimize=True, maxSegments=-1)
124 | self.assertRaises(ValueError, sc.url_for_update, maxSegments=2)
125 |
126 | def test_select_timeout(self):
127 | dsn = "http://localhost:1234/none"
128 | # max_length_get_url=99999: httpbin doesn't support POST
129 | 
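# (Note added for clarity: with max_length_get_url=99999 scorched stays on
#  GET, so httpbin's JSON echo contains the request URL asserted below.)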
sc = scorched.connection.SolrConnection( 130 | url=dsn, http_connection=None, mode="", retry_timeout=-1, 131 | max_length_get_url=99999, search_timeout=3.0) 132 | sc.select_url = httpbin('delay/2') 133 | # delay 2.0s < 3.0s timeout, ok 134 | resp = sc.select([]) 135 | self.assertTrue(json.loads(resp)['url'].startswith(sc.select_url)) 136 | # delay 2.0s > 1.0s timeout, raise ReadTimeout 137 | sc.search_timeout = 1.0 138 | self.assertRaises(requests.exceptions.ReadTimeout, sc.select, []) 139 | sc.search_timeout = (5.0, 1.0) # (connect, read) 140 | self.assertRaises(requests.exceptions.ReadTimeout, sc.select, []) 141 | # delay 2.0s < 3.0s timeout, ok 142 | sc.search_timeout = (1.0, 3.0) # (connect, read) 143 | resp = sc.select([]) 144 | self.assertTrue(json.loads(resp)['url'].startswith(sc.select_url)) 145 | # Connecting to an invalid port should raise a ConnectionError 146 | sc.select_url = "https://httpbin.org:1/none/select" 147 | sc.search_timeout = 1.0 148 | self.assertRaises(requests.exceptions.ConnectTimeout, sc.select, []) 149 | sc.search_timeout = (1.0, 5.0) 150 | self.assertRaises(requests.exceptions.ConnectTimeout, sc.select, []) 151 | 152 | def test_basic_auth(self): 153 | hc = requests.Session() 154 | hc.auth = ('joe', 'Secret') 155 | 156 | dsn = "http://localhost:1234/none" 157 | sc = self._make_connection(url=dsn, http_connection=hc) 158 | sc.select_url = httpbin('/basic-auth/{0}/{1}'.format(*hc.auth)) 159 | 160 | resp = sc.select([]) 161 | self.assertTrue(json.loads(resp)['authenticated']) 162 | 163 | 164 | class TestSolrInterface(unittest.TestCase): 165 | 166 | def _make_one(self): 167 | import scorched.connection 168 | import scorched.tests.schema 169 | with mock.patch('scorched.connection.SolrInterface.init_schema') as \ 170 | init_schema: 171 | init_schema.return_value = scorched.tests.schema.schema 172 | si = scorched.connection.SolrInterface( 173 | 'http://localhost:2222/mysolr') 174 | return si 175 | 176 | def test__should_skip_value(self): 177 | sc = self._make_one() 178 | self.assertTrue(sc._should_skip_value(None)) 179 | self.assertTrue(sc._should_skip_value({'set': None})) 180 | self.assertFalse(sc._should_skip_value(1)) 181 | self.assertFalse(sc._should_skip_value({'set': 1})) 182 | 183 | def test__prepare_docs_does_not_alter_given_docs(self): 184 | sc = self._make_one() 185 | today = datetime.datetime.utcnow() 186 | docs = [{'last_modified': today}] 187 | sc._prepare_docs(docs) 188 | self.assertEqual(docs, [{'last_modified': today}]) 189 | 190 | def test__prepare_docs_converts_datetime(self): 191 | sc = self._make_one() 192 | dt = datetime.datetime(2014, 2, 18, 12, 12, 10) 193 | docs = [{'last_modified': dt}] 194 | result = sc._prepare_docs(docs) 195 | self.assertEqual(result[0]['last_modified'], "2014-02-18T12:12:10Z") 196 | 197 | def test__prepare_docs_converts_datetime_atomic_update(self): 198 | sc = self._make_one() 199 | dt = datetime.datetime(2014, 2, 18, 12, 12, 10) 200 | docs = [{'last_modified': {'set': dt}}] 201 | result = sc._prepare_docs(docs) 202 | self.assertEqual( 203 | result[0]['last_modified']['set'], 204 | '2014-02-18T12:12:10Z', 205 | ) 206 | -------------------------------------------------------------------------------- /scorched/tests/test_dates.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import pytz 3 | import unittest 4 | import scorched.exc 5 | import pytest 6 | 7 | from scorched.dates import (solr_date, datetime_from_w3_datestring, 8 | datetime_factory) 9 | from 
scorched.search import LuceneQuery
10 |
11 | not_utc = pytz.timezone('Etc/GMT-3')
12 |
13 | samples_from_pydatetimes = {
14 | "2009-07-23T03:24:34.000376Z":
15 | [datetime.datetime(2009, 7, 23, 3, 24, 34, 376),
16 | datetime.datetime(2009, 7, 23, 3, 24, 34, 376, pytz.utc)],
17 | "2009-07-23T00:24:34.000376Z":
18 | [not_utc.localize(datetime.datetime(2009, 7, 23, 3, 24, 34, 376)),
19 | datetime.datetime(2009, 7, 23, 0, 24, 34, 376, pytz.utc)],
20 | "2009-07-23T03:24:34Z":
21 | [datetime.datetime(2009, 7, 23, 3, 24, 34),
22 | datetime.datetime(2009, 7, 23, 3, 24, 34, tzinfo=pytz.utc)],
23 | "2009-07-23T00:24:34Z":
24 | [not_utc.localize(datetime.datetime(2009, 7, 23, 3, 24, 34)),
25 | datetime.datetime(2009, 7, 23, 0, 24, 34, tzinfo=pytz.utc)]
26 | }
27 |
28 | samples_from_strings = {
29 | # These will not have been serialized by us, but we should deal with them
30 | "2009-07-23T03:24:34Z":
31 | datetime.datetime(2009, 7, 23, 3, 24, 34, tzinfo=pytz.utc),
32 | "2009-07-23T03:24:34.1Z":
33 | datetime.datetime(2009, 7, 23, 3, 24, 34, 100000, pytz.utc),
34 | "2009-07-23T03:24:34.123Z":
35 | datetime.datetime(2009, 7, 23, 3, 24, 34, 122999, pytz.utc)
36 | }
37 |
38 |
39 | def check_solr_date_from_date(s, date, canonical_date):
40 | from scorched.compat import str
41 | assert str(solr_date(date)) == s, "Unequal representations of %r: %r and %r" % (
42 | date, str(solr_date(date)), s)
43 | check_solr_date_from_string(s, canonical_date)
44 |
45 |
46 | def check_solr_date_from_string(s, date):
47 | assert solr_date(s)._dt_obj == date, "Unequal representations of %r: %r and %r" % (
48 | solr_date(s)._dt_obj, date, s)
49 |
50 |
51 | @pytest.mark.parametrize(
52 | "dt_string,dt_objects", samples_from_pydatetimes.items())
53 | def test_solr_date_from_pydatetimes(dt_string, dt_objects):
54 | check_solr_date_from_date(dt_string, dt_objects[0], dt_objects[1])
55 |
56 |
57 | @pytest.mark.parametrize(
58 | "dt_string,dt_object", samples_from_strings.items())
59 | def test_solr_date_from_strings(dt_string, dt_object):
60 | check_solr_date_from_string(dt_string, dt_object)
61 |
62 |
63 | class TestDates(unittest.TestCase):
64 |
65 | def test_datetime_from_w3_datestring(self):
66 | self.assertRaises(ValueError,
67 | datetime_from_w3_datestring, "")
68 | self.assertEqual(datetime_from_w3_datestring("2009-07-23T03:24:34.123+16:50"),
69 | datetime.datetime(2009, 7, 23, 20, 14, 34, 122999,
70 | tzinfo=pytz.utc))
71 | self.assertEqual(datetime_from_w3_datestring("2009-07-23T03:24:34.123-16:50"),
72 | datetime.datetime(2009, 7, 22, 10, 34, 34, 122999,
73 | tzinfo=pytz.utc))
74 |
75 | def test_datetime_factory(self):
76 | self.assertRaises(ValueError,
77 | datetime_factory, year=1990, month=12,
78 | day=12345)
79 |
80 | def test_solr_date(self):
81 | self.assertRaises(scorched.exc.SolrError, solr_date, None)
82 | s = solr_date("2009-07-23T03:24:34.000376Z")
83 | s_older = solr_date("2007-07-23T03:24:34.000376Z")
84 | self.assertEqual(s.microsecond, 376)
85 | self.assertEqual(s, solr_date(s))
86 | self.assertTrue(s == s)
87 | self.assertTrue(s > s_older)
88 | self.assertTrue(s_older < s)
89 | self.assertRaises(TypeError, s.__lt__, datetime.datetime(2009, 7, 22, 10))
90 | if scorched.compat.is_py2: # pragma: no cover
91 | self.assertRaises(TypeError, s.__eq__, datetime.datetime(2009, 7, 22, 10))
92 | else: # pragma: no cover
93 | self.assertFalse(s == "Foo")
94 | self.assertEqual(s.__repr__(), 'datetime.datetime(2009, 7, 23, 3, 24, 34, 376, tzinfo=<UTC>)')
95 |
96 | def test_solr_date_from_str(self):
97 | # str here is the
original str from python 98 | self.assertTrue("'str'" in repr(str)) 99 | s = solr_date(str("2009-07-23T03:24:34.000376Z")) 100 | self.assertEqual(s, solr_date(s)) 101 | self.assertTrue(s == s) 102 | 103 | def test_solr_date_ranges(self): 104 | query = LuceneQuery() 105 | date = solr_date("2009-07-23T03:24:34.000376Z") 106 | query.Q(**{"last_modified__gt": date}) 107 | -------------------------------------------------------------------------------- /scorched/tests/test_functional.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import pytest 5 | 6 | from scorched import SolrInterface 7 | 8 | 9 | class Book: 10 | def __init__(self, name, author, **other_kwargs): 11 | self.title = name 12 | self.author = author 13 | self.other_kwargs = other_kwargs 14 | 15 | def __repr__(self): 16 | return 'Book("%s", "%s")' % (self.title, self.author) 17 | 18 | 19 | @pytest.fixture(scope="module") 20 | def books(): 21 | file_ = os.path.join(os.path.dirname(__file__), "dumps", "books.json") 22 | with open(file_) as f: 23 | datajson = f.read() 24 | docs = json.loads(datajson) 25 | return docs 26 | 27 | 28 | @pytest.fixture 29 | def si(solr_url): 30 | si_ = SolrInterface(solr_url) 31 | yield si_ 32 | si_.delete_all() 33 | si_.commit() 34 | 35 | 36 | def test_get(si, books): 37 | res = si.get("978-1423103349") 38 | assert len(res) == 0 39 | 40 | si.add(books) 41 | res = si.get("978-1423103349") 42 | assert len(res) == 1 43 | assert res[0]["name"] == "The Sea of Monsters" 44 | 45 | res = si.get(["978-0641723445", "978-1423103349", "nonexist"]) 46 | assert len(res) == 2 47 | assert [x["name"] for x in res] == ["The Lightning Thief", "The Sea of Monsters"] 48 | 49 | si.commit() 50 | 51 | res = si.get(ids="978-1423103349", fields=["author"]) 52 | assert len(res) == 1 53 | assert list(res[0].keys()) == ["author"] 54 | 55 | 56 | def test_query(si, books): 57 | si.add(books) 58 | si.commit() 59 | res = si.query(genre_s="fantasy").execute() 60 | assert res.result.numFound == 3 61 | 62 | res = si.delete_by_ids(res.result.docs[0]["id"]) 63 | assert res.status == 0 64 | res = si.query(genre_s="fantasy").execute() 65 | si.commit() 66 | res = si.query(genre_s="fantasy").execute() 67 | assert res.result.numFound == 2 68 | res = si.query(genre_s="fantasy").execute(constructor=Book) 69 | 70 | # test constructor 71 | assert [x.title for x in res.result.docs] == [ 72 | "The Sea of Monsters", 73 | "Sophie's World : The Greek Philosophers", 74 | ] 75 | 76 | 77 | def test_cursor(si, books): 78 | si.add(books) 79 | si.commit() 80 | cursor = si.query(genre_s="fantasy").sort_by("id").cursor(rows=1) 81 | 82 | # Count how often we hit solr 83 | search_count = [0] 84 | old_search = cursor.search.interface.search 85 | 86 | def search_proxy(*args, **kwargs): 87 | search_count[0] += 1 88 | return old_search(*args, **kwargs) 89 | 90 | cursor.search.interface.search = search_proxy 91 | 92 | list(cursor) 93 | assert search_count[0] == 4 # 3 + 1 to realize we are done 94 | 95 | search_count = [0] 96 | cursor = si.query(genre_s="fantasy").sort_by("id").cursor(constructor=Book, rows=2) 97 | # test constructor 98 | assert [x.title for x in cursor] == [ 99 | "The Lightning Thief", 100 | "The Sea of Monsters", 101 | "Sophie's World : The Greek Philosophers", 102 | ] 103 | 104 | assert search_count[0] == 3 105 | 106 | # empty results 107 | search_count = [0] 108 | cursor = si.query(genre_s="nonexist").sort_by("id").cursor(constructor=Book) 109 | assert list(cursor) == [] 110 | 
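# (Note added for clarity: even an empty result costs one round trip,
#  since the cursor must query Solr once to learn that nothing matches.)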
assert search_count[0] == 1
111 |
112 |
113 | def test_rollback(si, books):
114 | si.add(books)
115 | si.commit()
116 | res = si.query(genre_s="fantasy").execute()
117 | assert res.result.numFound == 3
118 | # delete
119 | res = si.delete_by_ids(res.result.docs[0]["id"])
120 | assert res.status == 0
121 |
122 | # rollback
123 | res = si.rollback()
124 | assert res.status == 0
125 | res = si.query(genre_s="fantasy").execute()
126 | assert res.result.numFound == 3
127 |
128 |
129 | def test_chunked_add(si, books):
130 | assert len(books) == 4
131 | # chunk size = 1, chunks = 4
132 | si.delete_all()
133 | res = si.add(books, chunk=1)
134 | assert len(res) == 4
135 | assert [r.status for r in res] == [0] * 4
136 | si.commit()
137 | res = si.query(genre_s="fantasy").execute()
138 | assert res.result.numFound == 3
139 | # chunk size = 2, chunks = 2
140 | si.delete_all()
141 |
142 | res = si.add(books, chunk=2)
143 | assert len(res) == 2
144 | assert [r.status for r in res] == [0] * 2
145 | si.commit()
146 | res = si.query(genre_s="fantasy").execute()
147 | assert res.result.numFound == 3
148 |
149 |
150 | def test_facet_query(si, books):
151 | res = si.add(books)
152 | assert res[0].status == 0
153 | si.commit()
154 | res = si.query(genre_s="fantasy").facet_by("cat").execute()
155 | assert res.result.numFound == 3
156 | assert [x["name"] for x in res.result.docs] == [
157 | "The Lightning Thief",
158 | "The Sea of Monsters",
159 | "Sophie's World : The Greek Philosophers",
160 | ]
161 |
162 | assert res.facet_counts.__dict__ == {
163 | "facet_fields": {"cat": [("book", 3), ("paperback", 2), ("hardcover", 1)]},
164 | "facet_dates": {},
165 | "facet_queries": {},
166 | "facet_ranges": {},
167 | "facet_pivot": {},
168 | }
169 |
170 |
171 | def test_filter_query(si, books):
172 | si.add(books)
173 | si.commit()
174 | res = (
175 | si.query(si.Q(**{"*": "*"}))
176 | .filter(cat="hardcover")
177 | .filter(genre_s="fantasy")
178 | .execute()
179 | )
180 | assert res.result.numFound == 1
181 | assert [x["name"] for x in res.result.docs] == ["The Lightning Thief"]
182 |
183 |
184 | def test_edismax_query(si, books):
185 | si.add(books)
186 | si.commit()
187 | res = (
188 | si.query(si.Q(**{"*": "*"}))
189 | .filter(cat="hardcover")
190 | .filter(genre_s="fantasy")
191 | .alt_parser("edismax")
192 | .execute()
193 | )
194 | assert res.result.numFound == 1
195 | assert [x["name"] for x in res.result.docs] == ["The Lightning Thief"]
196 |
197 |
198 | def test_mlt_component_query(si, books):
199 | si.add(books)
200 | si.commit()
201 | res = si.query(id="978-0641723445").mlt("genre_s", mintf=1, mindf=1).execute()
202 | # query shows only one
203 | assert res.result.numFound == 1
204 | # but in more like this we get two
205 | assert len(res.more_like_these["978-0641723445"].docs) == 2
206 | assert [x["author"] for x in res.more_like_these["978-0641723445"].docs] == [
207 | "Rick Riordan",
208 | "Jostein Gaarder",
209 | ]
210 |
211 |
212 | def test_encoding(si):
213 | docs = {
214 | "id": "978-0641723445",
215 | "cat": ["book", "hardcover"],
216 | "name": "The Höhlentripp Strauß",
217 | "author": "Röüß Itoa",
218 | "series_t": "Percy Jackson and \N{UMBRELLA}nicode",
219 | "sequence_i": 1,
220 | "genre_s": "fantasy",
221 | "inStock": True,
222 | "price": 12.50,
223 | "pages_i": 384,
224 | }
225 | si.add(docs)
226 | si.commit()
227 | res = si.query(author=u"Röüß").execute()
228 | assert res.result.numFound == 1
229 | for k, v in docs.items():
230 | assert res.result.docs[0][k] == v
231 |
232 |
233 | def 
test_multi_value_dates(si): 234 | docs = { 235 | "id": "978", 236 | "important_dts": [ 237 | "1969-01-01", 238 | "1969-01-02", 239 | ], 240 | } 241 | si.add(docs) 242 | si.commit() 243 | _ = si.query(id=u"978").execute() 244 | 245 | 246 | def test_highlighting(si): 247 | docs = { 248 | "id": "978-0641723445", 249 | "cat": ["book", "hardcover"], 250 | "name": "The Höhlentripp Strauß", 251 | "author": "Röüß Itoa", 252 | "series_t": "Percy Jackson and \N{UMBRELLA}nicode", 253 | "sequence_i": 1, 254 | "genre_s": "fantasy", 255 | "inStock": True, 256 | "price": 12.50, 257 | "pages_i": 384, 258 | } 259 | si.add(docs) 260 | si.commit() 261 | res = si.query(author=u"Röüß").highlight("author").execute() 262 | highlighted_field_result = "Röüß Itoa" 263 | # Does the highlighting attribute work? 264 | assert res.highlighting["978-0641723445"]["author"][0] == highlighted_field_result 265 | 266 | # Does each item have highlighting attributes? 267 | assert ( 268 | res.result.docs[0]["solr_highlights"]["author"][0] == highlighted_field_result 269 | ) 270 | 271 | 272 | def test_count(si): 273 | docs = [ 274 | { 275 | "id": "1", 276 | "genre_s": "fantasy", 277 | }, 278 | { 279 | "id": "2", 280 | "genre_s": "fantasy", 281 | }, 282 | ] 283 | si.add(docs) 284 | si.commit() 285 | ungrouped_count = si.query(genre_s="fantasy").count() 286 | ungrouped_count_expected = 2 287 | assert ungrouped_count == ungrouped_count_expected 288 | grouped_count = si.query(genre_s="fantasy").group_by("genre_s").count() 289 | grouped_count_expected = 1 290 | assert grouped_count == grouped_count_expected 291 | 292 | 293 | def test_debug(si): 294 | docs = { 295 | "id": "978-0641723445", 296 | "cat": ["book", "hardcover"], 297 | "name": "The Höhlentripp Strauß", 298 | "author": "Röüß Itoa", 299 | "series_t": "Percy Jackson and \N{UMBRELLA}nicode", 300 | "sequence_i": 1, 301 | "genre_s": "fantasy", 302 | "inStock": True, 303 | "price": 12.50, 304 | "pages_i": 384, 305 | } 306 | si.add(docs) 307 | si.commit() 308 | res = si.query(author="Röüß").debug().execute() 309 | assert res.result.numFound == 1 310 | for k, v in docs.items(): 311 | assert res.result.docs[0][k] == v 312 | assert "explain" in res.debug 313 | # deactivate 314 | res = si.query(author="Röüß").execute() 315 | assert "explain" not in res.debug 316 | 317 | 318 | def test_spellcheck(si): 319 | opts = si.query(name=u"Monstes").spellcheck().options() 320 | assert {"q": "name:Monstes", "spellcheck": True} == opts 321 | 322 | 323 | def test_extract(si): 324 | pdf = os.path.join(os.path.dirname(__file__), "data", "lipsum.pdf") 325 | with open(pdf, "rb") as f: 326 | data = si.extract(f) 327 | assert 0 == data.status 328 | assert "Lorem ipsum" in data.text 329 | assert ["pdfTeX-1.40.13"] == data.metadata["producer"] 330 | 331 | 332 | def test_mlt(si, books): 333 | si.add(books) 334 | si.commit() 335 | res = ( 336 | si.mlt_query("genre_s", interestingTerms="details", mintf=1, mindf=1) 337 | .query(id="978-0641723445") 338 | .execute() 339 | ) 340 | assert res.result.numFound == 2 341 | assert res.interesting_terms == ["genre_s:fantasy", 1.0] 342 | assert [x["author"] for x in res.result.docs] == ["Rick Riordan", "Jostein Gaarder"] 343 | -------------------------------------------------------------------------------- /scorched/tests/test_response.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os.path 3 | import unittest 4 | 5 | import pytz 6 | 7 | import scorched.response 8 | 9 | 10 | class 
ResultsTestCase(unittest.TestCase): 11 | def setUp(self): 12 | file_path = os.path.join( 13 | os.path.dirname(__file__), "dumps", "request_w_facets.json" 14 | ) 15 | with open(file_path) as f: 16 | self.data = f.read() 17 | # termVector data 18 | file_path = os.path.join( 19 | os.path.dirname(__file__), "dumps", "request_w_termvector.json" 20 | ) 21 | with open(file_path) as f: 22 | self.data_tv = f.read() 23 | # error data 24 | file_path = os.path.join( 25 | os.path.dirname(__file__), "dumps", "request_error.json" 26 | ) 27 | with open(file_path) as f: 28 | self.data_error = f.read() 29 | 30 | file_path = os.path.join(os.path.dirname(__file__), "dumps", "request_hl.json") 31 | with open(file_path) as f: 32 | self.data_hl = f.read() 33 | 34 | file_path = os.path.join( 35 | os.path.dirname(__file__), "dumps", "request_hl_grouped.json" 36 | ) 37 | with open(file_path) as f: 38 | self.data_hl_grouped = f.read() 39 | 40 | def test_response(self): 41 | res = scorched.response.SolrResponse.from_json( 42 | self.data, "id", datefields=("*_dt", "modified") 43 | ) 44 | self.assertEqual(res.status, 0) 45 | self.assertEqual(res.QTime, 1) 46 | self.assertEqual(res.result.numFound, 3) 47 | # iterable 48 | self.assertEqual( 49 | [x["name"] for x in res], 50 | [ 51 | "The Lightning Thief", 52 | "The Sea of Monsters", 53 | "Sophie's World : The Greek Philosophers", 54 | ], 55 | ) 56 | self.assertEqual( 57 | [x["name"] for x in res.result.docs], 58 | [ 59 | "The Lightning Thief", 60 | "The Sea of Monsters", 61 | "Sophie's World : The Greek Philosophers", 62 | ], 63 | ) 64 | self.assertEqual( 65 | [x["created_dt"] for x in res.result.docs if "created_dt" in x], 66 | [datetime.datetime(2009, 7, 23, 3, 24, 34, 376, tzinfo=pytz.utc)], 67 | ) 68 | self.assertEqual( 69 | [x["modified"] for x in res.result.docs if "modified" in x], 70 | [datetime.datetime(2009, 7, 23, 3, 24, 34, 376, tzinfo=pytz.utc)], 71 | ) 72 | self.assertEqual( 73 | res.facet_counts.__dict__, 74 | { 75 | "facet_fields": { 76 | "cat": [("book", 3), ("paperback", 2), ("hardcover", 1)] 77 | }, 78 | "facet_dates": {}, 79 | "facet_queries": {}, 80 | "facet_ranges": { 81 | "created_dt": { 82 | "gap": "+1YEARS", 83 | "start": "2009-01-01T00:00:00Z", 84 | "end": "2012-01-01T00:00:00Z", 85 | "counts": [ 86 | ("2009-01-01T00:00:00Z", 1), 87 | ("2010-01-01T00:00:00Z", 0), 88 | ("2011-01-01T00:00:00Z", 0), 89 | ], 90 | }, 91 | }, 92 | "facet_pivot": {}, 93 | }, 94 | ) 95 | 96 | self.assertRaises(ValueError, res.from_json, self.data_error, "id") 97 | self.assertEqual(res.__str__(), "3 results found, starting at #0") 98 | self.assertEqual(len(res), 3) 99 | 100 | def test_term_vectors(self): 101 | res_tv = scorched.response.SolrResponse.from_json( 102 | self.data_tv, "id", datefields=("date",) 103 | ) 104 | self.assertEqual(res_tv.term_vectors["uniqueKeyFieldName"], "uid") 105 | self.assertEqual(res_tv.term_vectors["warnings"], {"noTermVectors": ["title"]}) 106 | self.assertEqual( 107 | res_tv.term_vectors["ffaa9370-5182-5810-b8a9-54b751ef0606"]["uniqueKey"], 108 | "ffaa9370-5182-5810-b8a9-54b751ef0606", 109 | ) 110 | self.assertEqual( 111 | res_tv.term_vectors["ffaa9370-5182-5810-b8a9-54b751ef0606"][ 112 | "weighted_words" 113 | ]["wirken"], 114 | {"tf": 1, "df": 106}, 115 | ) 116 | self.assertEqual( 117 | res_tv.term_vectors["9ce8ef2d-6e0f-5647-ae4c-2aaaca37b28f"][ 118 | "weighted_words" 119 | ]["anlagen"], 120 | {"tf": 3, "df": 21484}, 121 | ) 122 | 123 | def test_highlighting(self): 124 | res_hl = scorched.response.SolrResponse.from_json(self.data_hl, 
"id") 125 | highlights = {"author": ["John Muir"]} 126 | self.assertEqual(res_hl.highlighting["978"], highlights) 127 | self.assertEqual(res_hl.result.docs[0]["solr_highlights"], highlights) 128 | 129 | def test_highlighting_with_grouping(self): 130 | res_hl_group = scorched.response.SolrResponse.from_json( 131 | self.data_hl_grouped, "id", datefields=("important_dts",) 132 | ) 133 | self.assertEqual(res_hl_group.group_field, "inStock") 134 | self.assertEqual( 135 | getattr(res_hl_group.groups, res_hl_group.group_field)["matches"], 2 136 | ) 137 | ngroups = getattr(res_hl_group.groups, res_hl_group.group_field)["ngroups"] 138 | self.assertEqual(ngroups, 1) 139 | 140 | groups = getattr(res_hl_group.groups, res_hl_group.group_field)["groups"] 141 | self.assertEqual(len(groups), ngroups) 142 | 143 | highlights = {"author": ["John Muir"]} 144 | self.assertEqual(groups[0]["doclist"]["docs"][0]["solr_highlights"], highlights) 145 | self.assertEqual( 146 | type(groups[0]["doclist"]["docs"][0]["important_dts"][0]), datetime.datetime 147 | ) 148 | -------------------------------------------------------------------------------- /scorched/tests/test_search.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from scorched.exc import SolrError 3 | from scorched.search import (SolrSearch, MltSolrSearch, PaginateOptions, 4 | SortOptions, FieldLimitOptions, FacetOptions, 5 | GroupOptions, HighlightOptions, DismaxOptions, 6 | MoreLikeThisOptions, EdismaxOptions, 7 | PostingsHighlightOptions, FacetPivotOptions, 8 | RequestHandlerOption, DebugOptions, 9 | params_from_dict, FacetRangeOptions, 10 | TermVectorOptions, StatOptions, 11 | is_iter) 12 | from scorched.strings import WildcardString 13 | import pytest 14 | 15 | 16 | debug = False 17 | 18 | base_good_query_data = { 19 | "query_by_term": [ 20 | (["hello"], {}, 21 | [("q", b"hello")]), 22 | (["hello"], {"int_field": 3}, 23 | [("q", b"hello AND int_field:3")]), 24 | (["hello", "world"], {}, 25 | [("q", b"hello AND world")]), 26 | # NB this next is not really what we want, 27 | # probably this should warn 28 | (["hello world"], {}, 29 | [("q", b"hello\\ world")]), 30 | ], 31 | 32 | "query_by_phrase": [ 33 | (["hello"], {}, 34 | [("q", b"hello")]), 35 | (["hello"], {"int_field": 3}, 36 | # Non-text data is always taken to be a term, and terms come before 37 | # phrases, so order is reversed 38 | [("q", b"int_field:3 AND hello")]), 39 | (["hello", "world"], {}, 40 | [("q", b"hello AND world")]), 41 | (["hello world"], {}, 42 | [("q", b"hello\\ world")]), 43 | ([], {'string_field': ['hello world', 'goodbye, cruel world']}, 44 | [("q", b"string_field:goodbye,\\ cruel\\ world AND string_field:hello\\ world")]), 45 | ], 46 | 47 | "query": [ 48 | # Basic queries 49 | (["hello"], {}, 50 | [("q", b"hello")]), 51 | (["hello"], {"int_field": 3}, 52 | [("q", b"hello AND int_field:3")]), 53 | (["hello", "world"], {}, 54 | [("q", b"hello AND world")]), 55 | (["hello world"], {}, 56 | [("q", b"hello\\ world")]), 57 | # Test fields 58 | # Boolean fields take any truth-y value 59 | ([], {"boolean_field": True}, 60 | [("q", b"boolean_field:true")]), 61 | ([], {"boolean_field": 'true'}, 62 | [("q", b"boolean_field:true")]), 63 | ([], {"boolean_field": "false"}, 64 | [("q", b"boolean_field:false")]), 65 | ([], {"boolean_field": False}, 66 | [("q", b"boolean_field:false")]), 67 | ([], {"int_field": 3}, 68 | [("q", b"int_field:3")]), 69 | ([], {"sint_field": 3}, 70 | [("q", b"sint_field:3")]), 71 | ([], {"long_field": 2 
** 31}, 72 | [("q", b"long_field:2147483648")]), 73 | ([], {"slong_field": 2 ** 31}, 74 | [("q", b"slong_field:2147483648")]), 75 | ([], {"float_field": 3.0}, 76 | [("q", b"float_field:3.0")]), 77 | ([], {"sfloat_field": 3.0}, 78 | [("q", b"sfloat_field:3.0")]), 79 | ([], {"double_field": 3.0}, 80 | [("q", b"double_field:3.0")]), 81 | ([], {"sdouble_field": 3.0}, 82 | [("q", b"sdouble_field:3.0")]), 83 | ([], {"date_field": datetime.datetime(2009, 1, 1)}, 84 | [("q", b"date_field:2009\\-01\\-01T00\\:00\\:00Z")]), 85 | # Test ranges 86 | ([], {"int_field__any": True}, 87 | [("q", b"int_field:[* TO *]")]), 88 | ([], {"int_field__lt": 3}, 89 | [("q", b"int_field:{* TO 3}")]), 90 | ([], {"int_field__gt": 3}, 91 | [("q", b"int_field:{3 TO *}")]), 92 | ([], {"int_field__rangeexc": (-3, 3)}, 93 | [("q", b"int_field:{\\-3 TO 3}")]), 94 | ([], {"int_field__rangeexc": (3, -3)}, 95 | [("q", b"int_field:{\\-3 TO 3}")]), 96 | ([], {"int_field__lte": 3}, 97 | [("q", b"int_field:[* TO 3]")]), 98 | ([], {"int_field__gte": 3}, 99 | [("q", b"int_field:[3 TO *]")]), 100 | ([], {"int_field__range": (-3, 3)}, 101 | [("q", b"int_field:[\\-3 TO 3]")]), 102 | ([], {"int_field__range": (3, -3)}, 103 | [("q", b"int_field:[\\-3 TO 3]")]), 104 | ([], {"date_field__lt": datetime.datetime(2009, 1, 1)}, 105 | [("q", b"date_field:{* TO 2009\\-01\\-01T00\\:00\\:00Z}")]), 106 | ([], {"date_field__gt": datetime.datetime(2009, 1, 1)}, 107 | [("q", b"date_field:{2009\\-01\\-01T00\\:00\\:00Z TO *}")]), 108 | ([], { 109 | "date_field__rangeexc": (datetime.datetime(2009, 1, 1), datetime.datetime(2009, 1, 2))}, 110 | [("q", b"date_field:{2009\\-01\\-01T00\\:00\\:00Z TO 2009\\-01\\-02T00\\:00\\:00Z}")]), 111 | ([], {"date_field__lte": datetime.datetime(2009, 1, 1)}, 112 | [("q", b"date_field:[* TO 2009\\-01\\-01T00\\:00\\:00Z]")]), 113 | ([], {"date_field__gte": datetime.datetime(2009, 1, 1)}, 114 | [("q", b"date_field:[2009\\-01\\-01T00\\:00\\:00Z TO *]")]), 115 | ([], { 116 | "date_field__range": (datetime.datetime(2009, 1, 1), datetime.datetime(2009, 1, 2))}, 117 | [("q", b"date_field:[2009\\-01\\-01T00\\:00\\:00Z TO 2009\\-01\\-02T00\\:00\\:00Z]")]), 118 | ([], {'string_field': ['hello world', 'goodbye, cruel world']}, 119 | [("q", b"string_field:goodbye,\\ cruel\\ world AND string_field:hello\\ world")]), 120 | # Raw strings 121 | ([], {'string_field': "abc*???"}, 122 | [("q", b"string_field:abc\\*\\?\\?\\?")]), 123 | ], 124 | } 125 | 126 | good_query_data = { 127 | "filter_by_term": [ 128 | (["hello"], {}, 129 | [("fq", b"hello"), ("q", b"*:*")]), 130 | # test multiple fq 131 | (["hello"], {"int_field": 3}, 132 | [("fq", b"hello"), ("fq", b"int_field:3"), ("q", b"*:*")]), 133 | (["hello", "world"], {}, 134 | [("fq", b"hello"), ("fq", b"world"), ("q", b"*:*")]), 135 | # NB this next is not really what we want, 136 | # probably this should warn 137 | (["hello world"], {}, 138 | [("fq", b"hello\\ world"), ("q", b"*:*")]), 139 | ], 140 | 141 | "filter_by_phrase": [ 142 | (["hello"], {}, 143 | [("fq", b"hello"), ("q", b"*:*")]), 144 | # test multiple fq 145 | (["hello"], {"int_field": 3}, 146 | [("fq", b"hello"), ("fq", b"int_field:3"), ("q", b"*:*")]), 147 | (["hello", "world"], {}, 148 | [("fq", b"hello"), ("fq", b"world"), ("q", b"*:*")]), 149 | (["hello world"], {}, 150 | [("fq", b"hello\\ world"), ("q", b"*:*")]), 151 | ], 152 | 153 | "filter": [ 154 | (["hello"], {}, 155 | [("fq", b"hello"), ("q", b"*:*")]), 156 | # test multiple fq 157 | (["hello"], {"int_field": 3}, 158 | [("fq", b"hello"), ("fq", b"int_field:3"), ("q", 
b"*:*")]), 159 | (["hello", "world"], {}, 160 | [("fq", b"hello"), ("fq", b"world"), ("q", b"*:*")]), 161 | (["hello world"], {}, 162 | [("fq", b"hello\\ world"), ("q", b"*:*")]), 163 | ], 164 | } 165 | good_query_data.update(base_good_query_data) 166 | 167 | 168 | def check_query_data(method, args, kwargs, output): 169 | solr_search = SolrSearch(None) 170 | p = getattr(solr_search, method)(*args, **kwargs).params() 171 | assert p == output, "Unequal: %r, %r" % (p, output) 172 | 173 | 174 | def check_mlt_query_data(method, args, kwargs, output): 175 | solr_search = MltSolrSearch(None) 176 | p = getattr(solr_search, method)(*args, **kwargs).params() 177 | assert p == output, "Unequal: %r, %r" % (p, output) 178 | 179 | 180 | good_option_data = { 181 | PaginateOptions: ( 182 | ({"start": 5, "rows": 10}, 183 | {"start": 5, "rows": 10}), 184 | ({"start": 5, "rows": None}, 185 | {"start": 5}), 186 | ({"start": None, "rows": 10}, 187 | {"rows": 10}), 188 | ), 189 | FacetOptions: ( 190 | ({"fields": "int_field"}, 191 | {"facet": True, "facet.field": ["int_field"]}), 192 | ({"fields": ["int_field", "text_field"]}, 193 | {"facet": True, "facet.field": ["int_field", "text_field"]}), 194 | ({"prefix": "abc"}, 195 | {"facet": True, "facet.prefix": "abc"}), 196 | ({"prefix": "abc", "sort": True, "limit": 3, "offset": 25, "mincount": 1, "missing": False, "method": "enum"}, 197 | {"facet": True, "facet.prefix": "abc", "facet.sort": True, "facet.limit": 3, "facet.offset": 25, "facet.mincount": 1, "facet.missing": False, "facet.method": "enum"}), 198 | ({"fields": "int_field", "prefix": "abc"}, 199 | {"facet": True, "facet.field": ["int_field"], "f.int_field.facet.prefix": "abc"}), 200 | ({"fields": "int_field", "prefix": "abc", "limit": 3}, 201 | {"facet": True, "facet.field": ["int_field"], "f.int_field.facet.prefix": "abc", "f.int_field.facet.limit": 3}), 202 | ({"fields": ["int_field", "text_field"], "prefix": "abc", "limit": 3}, 203 | {"facet": True, "facet.field": ["int_field", "text_field"], "f.int_field.facet.prefix": "abc", "f.int_field.facet.limit": 3, "f.text_field.facet.prefix": "abc", "f.text_field.facet.limit": 3, }), 204 | ), 205 | FacetRangeOptions: ( 206 | ({"fields": "field1", "start": 10, "end": 20, "gap": 2, "hardend": False, 207 | "include": "outer", "other": "all", "limit": 10, "mincount": 1}, 208 | {"facet": True, "facet.range": ["field1"], "f.field1.facet.range.start": 10, 209 | "f.field1.facet.range.end": 20, "f.field1.facet.range.gap": 2, 210 | "f.field1.facet.range.hardend": "false", "f.field1.facet.range.include": "outer", 211 | "f.field1.facet.range.other": "all", "f.field1.facet.limit": 1, 212 | "f.field1.facet.mincount": 1}), 213 | ), 214 | FacetPivotOptions: ( 215 | ({"fields": ["text_field"]}, 216 | {"facet": True, "facet.pivot": "text_field"}), 217 | ({"fields": ["int_field", "text_field"]}, 218 | {"facet": True, "facet.pivot": "int_field,text_field"}), 219 | ({"fields": ["int_field", "text_field"], "mincount": 2}, 220 | {"facet": True, "facet.pivot": "int_field,text_field", "facet.pivot.mincount": 2}), 221 | ), 222 | GroupOptions: ( 223 | ({"field": "int_field", "limit": 10}, 224 | {"group": True, "group.limit": 10, "group.field": "int_field"}), 225 | ), 226 | SortOptions: ( 227 | ({"field": "int_field"}, 228 | {"sort": "int_field asc"}), 229 | ({"field": "-int_field"}, 230 | {"sort": "int_field desc"}), 231 | ), 232 | HighlightOptions: ( 233 | ({"fields": "int_field"}, 234 | {"hl": True, "hl.fl": "int_field"}), 235 | ({"fields": ["int_field", "text_field"]}, 236 | {"hl": 
True, "hl.fl": "int_field,text_field"}), 237 | ({"snippets": 3}, 238 | {"hl": True, "hl.snippets": 3}), 239 | ({"snippets": 3, "fragsize": 5, "mergeContinuous": True, "requireFieldMatch": True, "maxAnalyzedChars": 500, "alternateField": "text_field", "maxAlternateFieldLength": 50, "formatter": "simple", "simple.pre": "", "simple.post": "", "fragmenter": "regex", "usePhraseHighlighter": True, "highlightMultiTerm": True, "regex.slop": 0.2, "regex.pattern": "\\w", "regex.maxAnalyzedChars": 100}, 240 | {"hl": True, "hl.snippets": 3, "hl.fragsize": 5, "hl.mergeContinuous": True, "hl.requireFieldMatch": True, "hl.maxAnalyzedChars": 500, "hl.alternateField": "text_field", "hl.maxAlternateFieldLength": 50, "hl.formatter": "simple", "hl.simple.pre": "", "hl.simple.post": "", "hl.fragmenter": "regex", "hl.usePhraseHighlighter": True, "hl.highlightMultiTerm": True, "hl.regex.slop": 0.2, "hl.regex.pattern": "\\w", "hl.regex.maxAnalyzedChars": 100}), 241 | ({"fields": "int_field", "snippets": "3"}, 242 | {"hl": True, "hl.fl": "int_field", "f.int_field.hl.snippets": 3}), 243 | ({"fields": "int_field", "snippets": 3, "fragsize": 5}, 244 | {"hl": True, "hl.fl": "int_field", "f.int_field.hl.snippets": 3, "f.int_field.hl.fragsize": 5}), 245 | ({"fields": ["int_field", "text_field"], "snippets": 3, "fragsize": 5}, 246 | {"hl": True, "hl.fl": "int_field,text_field", "f.int_field.hl.snippets": 3, "f.int_field.hl.fragsize": 5, "f.text_field.hl.snippets": 3, "f.text_field.hl.fragsize": 5}), 247 | ), 248 | PostingsHighlightOptions: ( 249 | ({"fields": "int_field"}, 250 | {"hl": True, "hl.fl": "int_field"}), 251 | ({"fields": ["int_field", "text_field"]}, 252 | {"hl": True, "hl.fl": "int_field,text_field"}), 253 | ({"snippets": 3}, 254 | {"hl": True, "hl.snippets": 3}), 255 | ({"fields": ["int_field", "text_field"], "snippets": 1, 256 | "tag.pre": "<em>", "tag.post": "<em>", 257 | "tag.ellipsis": "...", "defaultSummary": True, "encoder": "simple", 258 | "score.k1": 1.2, "score.b": 0.75, "score.pivot": 87, 259 | "bs.type": "SENTENCE", "maxAnalyzedChars": 10000, }, 260 | {'f.text_field.hl.score.b': 0.75, 'f.int_field.hl.encoder': 'simple', 261 | 'f.int_field.hl.tag.pre': '<em>', 'f.text_field.hl.tag.pre': 262 | '<em>', 'f.text_field.hl.defaultSummary': True, 263 | 'f.text_field.hl.tag.post': '<em>', 'f.text_field.hl.bs.type': 264 | 'SENTENCE', 'f.int_field.hl.tag.ellipsis': '...', 265 | 'f.text_field.hl.score.k1': 1.2, 'f.text_field.hl.tag.ellipsis': 266 | '...', 'f.int_field.hl.score.pivot': 87.0, 267 | 'f.int_field.hl.tag.post': '<em>', 'f.int_field.hl.bs.type': 268 | 'SENTENCE', 'f.int_field.hl.score.b': 0.75, 269 | 'f.text_field.hl.maxAnalyzedChars': '10000', 'hl': True, 270 | 'f.text_field.hl.encoder': 'simple', 'hl.fl': 271 | 'int_field,text_field', 'f.int_field.hl.snippets': 1, 272 | 'f.text_field.hl.snippets': 1, 'f.int_field.hl.maxAnalyzedChars': 273 | '10000', 'f.int_field.hl.score.k1': 1.2, 274 | 'f.int_field.hl.defaultSummary': True, 'f.text_field.hl.score.pivot': 275 | 87.0}), 276 | ), 277 | MoreLikeThisOptions: ( 278 | ({"fields": "int_field"}, 279 | {"mlt": True, "mlt.fl": "int_field"}), 280 | ({"fields": ["int_field", "text_field"]}, 281 | {"mlt": True, "mlt.fl": "int_field,text_field"}), 282 | ({"fields": ["text_field", "string_field"], "query_fields": {"text_field": 0.25, "string_field": 0.75}}, 283 | {"mlt": True, "mlt.fl": "string_field,text_field", "mlt.qf": "text_field^0.25 string_field^0.75"}), 284 | ({"fields": "text_field", "count": 1}, 285 | {"mlt": True, "mlt.fl": "text_field", 
"mlt.count": 1}), 286 | ), 287 | TermVectorOptions: ( 288 | ({}, 289 | {"tv": True}), 290 | ({"offsets": True}, 291 | {"tv": True, "tv.offsets": True}), 292 | ({"fields": "text_field"}, 293 | {"tv": True, "tv.fl": "text_field"}), 294 | ({"fields": ["int_field", "text_field"]}, 295 | {"tv": True, "tv.fl": "int_field, text_field"}), 296 | ({"all": True, "df": 1, "offsets": 0, "positions": False, 297 | "payloads": "true", "tf": False, "tf_idf": True}, 298 | {'tv': True, 'tv.df': True, 'tv.all': True, 'tv.tf_idf': True, 299 | 'tv.tf': False, 'tv.offsets': False, 'tv.payloads': True, 300 | 'tv.positions': False}), 301 | ({"fields": "text_field", "all": True}, 302 | {'tv': True, 'tv.fl': 'text_field', 'f.text_field.tv.all': True}), 303 | ({"fields": ["int_field", "text_field"], "tf": True}, 304 | {'tv': True, 'tv.fl': 'int_field,text_field', 305 | 'f.text_field.tv.tf': True, 'f.int_field.tv.tf': True}), 306 | ), 307 | DismaxOptions: ( 308 | ({"qf": {"text_field": 0.25, "string_field": 0.75}}, 309 | {'defType': 'dismax', 'qf': 'text_field^0.25 string_field^0.75'}), 310 | ({"pf": {"text_field": 0.25, "string_field": 0.75}}, 311 | {'defType': 'dismax', 'pf': 'text_field^0.25 string_field^0.75'}), 312 | ({"qf": {"text_field": 0.25, "string_field": 0.75}, "mm": 2}, 313 | {'mm': 2, 'defType': 'dismax', 'qf': 'text_field^0.25 string_field^0.75'}), 314 | ), 315 | EdismaxOptions: ( 316 | ({"qf": {"text_field": 0.25, "string_field": 0.75}}, 317 | {'defType': 'edismax', 'qf': 'text_field^0.25 string_field^0.75'}), 318 | ({"pf": {"text_field": 0.25, "string_field": 0.75}}, 319 | {'defType': 'edismax', 'pf': 'text_field^0.25 string_field^0.75'}), 320 | ({"qf": {"text_field": 0.25, "string_field": 0.75}, "mm": 2}, 321 | {'mm': 2, 'defType': 'edismax', 'qf': 'text_field^0.25 string_field^0.75'}), 322 | ), 323 | FieldLimitOptions: ( 324 | ({}, 325 | {}), 326 | ({"fields": "int_field"}, 327 | {"fl": "int_field"}), 328 | ({"fields": ["int_field", "text_field"]}, 329 | {"fl": "int_field,text_field"}), 330 | ({"score": True}, 331 | {"fl": "score"}), 332 | ({"all_fields": True, "score": True}, 333 | {"fl": "*,score"}), 334 | ({"fields": "int_field", "score": True}, 335 | {"fl": "int_field,score"}), 336 | ), 337 | RequestHandlerOption: ( 338 | ({"handler": None}, 339 | {}), 340 | ({"handler": "hans"}, 341 | {'qt': 'hans'}), 342 | ), 343 | DebugOptions: ( 344 | ({"debug": None}, 345 | {}), 346 | ({"debug": False}, 347 | {}), 348 | ({"debug": True}, 349 | {'debugQuery': True}), 350 | ), 351 | StatOptions: ( 352 | ({"fields": "int_field"}, 353 | {"stats": True, "stats.field": ['int_field']}), 354 | ({"fields": ["int_field", "float_field"]}, 355 | {"stats": True, "stats.field": ['int_field', 'float_field']}), 356 | ({"fields": ["int_field", "float_field"], "facet": "field0"}, 357 | {"stats": True, "stats.field": ['int_field', 'float_field'], 358 | "stats.facet": "field0"}), 359 | ), 360 | } 361 | 362 | 363 | def check_good_option_data(OptionClass, kwargs, output): 364 | optioner = OptionClass() 365 | optioner.update(**kwargs) 366 | assert set(optioner.options()) == set(output), "Unequal: %r, %r" % ( 367 | optioner.options(), output) 368 | 369 | # All these tests should really nominate which exception they're going to 370 | # throw. 
371 | bad_option_data = { 372 | PaginateOptions: ( 373 | {"start": -1, "rows": None}, # negative start 374 | {"start": None, "rows": -1}, # negative rows 375 | ), 376 | FacetOptions: ( 377 | {"oops": True}, # undefined option 378 | {"limit": "a"}, # invalid type 379 | {"sort": "yes"}, # invalid choice 380 | {"offset": -1}, # invalid value 381 | ), 382 | SortOptions: ( 383 | ), 384 | HighlightOptions: ( 385 | {"oops": True}, # undefined option 386 | {"snippets": "a"}, # invalid type 387 | ), 388 | MoreLikeThisOptions: ( 389 | # string_field in query_fields, not fields 390 | {"fields": "text_field", "query_fields": 391 | {"text_field": 0.25, "string_field": 0.75}}, 392 | # Non-float value for boost 393 | {"fields": "text_field", "query_fields": {"text_field": "a"}}, 394 | {"fields": "text_field", "oops": True}, # undefined option 395 | {"fields": "text_field", "count": "a"} # Invalid value for option 396 | ), 397 | TermVectorOptions: ( 398 | {"foobar": True}, # undefined option 399 | ), 400 | DismaxOptions: ( 401 | # "ss" is not a recognised dismax option 402 | {"ss": {"text_field": 0.25, "string_field": 0.75}}, 403 | # non-float boost value in pf 404 | {"pf": {"text_field": 0.25, "string_field": "ABBS"}}, 405 | ), 406 | StatOptions: ( 407 | {"oops": True}, # undefined option 408 | ) 409 | } 410 | 411 | 412 | def check_bad_option_data(OptionClass, kwargs): 413 | option = OptionClass() 414 | exception_raised = False 415 | try: 416 | option.update(**kwargs) 417 | except SolrError: 418 | exception_raised = True 419 | assert exception_raised 420 | 421 | 422 | complex_boolean_queries = ( 423 | (lambda q: q.query("hello world").filter(q.Q(text_field="tow") | q.Q(boolean_field=False, int_field__gt=3)), 424 | [('fq', b'text_field:tow OR (boolean_field:false AND int_field:{3 TO *})'), ('q', b'hello\\ world')]), 425 | # test multiple fq 426 | (lambda q: q.query("hello world").filter(q.Q(text_field="tow") & q.Q(boolean_field=False, int_field__gt=3)), 427 | [('fq', b'boolean_field:false'), ('fq', b'int_field:{3 TO *}'), ('fq', b'text_field:tow'), ('q', b'hello\\ world')]), 428 | # Test various combinations of NOTs at the top level. 429 | # Sometimes we need to do the *:* trick, sometimes not.
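# (Background: a purely negative Lucene clause matches nothing on its own,
# so whenever a NOT ends up nested inside an OR it has to be rewritten
# against the match-all query, roughly
#     abc OR NOT def   ->   abc OR (*:* AND NOT def)
# while a NOT at the very top level can be emitted as-is.)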
430 | (lambda q: q.query(~q.Q("hello world")), 431 | [('q', b'NOT hello\\ world')]), 432 | (lambda q: q.query(~q.Q("hello world") & ~q.Q(int_field=3)), 433 | [('q', b'NOT hello\\ world AND NOT int_field:3')]), 434 | (lambda q: q.query("hello world", ~q.Q(int_field=3)), 435 | [('q', b'hello\\ world AND NOT int_field:3')]), 436 | (lambda q: q.query("abc", q.Q("def"), ~q.Q(int_field=3)), 437 | [('q', b'abc AND def AND NOT int_field:3')]), 438 | (lambda q: q.query("abc", q.Q("def") & ~q.Q(int_field=3)), 439 | [('q', b'abc AND def AND NOT int_field:3')]), 440 | (lambda q: q.query("abc", q.Q("def") | ~q.Q(int_field=3)), 441 | [('q', b'abc AND (def OR (*:* AND NOT int_field:3))')]), 442 | (lambda q: q.query(q.Q("abc") | ~q.Q("def")), 443 | [('q', b'abc OR (*:* AND NOT def)')]), 444 | (lambda q: q.query(q.Q("abc") | q.Q(~q.Q("def"))), 445 | [('q', b'abc OR (*:* AND NOT def)')]), 446 | # Make sure that ANDs are flattened 447 | (lambda q: q.query("def", q.Q("abc"), q.Q(q.Q("xyz"))), 448 | [('q', b'abc AND def AND xyz')]), 449 | # Make sure that ORs are flattened 450 | (lambda q: q.query(q.Q("def") | q.Q(q.Q("xyz"))), 451 | [('q', b'def OR xyz')]), 452 | # Make sure that empty queries are discarded in ANDs 453 | (lambda q: q.query("def", q.Q("abc"), q.Q(), q.Q(q.Q() & q.Q("xyz"))), 454 | [('q', b'abc AND def AND xyz')]), 455 | # Make sure that empty queries are discarded in ORs 456 | (lambda q: q.query(q.Q() | q.Q("def") | q.Q(q.Q() | q.Q("xyz"))), 457 | [('q', b'def OR xyz')]), 458 | # Test cancellation of NOTs. 459 | (lambda q: q.query(~q.Q(~q.Q("def"))), 460 | [('q', b'def')]), 461 | (lambda q: q.query(~q.Q(~q.Q(~q.Q("def")))), 462 | [('q', b'NOT def')]), 463 | # Test it works through sub-sub-queries 464 | (lambda q: q.query(~q.Q(q.Q(q.Q(~q.Q(~q.Q("def")))))), 465 | [('q', b'NOT def')]), 466 | # Even with empty queries in there 467 | (lambda q: q.query(~q.Q(q.Q(q.Q() & q.Q(q.Q() | ~q.Q(~q.Q("def")))))), 468 | [('q', b'NOT def')]), 469 | # Test escaping of AND, OR, NOT 470 | (lambda q: q.query("AND", "OR", "NOT"), 471 | [('q', b'"AND" AND "NOT" AND "OR"')]), 472 | # Test exclude 473 | (lambda q: q.query("blah").query(~q.Q(q.Q("abc") | q.Q("def") | q.Q("ghi"))), 474 | [('q', b'blah AND NOT (abc OR def OR ghi)')]), 475 | # Try boosts 476 | (lambda q: q.query("blah").query(q.Q("def") ** 1.5), 477 | [('q', b'blah AND def^1.5')]), 478 | (lambda q: q.query("blah").query((q.Q("def") | q.Q("ghi")) ** 1.5), 479 | [('q', b'blah AND (def OR ghi)^1.5')]), 480 | (lambda q: q.query("blah").query(q.Q("def", ~q.Q("pqr") | q.Q("mno")) ** 1.5), 481 | [('q', b'blah AND (def AND ((*:* AND NOT pqr) OR mno))^1.5')]), 482 | # wildcard 483 | (lambda q: q.query("blah").query(q.Q(WildcardString("def*"), 484 | ~q.Q(miu=WildcardString("pqr*")) | q.Q("mno")) ** 1.5), 485 | [('q', b'blah AND (def* AND ((*:* AND NOT miu:pqr*) OR mno))^1.5')]), 486 | (lambda q: q.query("blah").query(q.Q("def*", ~q.Q(miu="pqr*") | q.Q("mno")) ** 1.5), 487 | [('q', b'blah AND (def\\* AND ((*:* AND NOT miu:pqr\\*) OR mno))^1.5')]), 488 | # And boost_relevancy 489 | (lambda q: q.query("blah").boost_relevancy(1.5, int_field=3), 490 | [('q', b'blah OR (blah AND int_field:3^1.5)')]), 491 | (lambda q: q.query("blah").boost_relevancy(1.5, int_field=3).boost_relevancy(2, string_field='def'), 492 | [('q', b'blah OR (blah AND (int_field:3^1.5 OR string_field:def^2))')]), 493 | (lambda q: q.query("blah").query("blah2").boost_relevancy(1.5, int_field=3), 494 | [('q', b'(blah AND blah2) OR (blah AND blah2 AND int_field:3^1.5)')]), 495 | (lambda q: 
q.query(q.Q("blah") | q.Q("blah2")).boost_relevancy(1.5, int_field=3), 496 | [('q', b'blah OR blah2 OR ((blah OR blah2) AND int_field:3^1.5)')]), 497 | # And ranges 498 | (lambda q: q.query(int_field__any=True), 499 | [('q', b'int_field:[* TO *]')]), 500 | (lambda q: q.query("blah", ~q.Q(int_field__any=True)), 501 | [('q', b'blah AND NOT int_field:[* TO *]')]), 502 | # facet 503 | (lambda q: q.query("game").facet_query(price__lt=7).facet_query(price__gte=7), 504 | [('facet', b'true'), ('facet.query', b'price:[7 TO *]'), 505 | ('facet.query', b'price:{* TO 7}'), ('q', b'game')]), 506 | # group 507 | (lambda q: q.query().group_by('major_value', limit=10), 508 | [('group', b'true'), ('group.field', b'major_value'), ('group.limit', b'10'), 509 | ('group.ngroups', b'true'), ('q', b'*:*')]), 510 | # highlight 511 | (lambda q: q.query("hello world").filter(q.Q(text_field="tow")).highlight('title'), 512 | [('fq', b'text_field:tow'), ('hl', b'true'), ('hl.fl', b'title'), ('q', b'hello\\ world')]), 513 | # termVector 514 | (lambda q: q.query("hello world").filter(q.Q(text_field="tow")).term_vector(df=True), 515 | [('fq', b'text_field:tow'), ('tv', b'true'), ('tv.df', b'true'), ('q', b'hello\\ world')]), 516 | # sort 517 | (lambda q: q.query("hello world").filter(q.Q(text_field="tow")).sort_by('title'), 518 | [('fq', b'text_field:tow'), ('q', b'hello\\ world'), ('sort', b'title asc')]), 519 | # dismax 520 | (lambda q: q.query("hello").filter(q.Q(text_field="tow")).alt_parser( 521 | "dismax", qf={"text_field": 0.25, "string_field": 0.75}), 522 | [('defType', b'dismax'), ('fq', b'text_field:tow'), ('q', b'hello'), 523 | ('qf', b'text_field^0.25 string_field^0.75')]), 524 | # edismax 525 | (lambda q: q.query("hello").filter(q.Q(text_field="tow")).alt_parser( 526 | "edismax", qf={"text_field": 0.25, "string_field": 0.75}, 527 | f={'alias1':['field1', 'field2']} 528 | ), 529 | [('defType', b'edismax'), ('fq', b'text_field:tow'), ('q', b'hello'), 530 | ('qf', b'text_field^0.25 string_field^0.75'), 531 | ('f.alias1.qf', b'field1 field2')]), 532 | # field_limit 533 | (lambda q: q.query().field_limit(['name', 'foo']), 534 | [('fl', b'foo,name'), ('q', b'*:*')]), 535 | (lambda q: q.query().field_limit('foo'), 536 | [('fl', b'foo'), ('q', b'*:*')]), 537 | # set_requesthandler 538 | (lambda q: q.query("hello").set_requesthandler("foo"), 539 | [('q', b'hello'), ('qt', b'foo')]), 540 | # debug 541 | (lambda q: q.query("hello").debug(), 542 | [('debugQuery', b'true'), ('q', b'hello')]), 543 | ) 544 | 545 | 546 | def check_complex_boolean_query(solr_search, query, output): 547 | p = query(solr_search).params() 548 | assert set(p) == set(output), "Unequal: %r, %r" % (p, output) 549 | # And check no mutation of the base object 550 | q = query(solr_search).params() 551 | assert p == q, "Unequal: %r, %r" % (p, q) 552 | 553 | 554 | param_encode_data = ( 555 | ({"int": 3, "string": "string", "unicode": "unicode"}, 556 | [("int", b"3"), ("string", b"string"), ("unicode", b"unicode")]), 557 | ({"int": 3, "string": "string", "unicode": "\N{UMBRELLA}nicode"}, 558 | [("int", b"3"), ("string", b"string"), ("unicode", b"\xe2\x98\x82nicode")]), 559 | # python3 needs unicode as keys 560 | ({"int": 3, "string": "string", "\N{UMBRELLA}nicode": "\N{UMBRELLA}nicode"}, 561 | [("int", b"3"), ("string", b"string"), ("\N{UMBRELLA}nicode", b"\xe2\x98\x82nicode")]), 562 | ({"true": True, "false": False}, 563 | [("false", b"false"), ("true", b"true")]), 564 | ({"list": ["first", "second", "third"]}, 565 | [("list", b"first"), ("list", 
b"second"), ("list", b"third")]), 566 | ) 567 | 568 | 569 | def check_url_encode_data(kwargs, output): 570 | p = params_from_dict(**kwargs) 571 | assert p == output, "Unequal: %r, %r" % (p, output) 572 | 573 | mlt_query_options_data = ( 574 | ('text_field', {}, {}, 575 | [('mlt.fl', b'text_field'), ('q', b'*:*')]), 576 | (['string_field', 'text_field'], {'string_field': 3.0}, {}, 577 | [('mlt.fl', b'string_field,text_field'), ('mlt.qf', b'string_field^3.0'), 578 | ('q', b'*:*')]), 579 | ('text_field', {}, {'mindf': 3, 'interestingTerms': 'details'}, 580 | [('mlt.fl', b'text_field'), ('mlt.interestingTerms', b'details'), 581 | ('mlt.mindf', b'3'), ('q', b'*:*')]), 582 | ) 583 | 584 | 585 | def check_mlt_query_options(fields, query_fields, kwargs, output): 586 | q = MltSolrSearch(None, content="This is the posted content.") 587 | q = q.mlt(fields, query_fields=query_fields, **kwargs) 588 | assert q.params() == output 589 | 590 | 591 | def flatten(test_data): 592 | new_data = [] 593 | for method, data in test_data.items(): 594 | for row in data: 595 | if isinstance(row, (list, tuple)): 596 | new_data.append([method, *row]) 597 | else: 598 | new_data.append([method, row]) 599 | return new_data 600 | 601 | 602 | @pytest.mark.parametrize( 603 | "method,args,kwargs,expected", flatten(good_query_data)) 604 | def test_query_data(method, args, kwargs, expected): 605 | check_query_data(method, args, kwargs, expected) 606 | 607 | 608 | @pytest.mark.parametrize( 609 | "method,args,kwargs,expected", flatten(base_good_query_data)) 610 | def test_mlt_query_data(method, args, kwargs, expected): 611 | check_mlt_query_data(method, args, kwargs, expected) 612 | 613 | 614 | @pytest.mark.parametrize( 615 | "option_class,kwargs,expected", flatten(good_option_data)) 616 | def test_good_option_data(option_class, kwargs, expected): 617 | check_good_option_data(option_class, kwargs, expected) 618 | 619 | 620 | @pytest.mark.parametrize("option_class,kwargs", flatten(bad_option_data)) 621 | def test_bad_option_data(option_class, kwargs): 622 | check_bad_option_data(option_class, kwargs) 623 | 624 | 625 | @pytest.mark.parametrize("query,expected", complex_boolean_queries) 626 | def test_complex_boolean_queries(query, expected): 627 | solr_search = SolrSearch(None) 628 | check_complex_boolean_query(solr_search, query, expected) 629 | 630 | 631 | @pytest.mark.parametrize("kwargs, expected", param_encode_data) 632 | def test_url_encode_data(kwargs, expected): 633 | check_url_encode_data(kwargs, expected) 634 | 635 | 636 | @pytest.mark.parametrize( 637 | "fields,query_fields,kwargs,expected", mlt_query_options_data) 638 | def test_mlt_query_options(fields, query_fields, kwargs, expected): 639 | check_mlt_query_options(fields, query_fields, kwargs, expected) 640 | 641 | 642 | def test_is_iter(): 643 | assert is_iter("abc") == False 644 | assert is_iter(1) == False 645 | assert is_iter([1, 2]) == True 646 | assert is_iter((1, 2)) == True 647 | assert is_iter(set([1, 2])) == True 648 | -------------------------------------------------------------------------------- /scorched/tests/test_strings.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from scorched.search import LuceneQuery 4 | from scorched.strings import RawString, WildcardString 5 | 6 | 7 | class TestStrings(unittest.TestCase): 8 | def test_string_escape(self): 9 | """Ensure that string characters are escaped correctly for Solr queries.""" 10 | test_str = '+-&|!(){}[]^"~*?: \t\v\\/' 11 | escaped = 
RawString(test_str).escape_for_lqs_term() 12 | self.assertEqual( 13 | escaped, 14 | '\\+\\-\\&\\|\\!\\(\\)\\{\\}\\[\\]\\^\\"\\~\\*\\?\\:\\ \\\t\\\x0b\\\\\\/', 15 | ) 16 | 17 | def test_wildcard_string(self): 18 | q = LuceneQuery() 19 | q = q.Q(WildcardString("occurrencetype$$pressemitteilung$$*")) 20 | output = {None: "occurrencetype$$pressemitteilung$$*"} 21 | self.assertEqual(q.options(), output, "Unequal: %r, %r" % (q.options(), output)) 22 | # slash 23 | q = q.Q(WildcardString("occu/*/baum")) 24 | output = {None: "occu\\/*\\/baum"} 25 | self.assertEqual(q.options(), output, "Unequal: %r, %r" % (q.options(), output)) 26 | # backslash 27 | q = q.Q(WildcardString("occu\\*baum\\?aus\\")) 28 | output = {None: "occu\\*baum\\?aus\\\\"} 29 | self.assertEqual(q.options(), output, "Unequal: %r, %r" % (q.options(), output)) 30 | # question mark 31 | q = q.Q(WildcardString("occ?/*/baum")) 32 | output = {None: "occ?\\/*\\/baum"} 33 | self.assertEqual(q.options(), output, "Unequal: %r, %r" % (q.options(), output)) 34 | -------------------------------------------------------------------------------- /scorched/tests/test_testing.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import scorched.testing 3 | from unittest import mock 4 | 5 | 6 | class TestTesting(unittest.TestCase): 7 | 8 | def test_solr(self): 9 | self.assertFalse( 10 | scorched.testing.is_solr_available("http://foo")) 11 | 12 | def test_solr_decorator(self): 13 | with mock.patch.object(scorched.testing, "is_solr_available", 14 | return_value=False): 15 | func = lambda x: x 16 | self.assertTrue(hasattr(scorched.testing.skip_unless_solr(func), 17 | '__unittest_skip_why__')) 18 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [nosetests] 2 | match=^test 3 | nocapture=1 4 | cover-package=scorched 5 | with-coverage=1 6 | cover-erase=1 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import os 4 | 5 | from setuptools import find_packages, setup 6 | 7 | version = "1.0.0.0b3.dev0" 8 | 9 | here = os.path.abspath(os.path.dirname(__file__)) 10 | README = open(os.path.join(here, "README.rst")).read() 11 | CHANGES = open(os.path.join(here, "CHANGES.rst")).read() 12 | 13 | 14 | setup( 15 | name="scorched", 16 | version=version, 17 | description="Solr search ORM-like query builder", 18 | long_description=README + "\n\n" + CHANGES, 19 | classifiers=[ 20 | "Environment :: Console", 21 | "Environment :: Web Environment", 22 | "Intended Audience :: Developers", 23 | "License :: OSI Approved :: MIT License", 24 | "Operating System :: MacOS :: MacOS X", 25 | "Operating System :: Microsoft :: Windows", 26 | "Operating System :: POSIX", 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3.7", 29 | "Programming Language :: Python :: 3.8", 30 | "Programming Language :: Python :: 3.9", 31 | "Programming Language :: Python :: 3.10", 32 | ], 33 | keywords="solr tow sunburnt offspring", 34 | author="(Josip Delic) Lugensa GmbH", 35 | author_email="info@lugensa.com", 36 | url="http://www.lugensa.com", 37 | license="MIT", 38 | packages=find_packages(exclude=["ez_setup", "examples", "tests"]), 39 | include_package_data=True, 40 | zip_safe=False, 41 |
python_requires=">=3.7.0", 42 | install_requires=[ 43 | "setuptools", 44 | "requests", 45 | "pytz", 46 | ], 47 | extras_require={ 48 | "test": ["pytest<7.0.0", "coverage", "pytest-docker"], 49 | }, 50 | test_suite="scorched.tests", 51 | ) 52 | -------------------------------------------------------------------------------- /testing-solr.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | SOLR_PORT=${SOLR_PORT:-8983} 4 | SOLR_VERSION=${SOLR_VERSION:-4.10.2} 5 | DEBUG=${DEBUG:-false} 6 | SOLR_CORE=${SOLR_CORE:-core0} 7 | 8 | download() { 9 | FILE="$2.tgz" 10 | if [ -f $FILE ]; 11 | then 12 | echo "File $FILE exists." 13 | tar -zxf $FILE 14 | else 15 | echo "File $FILE does not exist. Downloading solr from $1..." 16 | curl -O $1 17 | tar -zxf $FILE 18 | fi 19 | echo "Downloaded!" 20 | } 21 | 22 | is_solr_up(){ 23 | echo "Checking if Solr is up on http://localhost:$SOLR_PORT/solr/admin/cores" 24 | http_code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$SOLR_PORT/solr/admin/cores") 25 | test "$http_code" = "200" 26 | } 27 | 28 | wait_for_solr(){ 29 | while ! is_solr_up; do 30 | sleep 5 31 | done 32 | } 33 | 34 | run() { 35 | dir_name=$1 36 | solr_port=$2 37 | solr_core=$3 38 | # Run solr 39 | echo "Running with folder $dir_name" 40 | echo "Starting solr on port ${solr_port}..." 41 | 42 | # go to the solr folder 43 | cd "$dir_name/example" 44 | 45 | if [ "$DEBUG" = "true" ] 46 | then 47 | java -Djetty.port=$solr_port -Dsolr.solr.home=multicore -jar start.jar & 48 | else 49 | java -Djetty.port=$solr_port -Dsolr.solr.home=multicore -jar start.jar > /dev/null 2>&1 & 50 | fi 51 | wait_for_solr 52 | cd ../../ 53 | echo "Started" 54 | } 55 | 56 | 57 | download_and_run() { 58 | case $1 in 59 | 3.5.0) 60 | url="http://archive.apache.org/dist/lucene/solr/3.5.0/apache-solr-3.5.0.tgz" 61 | dir_name="apache-solr-3.5.0" 62 | dir_conf="conf/" 63 | ;; 64 | 3.6.0) 65 | url="http://archive.apache.org/dist/lucene/solr/3.6.0/apache-solr-3.6.0.tgz" 66 | dir_name="apache-solr-3.6.0" 67 | dir_conf="conf/" 68 | ;; 69 | 3.6.1) 70 | url="http://archive.apache.org/dist/lucene/solr/3.6.1/apache-solr-3.6.1.tgz" 71 | dir_name="apache-solr-3.6.1" 72 | dir_conf="conf/" 73 | ;; 74 | 3.6.2) 75 | url="http://archive.apache.org/dist/lucene/solr/3.6.2/apache-solr-3.6.2.tgz" 76 | dir_name="apache-solr-3.6.2" 77 | dir_conf="conf/" 78 | ;; 79 | 4.0.0) 80 | url="http://archive.apache.org/dist/lucene/solr/4.0.0/apache-solr-4.0.0.tgz" 81 | dir_name="apache-solr-4.0.0" 82 | dir_conf="collection1/conf/" 83 | ;; 84 | 4.1.0) 85 | url="http://archive.apache.org/dist/lucene/solr/4.1.0/solr-4.1.0.tgz" 86 | dir_name="solr-4.1.0" 87 | dir_conf="collection1/conf/" 88 | ;; 89 | 4.2.0) 90 | url="http://archive.apache.org/dist/lucene/solr/4.2.0/solr-4.2.0.tgz" 91 | dir_name="solr-4.2.0" 92 | dir_conf="collection1/conf/" 93 | ;; 94 | 4.2.1) 95 | url="http://archive.apache.org/dist/lucene/solr/4.2.1/solr-4.2.1.tgz" 96 | dir_name="solr-4.2.1" 97 | dir_conf="collection1/conf/" 98 | ;; 99 | 4.3.1) 100 | url="http://archive.apache.org/dist/lucene/solr/4.3.1/solr-4.3.1.tgz" 101 | dir_name="solr-4.3.1" 102 | dir_conf="collection1/conf/" 103 | ;; 104 | 4.4.0) 105 | url="http://archive.apache.org/dist/lucene/solr/4.4.0/solr-4.4.0.tgz" 106 | dir_name="solr-4.4.0" 107 | dir_conf="collection1/conf/" 108 | ;; 109 | 4.5.0) 110 | url="http://archive.apache.org/dist/lucene/solr/4.5.0/solr-4.5.0.tgz" 111 | dir_name="solr-4.5.0" 112 | dir_conf="collection1/conf/" 113 | ;; 114 | 4.5.1)
115 | url="http://archive.apache.org/dist/lucene/solr/4.5.1/solr-4.5.1.tgz" 116 | dir_name="solr-4.5.1" 117 | dir_conf="collection1/conf/" 118 | ;; 119 | 4.6.0) 120 | url="http://archive.apache.org/dist/lucene/solr/4.6.0/solr-4.6.0.tgz" 121 | dir_name="solr-4.6.0" 122 | dir_conf="collection1/conf/" 123 | ;; 124 | 4.6.1) 125 | url="http://archive.apache.org/dist/lucene/solr/4.6.1/solr-4.6.1.tgz" 126 | dir_name="solr-4.6.1" 127 | dir_conf="collection1/conf/" 128 | ;; 129 | 4.7.0) 130 | url="http://archive.apache.org/dist/lucene/solr/4.7.0/solr-4.7.0.tgz" 131 | dir_name="solr-4.7.0" 132 | dir_conf="collection1/conf/" 133 | ;; 134 | 4.7.1) 135 | url="http://archive.apache.org/dist/lucene/solr/4.7.1/solr-4.7.1.tgz" 136 | dir_name="solr-4.7.1" 137 | dir_conf="collection1/conf/" 138 | ;; 139 | 4.7.2) 140 | url="http://archive.apache.org/dist/lucene/solr/4.7.2/solr-4.7.2.tgz" 141 | dir_name="solr-4.7.2" 142 | dir_conf="collection1/conf/" 143 | ;; 144 | 4.8.0) 145 | url="http://archive.apache.org/dist/lucene/solr/4.8.0/solr-4.8.0.tgz" 146 | dir_name="solr-4.8.0" 147 | dir_conf="collection1/conf/" 148 | ;; 149 | 4.8.1) 150 | url="http://archive.apache.org/dist/lucene/solr/4.8.1/solr-4.8.1.tgz" 151 | dir_name="solr-4.8.1" 152 | dir_conf="collection1/conf/" 153 | ;; 154 | 4.9.0) 155 | url="http://archive.apache.org/dist/lucene/solr/4.9.0/solr-4.9.0.tgz" 156 | dir_name="solr-4.9.0" 157 | dir_conf="collection1/conf/" 158 | ;; 159 | 4.9.1) 160 | url="http://archive.apache.org/dist/lucene/solr/4.9.1/solr-4.9.1.tgz" 161 | dir_name="solr-4.9.1" 162 | dir_conf="collection1/conf/" 163 | ;; 164 | 4.10.2) 165 | url="http://archive.apache.org/dist/lucene/solr/4.10.2/solr-4.10.2.tgz" 166 | dir_name="solr-4.10.2" 167 | dir_conf="collection1/conf/" 168 | ;; 169 | esac 170 | 171 | download $url $dir_name 172 | add_core $dir_name $dir_conf $SOLR_CORE $SOLR_CONFS 173 | run $dir_name $SOLR_PORT $SOLR_CORE 174 | 175 | if [ -z "${SOLR_DOCS}" ] 176 | then 177 | echo "SOLR_DOCS not defined, skipping initial indexing" 178 | else 179 | post_documents $dir_name $SOLR_DOCS $SOLR_CORE $SOLR_PORT 180 | fi 181 | } 182 | 183 | add_core() { 184 | dir_name=$1 185 | dir_conf=$2 186 | solr_core=$3 187 | solr_confs=$4 188 | # prepare our folders 189 | [[ -d "${dir_name}/example/multicore/${solr_core}" ]] || mkdir $dir_name/example/multicore/$solr_core 190 | [[ -d "${dir_name}/example/multicore/${solr_core}/conf" ]] || mkdir $dir_name/example/multicore/$solr_core/conf 191 | 192 | # copy full solr example first 193 | cp -R $dir_name/example/solr/$dir_conf/* $dir_name/example/multicore/$solr_core/conf 194 | 195 | # overwrite with custom configurations 196 | if [ -d "${solr_confs}" ] ; then 197 | cp -R $solr_confs/* $dir_name/example/multicore/$solr_core/conf/ 198 | else 199 | for file in $solr_confs 200 | do 201 | if [ -f "${file}" ]; then 202 | cp $file $dir_name/example/multicore/$solr_core/conf 203 | echo "Copied $file into solr conf directory."
204 | else 205 | echo "${file} is not a valid file" 206 | exit 1 207 | fi 208 | done 209 | fi 210 | } 211 | 212 | post_documents() { 213 | dir_name=$1 214 | solr_docs=$2 215 | solr_core=$3 216 | solr_port=$4 217 | # Post documents 218 | if [ -z "${solr_docs}" ] 219 | then 220 | echo "SOLR_DOCS not defined, skipping initial indexing" 221 | else 222 | echo "Indexing $solr_docs" 223 | java -Dtype=application/json -Durl=http://localhost:$solr_port/solr/$solr_core/update/json -jar $dir_name/example/exampledocs/post.jar $solr_docs 224 | fi 225 | } 226 | 227 | check_version() { 228 | case $1 in 229 | 3.5.0|3.6.0|3.6.1|3.6.2|4.0.0|4.1.0|4.2.0|4.2.1|4.3.1|4.4.0|4.5.0|4.5.1|4.6.0|4.6.1|4.7.0|4.7.1|4.7.2|4.8.0|4.8.1|4.9.0|4.9.1|4.10.2);; 230 | *) 231 | echo "Sorry, $1 is not a supported or valid version." 232 | exit 1 233 | ;; 234 | esac 235 | } 236 | 237 | check_version $SOLR_VERSION 238 | download_and_run $SOLR_VERSION 239 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py37,py38,py39,py310 3 | 4 | [testenv] 5 | recreate = true 6 | setenv = 7 | TEST_DIR=scorched/tests/ 8 | PACKAGE_DIR=scorched 9 | extras = test 10 | commands = 11 | py.test {posargs: --junitxml junit-{envname}.xml --cov {env:PACKAGE_DIR} --cov-report xml:coverage-{envname}.xml {env:TEST_DIR}} 12 | usedevelop = True 13 | deps = 14 | pytest < 7.0.0 15 | pytest-cov 16 | pytest-docker 17 | --------------------------------------------------------------------------------
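As a reading aid for the parameter tables above, the following sketch reproduces the round trip the tests pin down. It is illustrative only: it uses nothing beyond calls that already appear in the test files (SolrSearch(None), Q, filter, sort_by, field_limit, params, MltSolrSearch), and the exact ordering of the emitted parameter lists is inferred from the expected values in complex_boolean_queries rather than guaranteed.

from scorched.search import MltSolrSearch, SolrSearch

# Build a query without a live connection; params() shows the wire format.
search = SolrSearch(None)
q = (
    search.query("hello world")            # spaces get escaped: hello\ world
    .filter(search.Q(text_field="tow"))    # becomes an fq parameter
    .sort_by("-int_field")                 # a leading "-" means descending
    .field_limit(["name", "foo"])          # fl is emitted comma-joined, sorted
)
print(q.params())
# By analogy with the cases above, something like:
# [('fl', b'foo,name'), ('fq', b'text_field:tow'),
#  ('q', b'hello\\ world'), ('sort', b'int_field desc')]

# More-like-this searches seed the query with posted content instead.
mlt = MltSolrSearch(None, content="This is the posted content.")
mlt = mlt.mlt("text_field", query_fields={}, mindf=3, interestingTerms="details")
print(mlt.params())
# [('mlt.fl', b'text_field'), ('mlt.interestingTerms', b'details'),
#  ('mlt.mindf', b'3'), ('q', b'*:*')]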