├── .coveragerc
├── .editorconfig
├── .github
    └── workflows
    │   └── tox.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .prettierrc
├── .ruff.toml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── docker-compose.yml
├── elasticsearch_django
    ├── __init__.py
    ├── admin.py
    ├── apps.py
    ├── context_managers.py
    ├── decorators.py
    ├── index.py
    ├── management
    │   ├── __init__.py
    │   └── commands
    │   │   ├── __init__.py
    │   │   ├── create_search_index.py
    │   │   ├── delete_search_index.py
    │   │   ├── prune_search_index.py
    │   │   ├── rebuild_search_index.py
    │   │   └── update_search_index.py
    ├── mappings
    │   └── README.md
    ├── migrations
    │   ├── 0001_initial.py
    │   ├── 0002_searchquery_duration.py
    │   ├── 0003_auto_20160926_2021.py
    │   ├── 0004_auto_20161129_1135.py
    │   ├── 0005_convert_JSONFields.py
    │   ├── 0006_add_encoder_JSONField_kwarg.py
    │   ├── 0007_update_json_field_encoders.py
    │   ├── 0008_searchquery_search_terms.py
    │   ├── 0009_searchquery_query_type.py
    │   ├── 0010_searchquery_total_hits_relation.py
    │   ├── 0011_searchquery_aggregations.py
    │   ├── 0012_alter_searchquery_aggregations_and_more.py
    │   └── __init__.py
    ├── models.py
    ├── py.typed
    ├── settings.py
    └── signals.py
├── manage.py
├── mappings
    └── examples.json
├── mypy.ini
├── poetry.toml
├── pyproject.toml
├── pytest.ini
├── tests
    ├── __init__.py
    ├── admin.py
    ├── apps.py
    ├── migrations
    │   ├── 0001_initial.py
    │   ├── 0002_examplemodel_user.py
    │   ├── 0003_examplemodelwithcustomprimarykey.py
    │   ├── 0004_modela_modelb.py
    │   └── __init__.py
    ├── models.py
    ├── settings.py
    ├── test_apps.py
    ├── test_commands.py
    ├── test_decorators.py
    ├── test_index_functions.py
    ├── test_models.py
    ├── test_settings.py
    └── urls.py
└── tox.ini


/.coveragerc:
--------------------------------------------------------------------------------
1 | # .coveragerc to control coverage.py
2 | [run]
3 | branch = True
4 | omit = tests/*
5 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # editorconfig.org
 2 | root = true
 3 | 
 4 | [*]
 5 | indent_style = space
 6 | indent_size = 4
 7 | end_of_line = lf
 8 | charset = utf-8
 9 | trim_trailing_whitespace = true
10 | insert_final_newline = true
11 | 


--------------------------------------------------------------------------------
/.github/workflows/tox.yml:
--------------------------------------------------------------------------------
 1 | name: Python / Django
 2 | 
 3 | on:
 4 |     push:
 5 |         branches:
 6 |             - master
 7 | 
 8 |     pull_request:
 9 |         types: [opened, synchronize, reopened]
10 | 
11 | jobs:
12 |     format:
13 |         name: Check formatting
14 |         runs-on: ubuntu-latest
15 |         strategy:
16 |             matrix:
17 |                 toxenv: [fmt, lint, mypy]
18 |         env:
19 |             TOXENV: ${{ matrix.toxenv }}
20 | 
21 |         steps:
22 |             - name: Check out the repository
23 |               uses: actions/checkout@v4
24 | 
25 |             - name: Set up Python 3.11
26 |               uses: actions/setup-python@v4  # same major version as the test job's setup step
27 |               with:
28 |                   python-version: "3.11"
29 | 
30 |             - name: Install and run tox
31 |               run: |
32 |                   pip install tox
33 |                   tox
34 | 
35 |     test:
36 |         name: Run tests
37 |         runs-on: ubuntu-latest
38 |         strategy:
39 |             matrix:
40 |                 python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
41 |                 django: ["32", "41", "50", "main"]
42 |                 exclude:
43 |                     - django: "50"
44 |                       python: "3.8"
45 |                     - django: "50"
46 |                       python: "3.9"
47 |                     - django: "main"
48 |                       python: "3.8"
49 |                     - django: "main"
50 |                       python: "3.9"
51 | 
52 |         env:
53 |             TOXENV: py${{ matrix.python }}-django${{ matrix.django }}
54 | 
55 |         steps:
56 |             - name: Check out the repository
57 |               uses: actions/checkout@v4
58 | 
59 |             - name: Set up Python ${{ matrix.python }}
60 |               uses: actions/setup-python@v4
61 |               with:
62 |                   python-version: ${{ matrix.python }}
63 | 
64 |             - name: Install and run tox
65 |               run: |
66 |                   pip install tox
67 |                   tox
68 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | .tox
 3 | *.egg-info
 4 | .coverage
 5 | .DS_Store
 6 | staticfiles
 7 | Pipfile
 8 | Pipfile.lock
 9 | poetry.lock
10 | pythonenv*
11 | dist/
12 | test.db
13 | elasticsearch_django.db
14 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | 
 3 |   - repo: https://github.com/charliermarsh/ruff-pre-commit
 4 |     rev: "v0.0.264"
 5 |     hooks:
 6 |       - id: ruff
 7 |         args: [--fix, --exit-non-zero-on-fix]  # apply fixes, but exit non-zero when anything was fixed
 8 | 
 9 |   # python code formatting - will amend files
10 |   - repo: https://github.com/ambv/black
11 |     rev: 23.1.0
12 |     hooks:
13 |       - id: black
14 |         additional_dependencies:
15 |           - platformdirs
16 | 
17 |   # python static type checking
18 |   - repo: https://github.com/pre-commit/mirrors-mypy
19 |     rev: v1.1.1
20 |     hooks:
21 |       - id: mypy
22 |         args:
23 |           - --disallow-untyped-defs
24 |           - --disallow-incomplete-defs
25 |           - --check-untyped-defs
26 |           - --no-implicit-optional
27 |           - --ignore-missing-imports
28 |           - --follow-imports=silent
29 |         exclude: ^tests
30 | 


--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
 1 | {
 2 |     "printWidth": 100,
 3 |     "tabWidth": 4,
 4 |     "useTabs": false,
 5 |     "semi": true,
 6 |     "singleQuote": false,
 7 |     "trailingComma": "none",
 8 |     "bracketSpacing": true,
 9 |     "jsxBracketSameLine": false,
10 |     "proseWrap": "always",
11 |     "endOfLine": "auto"
12 | }
13 | 


--------------------------------------------------------------------------------
/.ruff.toml:
--------------------------------------------------------------------------------
 1 | line-length = 88
 2 | ignore = [
 3 |     "D100",  # Missing docstring in public module
 4 |     "D101",  # Missing docstring in public class
 5 |     "D102",  # Missing docstring in public method
 6 |     "D103",  # Missing docstring in public function
 7 |     "D104",  # Missing docstring in public package
 8 |     "D105",  # Missing docstring in magic method
 9 |     "D106",  # Missing docstring in public nested class
10 |     "D107",  # Missing docstring in __init__
11 |     "D203",  # 1 blank line required before class docstring
12 |     "D212",  # Multi-line docstring summary should start at the first line
13 |     "D213",  # Multi-line docstring summary should start at the second line
14 |     "D404",  # First word of the docstring should not be "This"
15 |     "D405",  # Section name should be properly capitalized
16 |     "D406",  # Section name should end with a newline
17 |     "D407",  # Missing dashed underline after section
18 |     "D410",  # Missing blank line after section
19 |     "D411",  # Missing blank line before section
20 |     "D412",  # No blank lines allowed between a section header and its content
21 |     "D416",  # Section name should end with a colon
22 |     "D417",  # Missing argument description in the docstring
23 | ]
25 | select = [
26 |     "A",  # flake8 builtins
27 |     "C9", # mccabe
28 |     "D",  # pydocstyle
29 |     "E",  # pycodestyle (errors)
30 |     "F",  # Pyflakes
31 |     "I",  # isort
32 |     "S",  # flake8-bandit
33 |     "T2", # flake8-print
34 |     "W",  # pycodestyle (warnings)
35 | ]
36 | 
37 | [isort]
38 | combine-as-imports = true
39 | 
40 | [mccabe]
41 | max-complexity = 8
42 | 
43 | [per-file-ignores]
44 | "*tests/*" = [
45 |     "D205",  # 1 blank line required between summary line and description
46 |     "D400",  # First line should end with a period
47 |     "D401",  # First line should be in imperative mood
48 |     "D415",  # First line should end with a period, question mark, or exclamation point
49 |     "E501",  # Line too long
50 |     "E731",  # Do not assign a lambda expression, use a def
51 |     "S101",  # Use of assert detected
52 |     "S105",  # Possible hardcoded password
53 |     "S106",  # Possible hardcoded password
54 |     "S113",  # Probable use of requests call with timeout set to {value}
55 | ]
56 | "*/migrations/*" = [
57 |     "E501",  # Line too long
58 | ]
59 | "*/settings.py" = [
60 |     "F403",  # from {name} import * used; unable to detect undefined names
61 |     "F405",  # {name} may be undefined, or defined from star imports:
62 | ]
63 | "*/settings/*" = [
64 |     "F403",  # from {name} import * used; unable to detect undefined names
65 |     "F405",  # {name} may be undefined, or defined from star imports:
66 | ]
67 | "*/management/commands/*" = [
68 |     "A003",  # Class attribute `{help}` is shadowing a python builtin
69 | ]
70 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | All notable changes to this project will be documented in this file.
 4 | 
 5 | ## v8.5.2
 6 | 
 7 | - Add py.typed typing marker (h/t @0x416E64)
 8 | 
 9 | ## v8.5.1
10 | 
11 | - Add support for db alias
12 | - Add "include_source" setting (default: True)
13 | - Add Django 5.0 to build matrix
14 | 
15 | ## v8.4
16 | 
17 | - Adds "fields" to the stored hits JSON (if present) [#72]
18 | 
19 | ## v8.3
20 | 
21 | - Adds raw search query response object to SearchQuery (`SearchQuery.query_response`)
22 | 
23 | ## v8.2
24 | 
25 | - Adds support for complex client configuration [#68](https://github.com/yunojuno/elasticsearch-django/issues/68) (h/t @ColeDCrawford)
26 | 
27 | ## v8.1.2
28 | 
29 | - Fixes `disable_search_updates` decorator [#65](https://github.com/yunojuno/elasticsearch-django/issues/65)
30 | 
31 | ## v8.0
32 | 
33 | This is a non-functional release - updating the Python, Django and
34 | Elasticsearch version support. It will break if you are using an
35 | unsupported version of any of the above, but should work without
36 | modification if not.
37 | 
38 | - Adds support for Python 3.11
39 | - Adds support for Django 4.0, 4.1
40 | - Adds support for Elasticsearch 8.x
41 | - Adds support for custom model primary keys
42 | 
43 | - Removes support for Django 3.0, 3.1
44 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT Licence (MIT)
 2 | 
 3 | Copyright (c) 2022 YunoJuno Ltd
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | **This project now requires Python 3.8+ and Django 3.2+.
  2 | For previous versions please refer to the relevant tag or branch.**
  3 | 
  4 | # Elasticsearch for Django
  5 | 
  6 | This is a lightweight Django app for people who are using Elasticsearch
  7 | with Django, and want to manage their indexes.
  8 | 
  9 | ## Compatibility
 10 | 
 11 | The master branch is now based on `elasticsearch-py` 8. If you are
 12 | using older versions, please switch to the relevant branch (released on
 13 | PyPI as 2.x, 5.x, 6.x).
 14 | 
 15 | ## Search Index Lifecycle
 16 | 
 17 | The basic lifecycle for a search index is simple:
 18 | 
 19 | 1. Create an index
 20 | 2. Post documents to the index
 21 | 3. Query the index
 22 | 
 23 | Relating this to our use of search within a Django project it looks like this:
 24 | 
 25 | 1. Create mapping file for a named index
 26 | 2. Add index configuration to Django settings
 27 | 3. Map models to document types in the index
 28 | 4. Post document representation of objects to the index
 29 | 5. Update the index when an object is updated
 30 | 6. Remove the document when an object is deleted
 31 | 7. Query the index
 32 | 8. Convert search results into a QuerySet (preserving relevance)
 33 | 
 34 | # Django Implementation
 35 | 
 36 | This section shows how to set up Django to recognise ES indexes, and the
 37 | models that should appear in an index. From this setup you should be
 38 | able to run the management commands that will create and populate each
 39 | index, and keep the indexes in sync with the database.
 40 | 
 41 | ## Create index mapping file
 42 | 
 43 | The prerequisite to configuring Django to work with an index is having
 44 | the mapping for the index available. This is a bit chicken-and-egg, but
 45 | the underlying assumption is that you are capable of creating the index
 46 | mappings outside of Django itself, as raw JSON. (The easiest way to
 47 | spoof this is to POST a JSON document representing your document type to a
 48 | URL on your ES instance (`POST http://ELASTICSEARCH_URL/{{index_name}}`)
 49 | and then retrieve the auto-magic mapping that ES created via `GET
 50 | http://ELASTICSEARCH_URL/{{index_name}}/_mapping`.)
 51 | 
 52 | Once you have the JSON mapping, you should save it in the root of the
 53 | Django project as `search/mappings/{{index_name}}.json`.
 54 | 
 55 | ## Configure Django settings
 56 | 
 57 | The Django settings for search are contained in a dictionary called
 58 | `SEARCH_SETTINGS`, which should be in the main `django.conf.settings`
 59 | file. The dictionary has three root nodes, `connections`, `indexes` and
 60 | `settings`. Below is an example:
 61 | 
 62 | ```python
 63 | 
 64 |     SEARCH_SETTINGS = {
 65 |         'connections': {
 66 |             'default': getenv('ELASTICSEARCH_URL'),
 67 |             'backup': {
 68 |                 # all Elasticsearch init kwargs can be used here
 69 |                 'cloud_id': '{{ cloud_id }}'
 70 |             }
 71 |         },
 72 |         'indexes': {
 73 |             'blog': {
 74 |                 'models': [
 75 |                     'website.BlogPost',
 76 |                 ]
 77 |             }
 78 |         },
 79 |         'settings': {
 80 |             # batch size for ES bulk api operations
 81 |             'chunk_size': 500,
 82 |             # default page size for search results
 83 |             'page_size': 25,
 84 |             # set to True to connect post_save/delete signals
 85 |             'auto_sync': True,
 86 |             # List of models which will never auto_sync even if auto_sync is True
 87 |             'never_auto_sync': [],
 88 |             # if true, then indexes must have mapping files
 89 |             'strict_validation': False
 90 |         }
 91 |     }
 92 | ```
 93 | 
 94 | The `connections` node is (hopefully) self-explanatory - we support
 95 | multiple connections, but in practice you should only need the one -
 96 | 'default' connection. This is the URL used to connect to your ES
 97 | instance. The `settings` node contains site-wide search settings. The
 98 | `indexes` node is where we configure how Django and ES play together,
 99 | and is where most of the work happens.
100 | 
101 | Note that prior to v8.2 the connection value had to be a connection
102 | string; since v8.2 this can still be a connection string, but can also
103 | be a dictionary that contains any kwarg that can be passed to the
104 | `Elasticsearch` init method.
105 | 
106 | **Index settings**
107 | 
108 | Inside the index node we have a collection of named indexes - in this
109 | case just the single index called `blog`. Inside each index we have a
110 | `models` key which contains a list of Django models that should appear
111 | in the index, denoted in `app.ModelName` format. You can have multiple
112 | models in an index, and a model can appear in multiple indexes. How
113 | models and indexes interact is described in the next section.
114 | 
115 | **Configuration Validation**
116 | 
117 | When the app boots up it validates the settings, which involves the
118 | following:
119 | 
120 | 1. Does each of the indexes specified have a mapping file?
121 | 2. Does each of the models implement the required mixins?
122 | 
123 | ## Implement search document mixins
124 | 
125 | So far we have configured Django to know the names of the indexes we want, and the models that we
126 | want to index. What it doesn't yet know is which objects to index, and how to convert an object to
127 | its search index document. This is done by implementing two separate mixins - `SearchDocumentMixin`
128 | and `SearchDocumentManagerMixin`. The configuration validation routine will tell you if these are
129 | not implemented.
130 | 
131 | **SearchDocumentMixin**
132 | 
133 | This mixin is responsible for the search index document format. We are
134 | indexing JSON representations of each object, and we have two methods on
135 | the mixin responsible for outputting the correct format -
136 | `as_search_document` and `as_search_document_update`.
137 | 
138 | An aside on the mechanics of the `auto_sync` process, which is hooked up
139 | using Django's `post_save` and `post_delete` model signals. ES supports
140 | partial updates to documents that already exist, and we make a
141 | fundamental assumption about indexing models - that **if you pass the
142 | `update_fields` kwarg to a `model.save` method call, then you are
143 | performing a partial update**, and this will be propagated to ES as a
144 | partial update only.
145 | 
146 | To this end, we have two methods for generating the model's JSON
147 | representation - `as_search_document`, which should return a dict that
148 | represents the entire object; and `as_search_document_update`, which
149 | takes the `update_fields` kwarg. This method handles two partial update
150 | 'strategies', defined in the `SEARCH_SETTINGS`, 'full' and 'partial'.
151 | The default 'full' strategy simply proxies the `as_search_document`
152 | method - i.e. partial updates are treated as a full document update. The
153 | 'partial' strategy is more intelligent - it will map the update_fields
154 | specified to the field names defined in the index mapping files. If a
155 | field name is passed into the save method but is not in the mapping
156 | file, it is ignored. In addition, if the underlying Django model field
157 | is a related object, a `ValueError` will be raised, as we cannot
158 | serialize this automatically. In this scenario, you will need to
159 | override the method in your subclass - see the code for more details.
160 | 
161 | To better understand this, let us say that we have a model (`MyModel`)
162 | that is configured to be included in an index called `myindex`. If we
163 | save an object, without passing `update_fields`, then this is considered
164 | a full document update, which triggers the object's
165 | `index_search_document` method:
166 | 
167 | ```python
168 | obj = MyModel.objects.first()
169 | obj.save()
170 | ...
171 | # AUTO_SYNC=true will trigger a re-index of the complete object document:
172 | obj.index_search_document(index='myindex')
173 | ```
174 | 
175 | However, if we only want to update a single field (say the `timestamp`),
176 | and we pass this in to the save method, then this will trigger the
177 | `update_search_document` method, passing in the names of the fields that
178 | we want updated.
179 | 
180 | ```python
181 | # save a single field on the object
182 | obj.save(update_fields=['timestamp'])
183 | ...
184 | # AUTO_SYNC=true will trigger a partial update of the object document
185 | obj.update_search_document(index, update_fields=['timestamp'])
186 | ```
187 | 
188 | We pass the name of the index being updated as the first arg, as objects may have different representations in different indexes:
189 | 
190 | ```python
191 |     def as_search_document(self, index):
192 |         return {'name': "foo"} if index == 'foo' else {'name': "bar"}
193 | ```
194 | 
195 | In the case of the second method, the simplest possible implementation
196 | would be a dictionary containing the names of the fields being updated
197 | and their new values, and this is the default implementation. If the
198 | fields passed in are simple fields (numbers, dates, strings, etc.) then
199 | a simple `{'field_name': getattr(obj, field_name)}` is returned. However,
200 | if the field name relates to a complex object (e.g. a related object)
201 | then this method will raise an `InvalidUpdateFields` exception. In this
202 | scenario you should override the default implementation with one of your
203 | own.
204 | 
205 | ```python
206 | 
207 |     def as_search_document_update(self, index, update_fields):
208 |         if 'user' in update_fields:
209 |             # remove so that it won't raise a ValueError
210 |             update_fields.remove('user')
211 |             doc = super().as_search_document_update(index, update_fields)
212 |             doc['user'] = self.user.get_full_name()
213 |             return doc
214 |         return super().as_search_document_update(index, update_fields)
215 | ```
216 | 
217 | The reason we have split out the update from the full-document index
218 | comes from a real problem that we ourselves suffered. The full object
219 | representation that we were using was quite DB intensive - we were
220 | storing properties of the model that required walking the ORM tree.
221 | However, because we were also touching the objects (see below) to record
222 | activity timestamps, we ended up flooding the database with queries
223 | simply to update a single field in the output document. Partial updates
224 | solve this issue:
225 | 
226 | ```python
227 | 
228 |     def touch(self):
229 |         self.timestamp = now()
230 |         self.save(update_fields=['timestamp'])
231 | 
232 |     def as_search_document_update(self, index, update_fields):
233 |         if list(update_fields) == ['timestamp']:
234 |             # only propagate changes if it's +1hr since the last timestamp change
235 |             if now() - self.timestamp < timedelta(hours=1):
236 |                 return {}
237 |             else:
238 |                 return {'timestamp': self.timestamp}
239 |         ....
240 | ```
241 | 
242 | **Processing updates async**
243 | 
244 | If you are generating a lot of index updates you may want to run them
245 | async (via some kind of queueing mechanism). There is no built-in method
246 | to do this, given the range of queueing libraries and patterns
247 | available, however it is possible using the `pre_index`, `pre_update`
248 | and `pre_delete` signals. In this case, you should also turn off
249 | `AUTO_SYNC` (as this will run the updates synchronously), and process
250 | the updates yourself. The signals pass in the kwargs required by the
251 | relevant model methods, as well as the `instance` involved:
252 | 
253 | ```python
254 | # ensure that SEARCH_AUTO_SYNC=False
255 | 
256 | from django.dispatch import receiver
257 | import django_rq
258 | from elasticsearch_django.signals import (
259 |     pre_index,
260 |     pre_update,
261 |     pre_delete
262 | )
263 | 
264 | queue = django_rq.get_queue("elasticsearch")
265 | 
266 | 
267 | @receiver(pre_index, dispatch_uid="async_index_document")
268 | def index_search_document_async(sender, **kwargs):
269 |     """Queue up full search index document indexing via RQ."""
270 |     instance = kwargs.pop("instance")
271 |     queue.enqueue(
272 |         instance.index_search_document,
273 |         index=kwargs.pop("index"),
274 |     )
275 | 
276 | 
277 | @receiver(pre_update, dispatch_uid="async_update_document")
278 | def update_search_document_async(sender, **kwargs):
279 |     """Queue up partial search index document update via RQ."""
280 |     instance = kwargs.pop("instance")
281 |     queue.enqueue(
282 |         instance.update_search_document,
283 |         index=kwargs.pop("index"),
284 |         update_fields=kwargs.pop("update_fields"),
285 |     )
286 | 
287 | 
288 | @receiver(pre_delete, dispatch_uid="async_delete_document")
289 | def delete_search_document_async(sender, **kwargs):
290 |     """Queue up search index document deletion via RQ."""
291 |     instance = kwargs.pop("instance")
292 |     queue.enqueue(
293 |         instance.delete_search_document,
294 |         index=kwargs.pop("index"),
295 |     )
296 | ```
297 | 
298 | **SearchDocumentManagerMixin**
299 | 
300 | This mixin must be implemented by the model's default manager
301 | (`objects`). It also requires a single method implementation -
302 | `get_search_queryset()` - which returns a queryset of objects that are
303 | to be indexed. This can also use the `index` kwarg to provide different
304 | sets of objects to different indexes.
305 | 
306 | ```python
307 |     def get_search_queryset(self, index='_all'):
308 |         return self.get_queryset().filter(foo='bar')
309 | ```
310 | 
311 | We now have the bare bones of our search implementation. We can now use
312 | the included management commands to create and populate our search
313 | index:
314 | 
315 | ```shell
316 | # create the index 'foo' from the 'foo.json' mapping file
317 | $ ./manage.py create_search_index foo
318 | 
319 | # populate foo with all the relevant objects
320 | $ ./manage.py update_search_index foo
321 | ```
322 | 
323 | The next step is to ensure that our models stay in sync with the index.
324 | 
325 | ## Add model signal handlers to update index
326 | 
327 | If the setting `auto_sync` is True, then on `AppConfig.ready` each model
328 | configured for use in an index has its `post_save` and `post_delete`
329 | signals connected. This means that they will be kept in sync across all
330 | indexes that they appear in whenever the relevant model method is
331 | called. (There is some very basic caching to prevent too many updates -
332 | the object document is cached for one minute, and if there is no change
333 | in the document the index update is ignored.)
334 | 
335 | There is a **VERY IMPORTANT** caveat to the signal handling. It will
336 | **only** pick up on changes to the model itself, and not on related
337 | (`ForeignKey`, `ManyToManyField`) model changes. If the search document
338 | is affected by such a change then you will need to implement additional
339 | signal handling yourself.
340 | 
341 | In addition to `object.save()`, SearchDocumentMixin also provides the
342 | `update_search_index(self, action, index='_all', update_fields=None,
343 | force=False)` method. Action should be 'index', 'update' or 'delete'.
344 | The difference between 'index' and 'update' is that 'update' is a
345 | partial update that only changes the fields specified, rather than
346 | re-updating the entire document. If `action` is 'update' whilst
347 | `update_fields` is None, action will be changed to `index`.
348 | 
349 | We now have documents in our search index, kept up to date with their
350 | Django counterparts. We are ready to start querying ES.
351 | 
352 | ---
353 | 
354 | # Search Queries (How to Search)
355 | 
356 | ## Running search queries
357 | 
358 | **SearchQuery**
359 | 
360 | The `elasticsearch_django.models.SearchQuery` model wraps this
361 | functionality up and provides helper properties, as well as logging the
362 | query:
363 | 
364 | ```python
365 | from elasticsearch_django.settings import get_client
366 | from elasticsearch_django.models import execute_search
367 | 
368 | # run a default match_all query
369 | sq = execute_search(index="foo", query={"match_all": {}})
370 | # the raw response is stored on the return object,
371 | # but is not stored on the object in the database.
372 | print(sq.response)
373 | ```
374 | 
375 | Calling the `execute_search` function will execute the underlying
376 | search, log the query JSON, the number of hits, and the list of hit meta
377 | information for future analysis. The `execute_search` function also accepts
378 | these additional kwargs:
379 | 
380 | * `user` - the user who is making the query, useful for logging
381 | * `search_terms` - the search query supplied by the user (as opposed to
382 |   the DSL) - not used by ES, but stored in the logs
383 | * `reference` - a free text reference field - used for grouping searches
384 |   together - could be session id.
385 | * `save` - by default the SearchQuery created will be saved, but passing
386 |   in False will prevent this.
387 | 
388 | ## Converting search hits into Django objects
389 | 
390 | Running a search against an index will return a page of results, each
391 | containing the `_source` attribute which is the search document itself
392 | (as created by the `SearchDocumentMixin.as_search_document` method),
393 | together with meta info about the result - most significantly the
394 | relevance **score**, which is the magic value used for ranking
395 | (ordering) results. However, the search document probably doesn't
396 | contain all of the information that you need to display the result,
397 | so what you really need is a standard Django QuerySet, containing the
398 | objects in the search results, but maintaining the order. This means
399 | injecting the ES score into the queryset, and then using it for
400 | ordering. There is a method on the `SearchDocumentManagerMixin` called
401 | `from_search_query` which will do this for you. It uses raw SQL to add
402 | the score as an annotation to each object in the queryset. (It also adds
403 | the 'rank' - so that even if the score is identical for all hits, the
404 | ordering is preserved.)
405 | 
406 | ```python
407 | from models import BlogPost
408 | 
409 | # run a default match_all query
410 | sq = execute_search(index="blog", query={"match_all": {}})
411 | for obj in BlogPost.objects.from_search_query(sq):
412 |     print(obj.search_score, obj.search_rank)
413 | ```
414 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
# Two services: a Postgres database and a single-node Elasticsearch instance.
# NOTE(review): presumably for local development / running tests - confirm.
version: "3"
services:
    postgres:
        image: postgres:9.6.8
        ports:
            - "5432:5432"
        volumes:
            # NOTE(review): the official postgres image keeps its data under
            # /var/lib/postgresql/data - confirm that /data is the intended
            # mount point for this host directory.
            - ./docker/postgres:/data

    elasticsearch:
        image: elasticsearch:7.8.1
        ports:
            # 9200 is the default HTTP API port, 9300 the default transport port
            - "9200:9200"
            - "9300:9300"
        environment:
            # run as a standalone node rather than attempting to form a cluster
            - discovery.type=single-node
17 | 


--------------------------------------------------------------------------------
/elasticsearch_django/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yunojuno/elasticsearch-django/f9f82e5843e6b071cbd4b3a01ea63caa399db075/elasticsearch_django/__init__.py


--------------------------------------------------------------------------------
/elasticsearch_django/admin.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | 
  3 | import simplejson as json  # simplejson supports Decimal serialization
  4 | from django.contrib import admin
  5 | from django.template.defaultfilters import truncatechars, truncatewords
  6 | from django.utils.safestring import mark_safe
  7 | 
  8 | from .models import SearchQuery
  9 | 
 10 | logger = logging.getLogger(__name__)
 11 | 
 12 | 
 13 | def pretty_print(data: dict) -> str:
 14 |     """
 15 |     Return an indented HTML pretty-print version of JSON.
 16 | 
 17 |     Take the event_payload JSON, indent it, order the keys and then
 18 |     present it as a <code> block. That's about as good as we can get
 19 |     until someone builds a custom syntax function.
 20 | 
 21 |     """
 22 |     pretty = json.dumps(data, sort_keys=True, indent=4, separators=(",", ": "))
 23 |     html = pretty.replace(" ", "&nbsp;").replace("\n", "<br>")
 24 |     return mark_safe("<code>%s</code>" % html)  # noqa S703, S308
 25 | 
 26 | 
 27 | class SearchQueryAdmin(admin.ModelAdmin):
 28 |     list_display = (
 29 |         "id",
 30 |         "user",
 31 |         "search_terms_display",
 32 |         "total_hits_display",
 33 |         "returned_",
 34 |         "min_",
 35 |         "max_",
 36 |         "reference",
 37 |         "executed_at",
 38 |     )
 39 |     list_filter = ("index", "query_type")
 40 |     search_fields = ("search_terms", "user__first_name", "user__last_name", "reference")
 41 |     # excluding because we are using a pretty version instead
 42 |     exclude = ("hits", "aggregations", "query", "page", "total_hits_")
 43 |     readonly_fields = (
 44 |         "user",
 45 |         "index",
 46 |         "search_terms",
 47 |         "query_type",
 48 |         "total_hits",
 49 |         "total_hits_relation",
 50 |         "returned_",
 51 |         "min_",
 52 |         "max_",
 53 |         "duration",
 54 |         "query_",
 55 |         "hits_",
 56 |         "aggregations_",
 57 |         "executed_at",
 58 |     )
 59 | 
 60 |     def search_terms_display(self, instance: SearchQuery) -> str:
 61 |         """Return truncated version of search_terms."""
 62 |         raw = instance.search_terms
 63 |         # take first five words, and further truncate to 50 chars if necessary
 64 |         return truncatechars(truncatewords(raw, 5), 50)
 65 | 
 66 |     def query_(self, instance: SearchQuery) -> str:
 67 |         """Return pretty version of query JSON."""
 68 |         return pretty_print(instance.query)
 69 | 
 70 |     def max_(self, instance: SearchQuery) -> str:
 71 |         """Return pretty version of max_score."""
 72 |         return "-" if instance.page_size == 0 else str(instance.max_score)
 73 | 
 74 |     max_.short_description = "Max score"  # type: ignore
 75 | 
 76 |     def min_(self, instance: SearchQuery) -> str:
 77 |         """Return pretty version of min_score."""
 78 |         return "-" if instance.page_size == 0 else str(instance.min_score)
 79 | 
 80 |     min_.short_description = "Min score"  # type: ignore
 81 | 
 82 |     def total_hits_display(self, instance: SearchQuery) -> str:
 83 |         """Return total hit count, annotated if lower bound."""
 84 |         if instance.total_hits_relation == SearchQuery.TotalHitsRelation.ESTIMATE:
 85 |             return f"{instance.total_hits}*"
 86 |         return f"{instance.total_hits}"
 87 | 
 88 |     def returned_(self, instance: SearchQuery) -> str:
 89 |         """Return number of hits returned in the page."""
 90 |         if instance.page_size == 0:
 91 |             return "-"
 92 |         return "%i - %i" % (instance.page_from, instance.page_to)
 93 | 
 94 |     returned_.short_description = "Page returned"  # type: ignore
 95 | 
 96 |     def hits_(self, instance: SearchQuery) -> str:
 97 |         """Return pretty version of hits JSON."""
 98 |         return pretty_print(instance.hits)
 99 | 
100 |     def aggregations_(self, instance: SearchQuery) -> str:
101 |         """Return pretty version of aggregations JSON."""
102 |         return pretty_print(instance.aggregations)
103 | 
104 | 
105 | admin.site.register(SearchQuery, SearchQueryAdmin)
106 | 


--------------------------------------------------------------------------------
/elasticsearch_django/apps.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import logging
  4 | from typing import TYPE_CHECKING, Any
  5 | 
  6 | from django.apps import AppConfig
  7 | from django.core.exceptions import ImproperlyConfigured
  8 | from django.db.models import Model, signals
  9 | 
 10 | from . import settings
 11 | from .signals import pre_delete, pre_index, pre_update
 12 | 
 13 | if TYPE_CHECKING:
 14 |     from elasticsearch_django.models import SearchDocumentMixin
 15 | 
 16 | logger = logging.getLogger(__name__)
 17 | 
 18 | 
 19 | class ElasticAppConfig(AppConfig):
 20 |     """AppConfig for Search3."""
 21 | 
 22 |     name = "elasticsearch_django"
 23 |     verbose_name = "Elasticsearch"
 24 |     default_auto_field = "django.db.models.AutoField"
 25 | 
 26 |     def ready(self) -> None:
 27 |         """Validate config and connect signals."""
 28 |         super().ready()
 29 |         _validate_config(bool(settings.get_setting("strict_validation")))
 30 |         _connect_signals()
 31 | 
 32 | 
 33 | def _validate_config(strict: bool = False) -> None:
 34 |     """Validate settings.SEARCH_SETTINGS."""
 35 |     for index in settings.get_index_names():
 36 |         _validate_mapping(index, strict=strict)
 37 |         for model in settings.get_index_models(index):
 38 |             _validate_model(model)
 39 |     if settings.get_setting("update_strategy", "full") not in ["full", "partial"]:
 40 |         raise ImproperlyConfigured(
 41 |             "Invalid SEARCH_SETTINGS: 'update_strategy' value must be "
 42 |             "'full' or 'partial'."
 43 |         )
 44 | 
 45 | 
 46 | def _validate_mapping(index: str, strict: bool = False) -> None:
 47 |     """Check that an index mapping JSON file exists."""
 48 |     try:
 49 |         settings.get_index_mapping(index)
 50 |     except OSError:
 51 |         if strict:
 52 |             raise ImproperlyConfigured("Index '%s' has no mapping file." % index)
 53 |         else:
 54 |             logger.warning("Index '%s' has no mapping, relying on ES instead.", index)
 55 | 
 56 | 
 57 | def _validate_model(model: Model) -> None:
 58 |     """Check that a model configured for an index subclasses the required classes."""
 59 |     if not hasattr(model, "as_search_document"):
 60 |         raise ImproperlyConfigured("'%s' must implement `as_search_document`." % model)
 61 |     if not hasattr(model.objects, "get_search_queryset"):
 62 |         raise ImproperlyConfigured(
 63 |             "'%s.objects must implement `get_search_queryset`." % model
 64 |         )
 65 | 
 66 | 
 67 | def _connect_signals() -> None:
 68 |     """Connect up post_save, post_delete signals for models."""
 69 |     for index in settings.get_index_names():
 70 |         for model in settings.get_index_models(index):
 71 |             _connect_model_signals(model)
 72 | 
 73 | 
 74 | def _connect_model_signals(model: type[Model]) -> None:
 75 |     """Connect signals for a single model."""
 76 |     dispatch_uid = "%s.post_save" % model._meta.model_name
 77 |     logger.debug("Connecting search index model post_save signal: %s", dispatch_uid)
 78 |     signals.post_save.connect(_on_model_save, sender=model, dispatch_uid=dispatch_uid)
 79 |     dispatch_uid = "%s.post_delete" % model._meta.model_name
 80 |     logger.debug("Connecting search index model post_delete signal: %s", dispatch_uid)
 81 |     signals.post_delete.connect(
 82 |         _on_model_delete, sender=model, dispatch_uid=dispatch_uid
 83 |     )
 84 | 
 85 | 
 86 | def _on_model_save(sender: type[Model], **kwargs: Any) -> None:
 87 |     """Update document in search index post_save."""
 88 |     instance = kwargs.pop("instance")
 89 |     update_fields = kwargs.pop("update_fields")
 90 |     for index in instance.search_indexes:
 91 |         try:
 92 |             _update_search_index(
 93 |                 instance=instance, index=index, update_fields=update_fields
 94 |             )
 95 |         except Exception:  # noqa: B902
 96 |             logger.exception("Error handling 'on_save' signal for %s", instance)
 97 | 
 98 | 
 99 | def _on_model_delete(sender: type[Model], **kwargs: Any) -> None:
100 |     """Remove documents from search indexes post_delete."""
101 |     instance = kwargs.pop("instance")
102 |     for index in instance.search_indexes:
103 |         try:
104 |             _delete_from_search_index(instance=instance, index=index)
105 |         except Exception:  # noqa: B902
106 |             logger.exception("Error handling 'on_delete' signal for %s", instance)
107 | 
108 | 
def _in_search_queryset(*, instance: Model, index: str) -> bool:
    """
    Return True if the instance is in the search queryset for the index.

    Any error raised by the lookup is logged and treated as "not in the
    queryset" (False).

    """
    try:
        manager = instance.__class__.objects
        return manager.in_search_queryset(instance.pk, index=index)
    except Exception:  # noqa: B902
        logger.exception("Error checking object in_search_queryset.")
        return False
116 | 
117 | 
def _update_search_index(
    *, instance: SearchDocumentMixin, index: str, update_fields: list[str]
) -> None:
    """
    Send the pre_update / pre_index signal and sync the document.

    The signal is always sent, even when auto_sync is off, so that custom
    handlers can react to the event. The document itself is only pushed to
    the index if auto_sync is on and the instance is in the search queryset
    for the index. A truthy `update_fields` results in an update of the
    document; otherwise the document is indexed.

    """
    model_class = instance.__class__
    if update_fields:
        pre_update.send(
            sender=model_class,
            instance=instance,
            index=index,
            update_fields=update_fields,
        )
    else:
        pre_index.send(sender=model_class, instance=instance, index=index)

    # nothing more to do unless the instance is auto-synced.
    if not settings.auto_sync(instance):
        return

    if not _in_search_queryset(instance=instance, index=index):
        logger.debug(
            "Skipping search index update for %s, not in search queryset.", instance
        )
        return

    try:
        if update_fields:
            instance.update_search_document(index=index, update_fields=update_fields)
        else:
            instance.index_search_document(index=index)
    except Exception:  # noqa: B902
        logger.exception("Error handling 'post_save' signal for %s", instance)
147 | 
148 | 
def _delete_from_search_index(*, instance: SearchDocumentMixin, index: str) -> None:
    """
    Send the pre_delete signal and remove the document from the index.

    The signal is always sent; the document is only deleted when auto_sync
    is on for the instance.

    """
    pre_delete.send(sender=instance.__class__, instance=instance, index=index)
    if not settings.auto_sync(instance):
        return
    instance.delete_search_document(index=index)
154 | 


--------------------------------------------------------------------------------
/elasticsearch_django/context_managers.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from datetime import timedelta
 4 | from types import TracebackType
 5 | 
 6 | from django.utils.timezone import now as tz_now
 7 | 
 8 | 
 9 | class stopwatch:
10 |     def __enter__(self) -> stopwatch:
11 |         self.started_at = tz_now()
12 |         self.stopped_at = None
13 |         self.in_progress = True
14 |         return self
15 | 
16 |     def __exit__(
17 |         self,
18 |         exc_type: type[Exception],
19 |         exc_value: Exception,
20 |         traceback: TracebackType,
21 |     ) -> None:
22 |         self.stopped_at = tz_now()
23 |         self.in_progress = False
24 | 
25 |     @property
26 |     def duration(self) -> timedelta:
27 |         if self.in_progress:
28 |             return tz_now() - self.started_at
29 |         return self.stopped_at - self.started_at
30 | 
31 |     @property
32 |     def elapsed(self) -> float:
33 |         return (self.duration).microseconds / 1e6
34 | 


--------------------------------------------------------------------------------
/elasticsearch_django/decorators.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from contextlib import contextmanager
 4 | from typing import Generator
 5 | 
 6 | from django.db.models import signals
 7 | 
 8 | from .apps import _on_model_save
 9 | 
10 | 
@contextmanager
def disable_search_updates() -> Generator:
    """
    Context manager used to temporarily disable auto_sync.

    This is useful when performing bulk updates on objects - when
    you may not want to flood the indexing process.

    >>> with disable_search_updates():
    ...     for obj in model.objects.all():
    ...         obj.save()

    The function works by temporarily removing the apps._on_model_save
    signal handler from the model.post_save signal receivers, and then
    restoring them after - even if the enclosed block raises. Only
    post_save is affected; post_delete handlers are left connected.

    """
    import weakref

    def _is_search_receiver(entry: tuple) -> bool:
        """Return True if a post_save receiver entry is _on_model_save."""
        receiver = entry[1]
        # Receivers connected weakly are stored as weak references and must
        # be dereferenced; receivers connected with weak=False are stored as
        # the callable itself, which must not be called here.
        if isinstance(receiver, weakref.ReferenceType):
            receiver = receiver()
        return receiver == _on_model_save

    # get a list of the receivers for the search updates
    search_update_receivers = [
        r for r in signals.post_save.receivers if _is_search_receiver(r)
    ]
    # strip them from the current post_save receivers
    signals.post_save.receivers = [
        r for r in signals.post_save.receivers if r not in search_update_receivers
    ]
    signals.post_save.sender_receivers_cache.clear()
    try:
        yield
    finally:
        # add them back on again, whether or not the block succeeded
        signals.post_save.receivers += search_update_receivers
        signals.post_save.sender_receivers_cache.clear()
41 | 


--------------------------------------------------------------------------------
/elasticsearch_django/index.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import logging
  4 | from typing import Any, Generator, List, Sequence, Tuple, Union
  5 | 
  6 | from django.db.models import Model
  7 | from elastic_transport import ObjectApiResponse
  8 | from elasticsearch import helpers
  9 | 
 10 | from .models import SearchDocumentMixin
 11 | from .settings import get_client, get_index_mapping, get_index_models, get_setting
 12 | 
 13 | BulkResponseType = Tuple[int, Union[int, List[Any]]]
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | def create_index(index: str) -> ObjectApiResponse:
 19 |     """Create an index and apply mapping if appropriate."""
 20 |     logger.info("Creating search index: '%s'", index)
 21 |     client = get_client()
 22 |     mapping = get_index_mapping(index)
 23 |     return client.indices.create(
 24 |         index=index,
 25 |         mappings=mapping["mappings"],
 26 |         settings=mapping.get("settings", None),
 27 |     )
 28 | 
 29 | 
 30 | def update_index(index: str) -> list[BulkResponseType]:
 31 |     """Re-index every document in a named index."""
 32 |     logger.info("Updating search index: '%s'", index)
 33 |     client = get_client()
 34 |     responses: list[BulkResponseType] = []
 35 |     for model in get_index_models(index):
 36 |         logger.info("Updating search index model: '%s'", model._meta.label)
 37 |         objects = model.objects.get_search_queryset(index).iterator()
 38 |         actions = bulk_actions(objects, index=index, action="index")
 39 |         response = helpers.bulk(client, actions, chunk_size=get_setting("chunk_size"))
 40 |         responses.append(response)
 41 |     return responses
 42 | 
 43 | 
 44 | def delete_index(index: str, ignore_unavailable: bool = True) -> ObjectApiResponse:
 45 |     """Delete index entirely (removes all documents and mapping)."""
 46 |     logger.info("Deleting search index: '%s'", index)
 47 |     indices = get_client().indices
 48 |     return indices.delete(index=index, ignore_unavailable=ignore_unavailable)
 49 | 
 50 | 
 51 | def prune_index(index: str) -> list[BulkResponseType]:
 52 |     """
 53 |     Remove all orphaned documents from an index.
 54 | 
 55 |     This function works by scanning the remote index, and in each returned
 56 |     batch of documents looking up whether they appear in the default index
 57 |     queryset. If they don't (they've been deleted, or no longer fit the qs
 58 |     filters) then they are deleted from the index. The deletion is done in
 59 |     one hit after the entire remote index has been scanned.
 60 | 
 61 |     The elasticsearch.helpers.scan function returns each document one at a
 62 |     time, so this function can swamp the database with SELECT requests.
 63 | 
 64 |     Please use sparingly.
 65 | 
 66 |     Returns a list of ids of all the objects deleted.
 67 | 
 68 |     """
 69 |     logger.info("Pruning missing objects from index '%s'", index)
 70 |     prunes: list[SearchDocumentMixin] = []
 71 |     responses: list[BulkResponseType] = []
 72 |     client = get_client()
 73 |     for model in get_index_models(index):
 74 |         for hit in scan_index(index, model):
 75 |             obj = _prune_hit(hit, model)
 76 |             if obj:
 77 |                 prunes.append(obj)
 78 |         logger.info(
 79 |             "Found %s objects of type '%s' for deletion from '%s'.",
 80 |             len(prunes),
 81 |             model._meta.label,
 82 |             index,
 83 |         )
 84 |         if len(prunes) > 0:
 85 |             actions = bulk_actions(prunes, index, "delete")
 86 |             response = helpers.bulk(
 87 |                 client, actions, chunk_size=get_setting("chunk_size")
 88 |             )
 89 |             responses.append(response)
 90 |     return responses
 91 | 
 92 | 
 93 | def _prune_hit(hit: dict, model: Model) -> Model | None:
 94 |     """
 95 |     Check whether a document should be pruned.
 96 | 
 97 |     This method uses the SearchDocumentManagerMixin.in_search_queryset method
 98 |     to determine whether a 'hit' (search document) should be pruned from an index,
 99 |     and if so it returns the hit as a Django object(id=hit_id).
100 | 
101 |     Args:
102 |         hit: dict object the represents a document as returned from the scan_index
103 |             function. (Contains object id and index.)
104 |         model: the Django model (not object) from which the document was derived.
105 |             Used to get the correct model manager and bulk action.
106 | 
107 |     Returns:
108 |         an object of type model, with id=hit_id. NB this is not the object
109 |         itself, which by definition may not exist in the underlying database,
110 |         but a temporary object with the document id - which is enough to create
111 |         a 'delete' action.
112 | 
113 |     """
114 |     hit_id = hit["_id"]
115 |     hit_index = hit["_index"]
116 |     if model.objects.in_search_queryset(hit_id, index=hit_index):
117 |         logger.debug(
118 |             "%s with id=%s exists in the '%s' index queryset.",
119 |             model._meta.label,
120 |             hit_id,
121 |             hit_index,
122 |         )
123 |         return None
124 |     else:
125 |         logger.debug(
126 |             "%s with id=%s does not exist in the '%s' index "
127 |             "queryset and will be pruned.",
128 |             model._meta.label,
129 |             hit_id,
130 |             hit_index,
131 |         )
132 |         # we don't need the full obj for a delete action, just the id.
133 |         # (the object itself may not even exist.)
134 |         return model(pk=hit_id)
135 | 
136 | 
def scan_index(index: str, model: Model) -> Generator:
    """
    Yield the documents in an index, one at a time.

    This is a thin wrapper around elasticsearch.helpers.scan.

    Args:
        index: string, the name of the index to scan.
        model: a Django model type. NB it is accepted for the benefit of
            callers but is not used to filter the scan - every document
            in the index is yielded.

    """
    yield from helpers.scan(get_client(), index=index)
157 | 
158 | 
def bulk_actions(objects: Sequence[Model], index: str, action: str) -> Generator:
    """
    Yield bulk api 'actions' from a collection of objects.

    Each object's `as_search_action` method is called to produce the
    action, and the output can be fed straight in to the bulk api
    helpers. An object whose action cannot be created is logged and
    skipped.

    As this is a generator, nothing (including the index name check)
    runs until the first action is requested.

    Args:
        objects: iterable (queryset, list, generator, ...) of objects
            that implement `as_search_action`.
        index: string, the name of the index to target; passed on to
            `as_search_action`. The reserved name '_all' is rejected
            with a ValueError.
        action: string ['index' | 'update' | 'delete'], passed on to
            `as_search_action`.

    """
    if index == "_all":
        raise ValueError(
            "index arg must be a valid index name. '_all' is a reserved term."
        )

    logger.info("Creating bulk '%s' actions for '%s'", action, index)
    for item in objects:
        try:
            logger.debug("Appending '%s' action for '%r'", action, item)
            yield item.as_search_action(index=index, action=action)
        except Exception:  # noqa: B902
            logger.exception("Unable to create search action for %s", item)
188 | 


--------------------------------------------------------------------------------
/elasticsearch_django/management/__init__.py:
--------------------------------------------------------------------------------
1 | # search.management package identifier
2 | 


--------------------------------------------------------------------------------
/elasticsearch_django/management/commands/__init__.py:
--------------------------------------------------------------------------------
 1 | """Base command for search-related management commands."""
 2 | from __future__ import annotations
 3 | 
 4 | import argparse
 5 | import builtins
 6 | import logging
 7 | from typing import Any, Optional, Union
 8 | 
 9 | from django.core.management.base import BaseCommand
10 | from elasticsearch.exceptions import TransportError
11 | 
12 | CommandReturnType = Optional[Union[list, dict]]
13 | logger = logging.getLogger(__name__)
14 | 
15 | 
class BaseSearchCommand(BaseCommand):
    """
    Base class for commands that interact with the search index.

    Subclasses implement `do_index_command`, which `handle` calls once
    for each index name given on the command line.

    """

    description = "Base search command."

    def _confirm_action(self) -> bool:
        """Return True if the user confirms the action."""
        msg = "Are you sure you wish to continue? [y/N] "
        # anything starting with "y" / "Y" confirms; everything else
        # (including just pressing return) declines.
        return builtins.input(msg).lower().startswith("y")

    def add_arguments(self, parser: argparse.ArgumentParser) -> None:
        """Add default base options of --noinput and indexes."""
        parser.add_argument(
            "-f",
            "--noinput",
            # the flag switches the (default True) `interactive` option off
            action="store_false",
            dest="interactive",
            default=True,
            help="Do not display user prompts - may affect data.",
        )
        parser.add_argument(
            "indexes", nargs="*", help="Names of indexes on which to run the command."
        )

    def do_index_command(self, index: str, **options: Any) -> CommandReturnType:
        """Run a command against a named index - subclasses must override."""
        raise NotImplementedError()

    def handle(self, *args: Any, **options: Any) -> None:
        """
        Run do_index_command on each specified index and log the output.

        Errors are logged rather than raised, so a failure on one index
        does not prevent the command running on the remaining indexes.

        """
        for index in options.pop("indexes"):
            try:
                data = self.do_index_command(index, **options)
            except TransportError:
                logger.exception("Elasticsearch threw a TransportError")
            except FileNotFoundError:
                logger.exception("Elasticsearch mapping file not found")
            except Exception:  # noqa: B902
                logger.exception("Unhandled error running Elasticsearch index command")
            else:
                logger.info(data)
57 | 


--------------------------------------------------------------------------------
/elasticsearch_django/management/commands/create_search_index.py:
--------------------------------------------------------------------------------
 1 | """Create a search index."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Any
 5 | 
 6 | from ...index import create_index
 7 | from . import BaseSearchCommand, CommandReturnType
 8 | 
 9 | 
class Command(BaseSearchCommand):
    """Management command that creates a search index."""

    help = "Create a new search index using the relevant mapping file."
    description = "Create search index"

    def do_index_command(self, index: str, **options: Any) -> CommandReturnType:
        """Create the named index and return the response body."""
        response = create_index(index)
        return response.body
19 | 


--------------------------------------------------------------------------------
/elasticsearch_django/management/commands/delete_search_index.py:
--------------------------------------------------------------------------------
 1 | """Delete a search index (and all documents therein)."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | from typing import Any
 6 | 
 7 | from ...index import delete_index
 8 | from . import BaseSearchCommand, CommandReturnType
 9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | 
class Command(BaseSearchCommand):
    """Management command that deletes a search index."""

    help = "Clears out the specified (or all) search index completely."
    description = "Delete search index"

    def do_index_command(self, index: str, **options: Any) -> CommandReturnType:
        """
        Delete the named index and return the response body.

        In interactive mode the user is asked to confirm first; if they
        decline nothing is deleted and None is returned.

        """
        interactive = options["interactive"]
        if interactive:
            logger.warning("This will permanently delete the index '%s'.", index)
            confirmed = self._confirm_action()
            if not confirmed:
                logger.warning(
                    "Aborting deletion of index '%s' at user's request.", index
                )
                return None
        response = delete_index(index)
        return response.body
29 | 


--------------------------------------------------------------------------------
/elasticsearch_django/management/commands/prune_search_index.py:
--------------------------------------------------------------------------------
 1 | """Remove all documents in a search index that no longer exist in source queryset."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Any
 5 | 
 6 | from ...index import prune_index
 7 | from . import BaseSearchCommand, CommandReturnType
 8 | 
 9 | 
class Command(BaseSearchCommand):
    """Management command that prunes a search index."""

    help = "Remove all out-of-date documents in a search index."
    description = "Prune search index"

    def do_index_command(self, index: str, **options: Any) -> CommandReturnType:
        """Prune the named index and return the prune_index result."""
        pruned = prune_index(index)
        return pruned
19 | 


--------------------------------------------------------------------------------
/elasticsearch_django/management/commands/rebuild_search_index.py:
--------------------------------------------------------------------------------
 1 | """Rebuild a search index (delete, create, then update)."""
 2 | from __future__ import annotations
 3 | 
 4 | import logging
 5 | from typing import Any
 6 | 
 7 | from elasticsearch.exceptions import TransportError
 8 | 
 9 | from ...index import create_index, delete_index, update_index
10 | from . import BaseSearchCommand, CommandReturnType
11 | 
12 | logger = logging.getLogger(__name__)
13 | 
14 | 
class Command(BaseSearchCommand):
    """Management command that rebuilds a search index from scratch."""

    help = (
        "Delete, create and update a new search index using the relevant mapping file."
    )
    description = "Rebuild search index"

    def do_index_command(self, index: str, **options: Any) -> CommandReturnType:
        """
        Delete, re-create and re-populate the named index.

        In interactive mode the user is asked to confirm first; if they
        decline nothing is changed and None is returned. A TransportError
        from the delete step is logged and does not stop the rebuild.

        Returns a dict holding the result of each step, under the keys
        "delete", "create" and "update".

        """
        if options["interactive"]:
            logger.warning("This will permanently delete the index '%s'.", index)
            confirmed = self._confirm_action()
            if not confirmed:
                logger.warning(
                    "Aborting rebuild of index '%s' at user's request.", index
                )
                return None

        try:
            deleted = delete_index(index).body
        except TransportError:
            deleted = {}
            logger.info("Index %s does not exist, cannot be deleted.", index)
        created = create_index(index).body
        updated = update_index(index)

        return {"delete": deleted, "create": created, "update": updated}
42 | 


--------------------------------------------------------------------------------
/elasticsearch_django/management/commands/update_search_index.py:
--------------------------------------------------------------------------------
 1 | """Update all documents in a search index."""
 2 | from __future__ import annotations
 3 | 
 4 | from typing import Any
 5 | 
 6 | from ...index import update_index
 7 | from . import BaseSearchCommand, CommandReturnType
 8 | 
 9 | 
class Command(BaseSearchCommand):
    """Management command that updates the documents in a search index."""

    help = "Update all documents in a search index."
    description = "Update search index."

    def do_index_command(self, index: str, **options: Any) -> CommandReturnType:
        """Update the named index and return the update_index result."""
        updated = update_index(index)
        return updated
19 | 


--------------------------------------------------------------------------------
/elasticsearch_django/mappings/README.md:
--------------------------------------------------------------------------------
 1 | # Elasticsearch Mappings
 2 | 
 3 | This directory contains the Elasticsearch index mappings. Each file describes a single index, with
 4 | the name of the file matching the name of the index. e.g. `profiles.json` describes the profiles
 5 | index.
 6 | 
 7 | The mappings file does not describe how the index is populated - only how the search index analyses
 8 | and stores the data that is posted to it.
 9 | 
10 | See https://www.elastic.co/guide/en/elasticsearch/reference/7.x/mapping.html for more details on
11 | mapping and index configuration.
12 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0001_initial.py:
--------------------------------------------------------------------------------
 1 | from django.conf import settings
 2 | from django.db import migrations, models
 3 | 
 4 | 
class Migration(migrations.Migration):
    """
    Create the initial SearchQuery model.

    The model is created with the fields id, index, query, hits, total_hits,
    reference, executed_at and user.

    """

    # the user foreign key below points at the swappable AUTH_USER_MODEL
    dependencies = [migrations.swappable_dependency(settings.AUTH_USER_MODEL)]

    operations = [
        migrations.CreateModel(
            name="SearchQuery",
            fields=[
                (
                    "id",
                    models.AutoField(
                        verbose_name="ID",
                        serialize=False,
                        auto_created=True,
                        primary_key=True,
                    ),
                ),
                (
                    "index",
                    models.CharField(
                        default="_all",
                        help_text="The name of the Elasticsearch index(es) being queried.",
                        max_length=100,
                    ),
                ),
                # query and hits are plain TextFields in this initial migration
                (
                    "query",
                    models.TextField(
                        default="{}", help_text="The raw Elasticsearch DSL query."
                    ),
                ),
                (
                    "hits",
                    models.TextField(
                        default="{}",
                        help_text="The list of meta info for each of the query matches returned.",
                    ),
                ),
                (
                    "total_hits",
                    models.IntegerField(
                        default=0,
                        help_text="Total number of matches found for the query (!= the hits returned).",
                    ),
                ),
                (
                    "reference",
                    models.CharField(
                        default="",
                        help_text="Custom reference used to identify and group related searches.",
                        max_length=100,
                        blank=True,
                    ),
                ),
                (
                    "executed_at",
                    models.DateTimeField(
                        help_text="When the search was executed - set via execute() method."
                    ),
                ),
                # nullable: the user reference is set to NULL when the user is deleted
                (
                    "user",
                    models.ForeignKey(
                        related_name="search_queries",
                        blank=True,
                        to=settings.AUTH_USER_MODEL,
                        help_text="The user who made the search query (nullable).",
                        null=True,
                        on_delete=models.SET_NULL,
                    ),
                ),
            ],
        )
    ]
78 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0002_searchquery_duration.py:
--------------------------------------------------------------------------------
 1 | from django.db import migrations, models
 2 | 
 3 | 
class Migration(migrations.Migration):
    """Add the `duration` FloatField to SearchQuery."""

    dependencies = [("elasticsearch_django", "0001_initial")]

    operations = [
        migrations.AddField(
            model_name="searchquery",
            name="duration",
            field=models.FloatField(
                default=0,
                help_text="Time taken to execute the search itself, in seconds.",
            ),
            # the default of 0 is used to populate existing rows only; with
            # preserve_default=False it is not kept on the field afterwards.
            preserve_default=False,
        )
    ]
18 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0003_auto_20160926_2021.py:
--------------------------------------------------------------------------------
 1 | from django.db import migrations
 2 | 
 3 | 
class Migration(migrations.Migration):
    """Set the singular and plural verbose names of the SearchQuery model."""

    dependencies = [("elasticsearch_django", "0002_searchquery_duration")]

    operations = [
        migrations.AlterModelOptions(
            name="searchquery",
            options={
                "verbose_name": "Search query",
                "verbose_name_plural": "Search queries",
            },
        )
    ]
16 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0004_auto_20161129_1135.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 1.9 on 2016-11-29 11:35
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Change the `hits` and `query` fields of SearchQuery to JSONField."""

    dependencies = [("elasticsearch_django", "0003_auto_20160926_2021")]

    operations = [
        # both fields were created as TextField in 0001_initial
        migrations.AlterField(
            model_name="searchquery",
            name="hits",
            field=models.JSONField(
                help_text="The list of meta info for each of the query matches returned."
            ),
        ),
        migrations.AlterField(
            model_name="searchquery",
            name="query",
            field=models.JSONField(help_text="The raw Elasticsearch DSL query."),
        ),
    ]
23 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0005_convert_JSONFields.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 1.10 on 2017-01-03 16:11
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """
    Declare the `hits` and `query` fields of SearchQuery as JSONField.

    The two AlterField operations here have the same field definitions as
    those in 0004_auto_20161129_1135.

    """

    dependencies = [("elasticsearch_django", "0004_auto_20161129_1135")]

    operations = [
        migrations.AlterField(
            model_name="searchquery",
            name="hits",
            field=models.JSONField(
                help_text="The list of meta info for each of the query matches returned."
            ),
        ),
        migrations.AlterField(
            model_name="searchquery",
            name="query",
            field=models.JSONField(help_text="The raw Elasticsearch DSL query."),
        ),
    ]
23 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0006_add_encoder_JSONField_kwarg.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 1.11a1 on 2017-04-06 14:40
 2 | 
 3 | from django.core.serializers.json import DjangoJSONEncoder
 4 | from django.db import migrations, models
 5 | 
 6 | 
class Migration(migrations.Migration):
    """Set DjangoJSONEncoder as the encoder of the `hits` and `query` JSONFields."""

    dependencies = [("elasticsearch_django", "0005_convert_JSONFields")]

    operations = [
        migrations.AlterField(
            model_name="searchquery",
            name="hits",
            field=models.JSONField(
                encoder=DjangoJSONEncoder,
                help_text="The list of meta info for each of the query matches returned.",
            ),
        ),
        migrations.AlterField(
            model_name="searchquery",
            name="query",
            field=models.JSONField(
                encoder=DjangoJSONEncoder, help_text="The raw Elasticsearch DSL query."
            ),
        ),
    ]
27 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0007_update_json_field_encoders.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.0.5 on 2018-05-11 13:30
 2 | 
 3 | import django.core.serializers.json
 4 | from django.db import migrations, models
 5 | 
 6 | 
class Migration(migrations.Migration):
    """
    Re-declare the `hits` and `query` JSONFields with DjangoJSONEncoder.

    The encoder is referenced here through its full module path
    (`django.core.serializers.json.DjangoJSONEncoder`).

    """

    dependencies = [("elasticsearch_django", "0006_add_encoder_JSONField_kwarg")]

    operations = [
        migrations.AlterField(
            model_name="searchquery",
            name="hits",
            field=models.JSONField(
                encoder=django.core.serializers.json.DjangoJSONEncoder,
                help_text="The list of meta info for each of the query matches returned.",
            ),
        ),
        migrations.AlterField(
            model_name="searchquery",
            name="query",
            field=models.JSONField(
                encoder=django.core.serializers.json.DjangoJSONEncoder,
                help_text="The raw Elasticsearch DSL query.",
            ),
        ),
    ]
28 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0008_searchquery_search_terms.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.0.5 on 2018-08-03 16:40
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Add the optional `search_terms` CharField (max 400 chars) to SearchQuery."""

    dependencies = [("elasticsearch_django", "0007_update_json_field_encoders")]

    operations = [
        migrations.AddField(
            model_name="searchquery",
            name="search_terms",
            field=models.CharField(
                blank=True,
                default="",
                help_text="Free text search terms used in the query, stored for easy reference.",
                max_length=400,
            ),
        )
    ]
21 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0009_searchquery_query_type.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 2.2 on 2019-04-11 17:15
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | from elasticsearch_django.models import SearchQuery
 6 | 
 7 | 
class Migration(migrations.Migration):
    """Add the `query_type` CharField to SearchQuery, defaulting to "SEARCH"."""

    dependencies = [("elasticsearch_django", "0008_searchquery_search_terms")]

    operations = [
        migrations.AddField(
            model_name="searchquery",
            name="query_type",
            field=models.CharField(
                # NOTE(review): the choices are read from the live SearchQuery
                # model when this module is imported, rather than being written
                # out as literals in the migration - confirm this is intended,
                # as a later change to QueryType would alter this migration.
                choices=SearchQuery.QueryType.choices,
                default="SEARCH",
                help_text="Does this query return results, or just the hit count?",
                max_length=10,
            ),
        )
    ]
23 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0010_searchquery_total_hits_relation.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.8 on 2020-07-29 15:54
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
class Migration(migrations.Migration):
    """Add the optional `total_hits_relation` CharField ('eq' / 'gte') to SearchQuery."""

    dependencies = [
        ("elasticsearch_django", "0009_searchquery_query_type"),
    ]

    operations = [
        migrations.AddField(
            model_name="searchquery",
            name="total_hits_relation",
            field=models.CharField(
                blank=True,
                choices=[
                    ("eq", "Accurate hit count"),
                    ("gte", "Lower bound of total hits"),
                ],
                # blank string is the default, i.e. neither of the two choices
                default="",
                help_text="Indicates whether this is an exact match ('eq') or a lower bound ('gte')",
                max_length=3,
            ),
        ),
    ]
27 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0011_searchquery_aggregations.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.1 on 2020-08-15 11:52
 2 | 
 3 | import django.core.serializers.json
 4 | from django.db import migrations, models
 5 | 
 6 | 
class Migration(migrations.Migration):
    """Add the `aggregations` JSONField to SearchQuery, defaulting to an empty dict."""

    dependencies = [
        ("elasticsearch_django", "0010_searchquery_total_hits_relation"),
    ]

    operations = [
        migrations.AddField(
            model_name="searchquery",
            name="aggregations",
            field=models.JSONField(
                # callable default: `dict` is called to produce the value
                default=dict,
                encoder=django.core.serializers.json.DjangoJSONEncoder,
                help_text="The raw aggregations returned from the query.",
            ),
        ),
    ]
23 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/0012_alter_searchquery_aggregations_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.1.4 on 2022-12-20 13:00
 2 | 
 3 | import django.core.serializers.json
 4 | from django.db import migrations, models
 5 | 
 6 | 
class Migration(migrations.Migration):
    """
    Make the `aggregations` and `hits` JSONFields nullable and optional.

    `aggregations` also has its default set to None.

    """

    dependencies = [
        ("elasticsearch_django", "0011_searchquery_aggregations"),
    ]

    operations = [
        migrations.AlterField(
            model_name="searchquery",
            name="aggregations",
            field=models.JSONField(
                blank=True,
                default=None,
                encoder=django.core.serializers.json.DjangoJSONEncoder,
                help_text="The raw aggregations returned from the query.",
                null=True,
            ),
        ),
        migrations.AlterField(
            model_name="searchquery",
            name="hits",
            field=models.JSONField(
                blank=True,
                encoder=django.core.serializers.json.DjangoJSONEncoder,
                help_text="The list of meta info for each of the query matches returned.",
                null=True,
            ),
        ),
    ]
35 | 


--------------------------------------------------------------------------------
/elasticsearch_django/migrations/__init__.py:
--------------------------------------------------------------------------------
1 | # elasticsearch_django migrations package identifier.
2 | 


--------------------------------------------------------------------------------
/elasticsearch_django/models.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | 
  3 | import copy
  4 | import logging
  5 | from typing import Any, cast
  6 | 
  7 | from django.conf import settings
  8 | from django.core.cache import cache
  9 | from django.core.serializers.json import DjangoJSONEncoder
 10 | from django.db import models
 11 | from django.db.models import Case, Value, When
 12 | from django.db.models.query import QuerySet
 13 | from django.utils.functional import SimpleLazyObject
 14 | from django.utils.translation import gettext_lazy as _lazy
 15 | from elastic_transport import ObjectApiResponse
 16 | from elasticsearch import Elasticsearch
 17 | 
 18 | from .context_managers import stopwatch
 19 | from .settings import (
 20 |     get_client,
 21 |     get_model_index_properties,
 22 |     get_model_indexes,
 23 |     get_setting,
 24 | )
 25 | 
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# The two update strategy names that as_search_document_update understands.
UPDATE_STRATEGY_FULL = "full"
UPDATE_STRATEGY_PARTIAL = "partial"
# Active strategy, read from the "update_strategy" setting once at import
# time, with UPDATE_STRATEGY_FULL passed as the fallback value.
UPDATE_STRATEGY = get_setting("update_strategy", UPDATE_STRATEGY_FULL)

# Wrapped in SimpleLazyObject so get_client is only called on first use,
# not when this module is imported.
DEFAULT_CLIENT: Elasticsearch = SimpleLazyObject(get_client)
DEFAULT_FROM: int = 0
# cast() is a type-checker hint only - the setting value is not converted.
DEFAULT_PAGE_SIZE = cast(int, get_setting("page_size"))
# Coerced with bool(); True is passed to get_setting as the fallback value.
DEFAULT_INCLUDE_SOURCE = bool(get_setting("include_source", True))
 36 | 
 37 | 
 38 | class SearchResultsQuerySet(QuerySet):
 39 |     """
 40 |     QuerySet mixin that adds annotations from search results.
 41 | 
 42 |     This class is designed to be used as a QuerySet mixin for models that can
 43 |     be mapped on to a set of search results, but that are not the source models.
 44 | 
 45 |     As an example, if you have a Profile model and a ProfileSearchDocument model
 46 |     that is a 1:1 relationship, with the ProfileSearchDocument configured to be
 47 |     the index source, then this class can be used to map the results from the
 48 |     search result id back to the Profile.
 49 | 
 50 | 
 51 |         class ProfileQuerySet(SearchDocumentQuerySet):
 52 |             pass
 53 | 
 54 | 
 55 |         class Profile(Model):
 56 |             pass
 57 | 
 58 | 
 59 |         class ProfileSearchDocument(SearchDocumentMixing, Model):
 60 |             profile = OneToOne(Profile)
 61 | 
 62 |             def get_search_document_id(self):
 63 |                 return self.profile.pk
 64 | 
 65 | 
 66 |         >>> search_query = execute_search(...)
 67 |         >>> profiles = (
 68 |                 Profile.objects.all()
 69 |                 .filter_search_results(search_query)
 70 |                 .add_search_annotations(search_query)
 71 |                 .add_search_highlights(search_query)
 72 |             )
 73 |         ...
 74 |         [<Profile>, <Profile>]
 75 |         >>> profiles[0].search_rank
 76 |         1
 77 |         >>> profiles[0].search_score
 78 |         3.12345
 79 |         >>> profiles[0].search_highlights
 80 |         {
 81 |             "resume": ["foo"]
 82 |         }
 83 | 
 84 |     """
 85 | 
 86 |     # the field used to map objects to search document id
 87 |     search_document_id_field = "pk"
 88 | 
 89 |     def filter_search_results(self, search_query: SearchQuery) -> SearchResultsQuerySet:
 90 |         """Filter queryset on PK field to match search query hits."""
 91 |         return self.filter(
 92 |             **{f"{self.search_document_id_field}__in": search_query.object_ids}
 93 |         )
 94 | 
 95 |     def add_search_rank(self, search_query: SearchQuery) -> SearchResultsQuerySet:
 96 |         """Add search_rank annotation to queryset."""
 97 |         if search_rank_annotation := search_query.search_rank_annotation(
 98 |             self.search_document_id_field
 99 |         ):
100 |             return self.annotate(search_rank=search_rank_annotation)
101 |         return self.annotate(search_rank=Value(1))
102 | 
103 |     def add_search_score(self, search_query: SearchQuery) -> SearchResultsQuerySet:
104 |         """Add search_score annotation to queryset."""
105 |         if search_score_annotation := search_query.search_score_annotation(
106 |             self.search_document_id_field
107 |         ):
108 |             return self.annotate(search_score=search_score_annotation)
109 |         return self.annotate(search_score=Value(1.0))
110 | 
111 |     def add_search_annotations(
112 |         self, search_query: SearchQuery
113 |     ) -> SearchResultsQuerySet:
114 |         """Add search_rank and search_score annotations to queryset."""
115 |         return self.add_search_rank(search_query).add_search_score(search_query)
116 | 
117 |     def add_search_highlights(self, search_query: SearchQuery) -> list:
118 |         """Add search_highlights attr. to each object in the queryset (evaluates QS)."""
119 |         obj_list = list(self)
120 |         if not search_query.has_highlights:
121 |             return obj_list
122 | 
123 |         for obj in obj_list:
124 |             pk = getattr(obj, self.search_document_id_field)
125 |             obj.search_highlights = search_query.get_doc_highlights(pk)
126 |         return obj_list
127 | 
128 |     def from_search_results(self, search_query: SearchQuery) -> SearchResultsQuerySet:
129 |         qs = self.filter_search_results(search_query)
130 |         qs = qs.add_search_annotations(search_query)
131 |         return qs.order_by("search_rank")
132 | 
133 | 
class SearchDocumentManagerMixin(models.Manager):
    """
    Manager mixin providing the queryset used to populate a search index.

    Subclasses must implement `get_search_queryset`, which returns the
    queryset of objects that belong in the index.

    If `get_search_queryset` reads from a different database connection
    from the one used to save models, `in_search_queryset` can return
    False for an object that has just been created, because the two
    queries run in different transactions.

    Setting `IN_SEARCH_QUERYSET_DB_ALIAS` makes `in_search_queryset` run
    its query against the named connection, which avoids that problem.

    """

    IN_SEARCH_QUERYSET_DB_ALIAS = get_setting("in_search_queryset_db_alias", "")

    def get_search_queryset(self, index: str = "_all") -> QuerySet:
        """
        Return the queryset of objects that populate the search index.

        Kwargs:
            index: string, the name of the index being populated, so that
                different indexes can hold different sets of objects.
                Defaults to '_all'.

        Must be overridden, and the override must return a queryset; this
        base implementation always raises NotImplementedError.

        """
        manager_name = self.__class__.__name__
        raise NotImplementedError(
            f"{manager_name} does not implement 'get_search_queryset'."
        )

    def in_search_queryset(self, instance_pk: Any, index: str = "_all") -> bool:
        """
        Return True if the object with this pk is in the search queryset.

        This answers the question "should this object be indexed?" by
        adding a pk filter to the queryset from `get_search_queryset` and
        checking whether any row exists - the full queryset is not
        evaluated, which is why `get_search_queryset` must return a
        queryset.

        Args:
            instance_pk: the primary key of the object to look for.

        Kwargs:
            index: string, the name of the index in which to check.
                Defaults to '_all'.

        """
        candidates = self.get_search_queryset(index=index)
        matching = candidates.filter(pk=instance_pk)
        db_alias = self.IN_SEARCH_QUERYSET_DB_ALIAS
        if db_alias:
            # run the existence check on the configured connection
            matching = matching.using(db_alias)
        return matching.exists()
204 | 
205 | 
206 | class SearchDocumentMixin:
207 |     """
208 |     Mixin used by models that are indexed for ES.
209 | 
210 |     This mixin defines the interface exposed by models that
211 |     are indexed ready for ES. The only method that needs
212 |     implementing is `as_search_document`.
213 | 
214 |     """
215 | 
216 |     @property
217 |     def _model_meta(self) -> Any:
218 |         if not (meta := getattr(self, "_meta")):
219 |             raise ValueError(
220 |                 "SearchDocumentMixin missing _meta attr - "
221 |                 "have you forgotten to subclass models.Model?"
222 |             )
223 |         return meta
224 | 
225 |     @property
226 |     def search_indexes(self) -> list[str]:
227 |         """Return the list of indexes for which this model is configured."""
228 |         return get_model_indexes(self.__class__)
229 | 
230 |     @property
231 |     def search_document_cache_key(self) -> str:
232 |         """Key used for storing search docs in local cache."""
233 |         return "elasticsearch_django:{}.{}.{}".format(
234 |             self._model_meta.app_label,
235 |             self._model_meta.model_name,
236 |             self.get_search_document_id(),
237 |         )
238 | 
239 |     def as_search_document(self, *, index: str) -> dict:
240 |         """
241 |         Return the object as represented in a named index.
242 | 
243 |         This is named to avoid confusion - if it was `get_search_document`,
244 |         which would be the logical name, it would not be clear whether it
245 |         referred to getting the local representation of the search document,
246 |         or actually fetching it from the index.
247 | 
248 |         Kwargs:
249 |             index: string, the name of the index in which the object is to
250 |                 appear - this allows different representations in different
251 |                 indexes. Defaults to '_all', in which case all indexes use
252 |                 the same search document structure.
253 | 
254 |         Returns a dictionary.
255 | 
256 |         """
257 |         raise NotImplementedError(
258 |             "{} does not implement 'as_search_document'.".format(
259 |                 self.__class__.__name__
260 |             )
261 |         )
262 | 
263 |     def get_search_document_id(self) -> str:
264 |         """
265 |         Return the value to be used as the search document id.
266 | 
267 |         This value defaults to the object pk value - which is cast to a
268 |         str value as that is what ES uses.
269 | 
270 |         It can be overridden in subclasses if you want to use a different
271 |         value.
272 | 
273 |         """
274 |         return str(getattr(self, "pk"))
275 | 
276 |     @property
277 |     def _related_fields(self) -> list[str]:
278 |         """Return the list of fields that are relations and not serializable."""
279 |         return [f.name for f in self._model_meta.get_fields() if f.is_relation]
280 | 
281 |     def clean_update_fields(self, index: str, update_fields: list[str]) -> list[str]:
282 |         """
283 |         Clean the list of update_fields based on the index being updated.
284 | 
285 |         If any field in the update_fields list is not in the set of properties
286 |         defined by the index mapping for this model, then we ignore it. If
287 |         a field _is_ in the mapping, but the underlying model field is a
288 |         related object, and thereby not directly serializable, then this
289 |         method will raise a ValueError.
290 | 
291 |         """
292 |         search_fields = get_model_index_properties(self, index)
293 |         clean_fields = [f for f in update_fields if f in search_fields]
294 |         ignore = [f for f in update_fields if f not in search_fields]
295 |         if ignore:
296 |             logger.debug("Ignoring fields from partial update: %s", ignore)
297 | 
298 |         for f in clean_fields:
299 |             if f in self._related_fields:
300 |                 raise ValueError(
301 |                     "'%s' cannot be automatically serialized into a search "
302 |                     "document property. Please override as_search_document_update.",
303 |                     f,
304 |                 )
305 |         return clean_fields
306 | 
307 |     def as_search_document_update(
308 |         self, *, index: str, update_fields: list[str]
309 |     ) -> dict:
310 |         """
311 |         Return a partial update document based on which fields have been updated.
312 | 
313 |         If an object is saved with the `update_fields` argument passed
314 |         through, then it is assumed that this is a 'partial update'. In
315 |         this scenario we need a {property: value} dictionary containing
316 |         just the fields we want to update.
317 | 
318 |         This method handles two possible update strategies - 'full' or 'partial'.
319 |         The default 'full' strategy simply returns the value of `as_search_document`
320 |         - thereby replacing the entire document each time. The 'partial' strategy is
321 |         more intelligent - it will determine whether the fields passed are in the
322 |         search document mapping, and return a partial update document that contains
323 |         only those that are. In addition, if any field that _is_ included cannot
324 |         be automatically serialized (e.g. a RelatedField object), then this method
325 |         will raise a ValueError. In this scenario, you should override this method
326 |         in your subclass.
327 | 
328 |         >>> def as_search_document_update(self, index, update_fields):
329 |         ...     if 'user' in update_fields:
330 |         ...         update_fields.remove('user')
331 |         ...         doc = super().as_search_document_update(index, update_fields)
332 |         ...         doc['user'] = self.user.get_full_name()
333 |         ...         return doc
334 |         ...     return super().as_search_document_update(index, update_fields)
335 | 
336 |         You may also wish to subclass this method to perform field-specific logic
337 |         - in this example if only the timestamp is being saved, then ignore the
338 |         update if the timestamp is later than a certain time.
339 | 
340 |         >>> def as_search_document_update(self, index, update_fields):
341 |         ...     if update_fields == ['timestamp']:
342 |         ...         if self.timestamp > today():
343 |         ...            return {}
344 |         ...     return super().as_search_document_update(index, update_fields)
345 | 
346 |         """
347 |         if UPDATE_STRATEGY == UPDATE_STRATEGY_FULL:
348 |             return self.as_search_document(index=index)
349 | 
350 |         if UPDATE_STRATEGY == UPDATE_STRATEGY_PARTIAL:
351 |             # in partial mode we update the intersection of update_fields and
352 |             # properties found in the mapping file.
353 |             return {
354 |                 k: getattr(self, k)
355 |                 for k in self.clean_update_fields(
356 |                     index=index, update_fields=update_fields
357 |                 )
358 |             }
359 | 
360 |         raise ValueError("Invalid update strategy.")
361 | 
362 |     def as_search_action(self, *, index: str, action: str) -> dict:
363 |         """
364 |         Return an object as represented in a bulk api operation.
365 | 
366 |         Bulk API operations have a very specific format. This function will
367 |         call the standard `as_search_document` method on the object and then
368 |         wrap that up in the correct format for the action specified.
369 | 
370 |         https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html
371 | 
372 |         Args:
373 |             index: string, the name of the index in which the action is to
374 |                 be taken. Bulk operations are only every carried out on a single
375 |                 index at a time.
376 |             action: string ['index' | 'update' | 'delete'] - this decides
377 |                 how the final document is formatted.
378 | 
379 |         Returns a dictionary.
380 | 
381 |         """
382 |         if action not in ("index", "update", "delete"):
383 |             raise ValueError("Action must be 'index', 'update' or 'delete'.")
384 | 
385 |         document: dict[str, str | dict] = {
386 |             "_index": index,
387 |             "_op_type": action,
388 |             "_id": self.get_search_document_id(),
389 |         }
390 | 
391 |         if action == "index":
392 |             document["_source"] = self.as_search_document(index=index)
393 |         elif action == "update":
394 |             document["doc"] = self.as_search_document(index=index)
395 |         return document
396 | 
397 |     def fetch_search_document(self, *, index: str) -> ObjectApiResponse:
398 |         """Fetch the object's document from a search index by id."""
399 |         if not self.pk:  # type: ignore
400 |             raise ValueError("Object must have a primary key before being indexed.")
401 |         return get_client().get(index=index, id=self.get_search_document_id())
402 | 
403 |     def index_search_document(self, *, index: str) -> None:
404 |         """
405 |         Create or replace search document in named index.
406 | 
407 |         Checks the local cache to see if the document has changed,
408 |         and if not aborts the update, else pushes to ES, and then
409 |         resets the local cache. Cache timeout is set as "cache_expiry"
410 |         in the settings, and defaults to 60s.
411 | 
412 |         """
413 |         cache_key = self.search_document_cache_key
414 |         new_doc = self.as_search_document(index=index)
415 |         cached_doc = cache.get(cache_key)
416 |         if new_doc == cached_doc:
417 |             logger.debug("Search document for %r is unchanged, ignoring update.", self)
418 |             return
419 |         cache.set(cache_key, new_doc, timeout=get_setting("cache_expiry", 60))
420 |         _ = get_client().index(
421 |             index=index,
422 |             document=new_doc,
423 |             id=self.get_search_document_id(),
424 |         )
425 | 
426 |     def update_search_document(self, *, index: str, update_fields: list[str]) -> None:
427 |         """
428 |         Partial update of a document in named index.
429 | 
430 |         Partial updates are invoked via a call to save the document
431 |         with 'update_fields'. These fields are passed to the
432 |         as_search_document method so that it can build a partial
433 |         document. NB we don't just call as_search_document and then
434 |         strip the fields _not_ in update_fields as we are trying
435 |         to avoid possibly expensive operations in building the
436 |         source document. The canonical example for this method
437 |         is updating a single timestamp on a model - we don't want
438 |         to have to walk the model relations and build a document
439 |         in this case - we just want to push the timestamp.
440 | 
441 |         When POSTing a partial update the `as_search_document` doc
442 |         must be passed to the `client.update` wrapped in a "doc" node,
443 |         # noqa: E501, see: https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-update.html
444 | 
445 |         """
446 |         doc = self.as_search_document_update(index=index, update_fields=update_fields)
447 |         if not doc:
448 |             logger.debug("Ignoring object update as document is empty.")
449 |             return
450 |         retry_on_conflict = cast(int, get_setting("retry_on_conflict", 0))
451 |         _ = get_client().update(
452 |             index=index,
453 |             id=self.get_search_document_id(),
454 |             doc=doc,
455 |             retry_on_conflict=retry_on_conflict,
456 |         )
457 | 
458 |     def delete_search_document(self, *, index: str) -> None:
459 |         """Delete document from named index."""
460 |         cache.delete(self.search_document_cache_key)
461 |         _ = get_client().delete(index=index, id=self.get_search_document_id())
462 | 
463 | 
464 | class SearchQuery(models.Model):
465 |     """
466 |     Model used to capture ES queries and responses.
467 | 
468 |     For low-traffic sites it's useful to be able to replay
469 |     searches, and to track how a user filtered and searched.
470 |     This model can be used to store a search query and meta
471 |     information about the results (document type, id and score).
472 | 
473 |     """
474 | 
475 |     class TotalHitsRelation(models.TextChoices):
476 |         """The hits.total.relation response value."""
477 | 
478 |         ACCURATE = "eq", _lazy("Accurate hit count")
479 |         ESTIMATE = "gte", _lazy("Lower bound of total hits")
480 | 
481 |     class QueryType(models.TextChoices):
482 |         # whether this is a search query (returns results), or a count API
483 |         # query (returns the number of results, but no detail),
484 |         SEARCH = "SEARCH", _lazy("Search results")
485 |         COUNT = "COUNT", _lazy("Count only")
486 | 
487 |     user = models.ForeignKey(
488 |         settings.AUTH_USER_MODEL,
489 |         related_name="search_queries",
490 |         blank=True,
491 |         null=True,
492 |         help_text=_lazy("The user who made the search query (nullable)."),
493 |         on_delete=models.SET_NULL,
494 |     )
495 |     index = models.CharField(
496 |         max_length=100,
497 |         default="_all",
498 |         help_text=_lazy("The name of the Elasticsearch index(es) being queried."),
499 |     )
500 |     # The query property contains the raw DSL query, which can be arbitrarily complex -
501 |     # there is no one way of mapping input text to the query itself. However, it's
502 |     # often helpful to have the terms that the user themselves typed easily accessible
503 |     # without having to parse JSON.
504 |     search_terms = models.CharField(
505 |         max_length=400,
506 |         default="",
507 |         blank=True,
508 |         help_text=_lazy(
509 |             "Free text search terms used in the query, stored for easy reference."
510 |         ),
511 |     )
512 |     query = models.JSONField(
513 |         help_text=_lazy("The raw Elasticsearch DSL query."), encoder=DjangoJSONEncoder
514 |     )
515 |     query_type = models.CharField(
516 |         help_text=_lazy("Does this query return results, or just the hit count?"),
517 |         choices=QueryType.choices,
518 |         default=QueryType.SEARCH,
519 |         max_length=10,
520 |     )
521 |     hits = models.JSONField(
522 |         help_text=_lazy(
523 |             "The list of meta info for each of the query matches returned."
524 |         ),
525 |         blank=True,
526 |         null=True,
527 |         encoder=DjangoJSONEncoder,
528 |     )
529 |     total_hits = models.IntegerField(
530 |         default=0,
531 |         help_text=_lazy(
532 |             "Total number of matches found for the query (!= the hits returned)."
533 |         ),
534 |     )
535 |     total_hits_relation = models.CharField(
536 |         max_length=3,
537 |         default="",
538 |         blank=True,
539 |         choices=TotalHitsRelation.choices,
540 |         help_text=_lazy(
541 |             "Indicates whether this is an exact match ('eq') or a lower bound ('gte')"
542 |         ),
543 |     )
544 |     aggregations = models.JSONField(
545 |         help_text=_lazy("The raw aggregations returned from the query."),
546 |         encoder=DjangoJSONEncoder,
547 |         default=None,
548 |         blank=True,
549 |         null=True,
550 |     )
551 |     reference = models.CharField(
552 |         max_length=100,
553 |         default="",
554 |         blank=True,
555 |         help_text=_lazy(
556 |             "Custom reference used to identify and group related searches."
557 |         ),
558 |     )
559 |     executed_at = models.DateTimeField(
560 |         help_text=_lazy("When the search was executed - set via execute() method.")
561 |     )
562 |     duration = models.FloatField(
563 |         help_text=_lazy("Time taken to execute the search itself, in seconds.")
564 |     )
565 | 
566 |     class Meta:
567 |         app_label = "elasticsearch_django"
568 |         verbose_name = "Search query"
569 |         verbose_name_plural = "Search queries"
570 | 
571 |     def __str__(self) -> str:
572 |         return f"Query (id={self.pk}) run against index '{self.index}'"
573 | 
574 |     def __repr__(self) -> str:
575 |         return (
576 |             f"<SearchQuery id={self.pk} user={self.user} "
577 |             f"index='{self.index}' total_hits={self.total_hits} >"
578 |         )
579 | 
580 |     def __init__(self, *args: Any, **kwargs: Any) -> None:
581 |         self.query_response = kwargs.pop("query_response", None)
582 |         super().__init__(*args, **kwargs)
583 | 
584 |     def save(self, *args: Any, **kwargs: Any) -> SearchQuery:
585 |         if user := kwargs.pop("user", None):
586 |             self.user = user
587 |         if reference := kwargs.pop("reference", ""):
588 |             self.reference = reference
589 |         if search_terms := kwargs.pop("search_terms", ""):
590 |             self.search_terms = search_terms
591 |         super().save(*args, **kwargs)
592 |         return self
593 | 
594 |     def _hit_values(self, property_name: str) -> list[str | float]:
595 |         """Extract list of property values from each hit in search results."""
596 |         return [] if self.hits is None else [h[property_name] for h in self.hits]
597 | 
598 |     @property
599 |     def max_score(self) -> float:
600 |         """Max relevance score in the returned page."""
601 |         if self.hits:
602 |             return float(max(self._hit_values("score")))
603 |         return 0.0
604 | 
605 |     @property
606 |     def min_score(self) -> float:
607 |         """Min relevance score in the returned page."""
608 |         if self.hits:
609 |             return float(min(self._hit_values("score")))
610 |         return 0.0
611 | 
612 |     @property
613 |     def object_ids(self) -> list[str]:
614 |         """List of model ids extracted from hits."""
615 |         return self._hit_values("id")  # type: ignore
616 | 
617 |     @property
618 |     def page_slice(self) -> tuple[int, int] | None:
619 |         """Return the query from:size tuple (0-based)."""
620 |         return (
621 |             None
622 |             if self.query is None
623 |             else (self.query.get("from", 0), self.query.get("size", 10))
624 |         )
625 | 
626 |     @property
627 |     def page_from(self) -> int:
628 |         """1-based index of the first hit in the returned page."""
629 |         if self.page_size == 0:
630 |             return 0
631 |         if not self.page_slice:
632 |             return 0
633 |         return self.page_slice[0] + 1
634 | 
635 |     @property
636 |     def page_to(self) -> int:
637 |         """1-based index of the last hit in the returned page."""
638 |         return 0 if self.page_size == 0 else self.page_from + self.page_size - 1
639 | 
640 |     @property
641 |     def page_size(self) -> int:
642 |         """Return number of hits returned in this specific page."""
643 |         return 0 if self.hits is None else len(self.hits)
644 | 
645 |     @property
646 |     def has_aggs(self) -> bool:
647 |         """Return True if the query includes aggs."""
648 |         return "aggs" in self.query
649 | 
650 |     @property
651 |     def has_highlights(self) -> bool:
652 |         """Return True if the query includes highlights."""
653 |         if not self.query:
654 |             raise ValueError("Missing query attribute.")
655 |         return "highlight" in self.query
656 | 
657 |     @property
658 |     def has_fields(self) -> bool:
659 |         """Return True if the query includes explicit fields."""
660 |         if not self.query:
661 |             raise ValueError("Missing query attribute.")
662 |         return "fields" in self.query
663 | 
664 |     def search_rank_annotation(self, pk_field_name: str = "pk") -> Case | None:
665 |         """Return SQL CASE statement used to annotate results with rank."""
666 |         if not self.hits:
667 |             return None
668 |         case_when_rank = []
669 |         for rank, hit in enumerate(self.hits, start=1):
670 |             case_when_rank.append(When(**{pk_field_name: hit["id"]}, then=rank))
671 |         return Case(*case_when_rank)
672 | 
673 |     def search_score_annotation(self, pk_field_name: str = "pk") -> Case | None:
674 |         """Return SQL CASE statement used to annotate results with score."""
675 |         if not self.hits:
676 |             return None
677 |         case_when_score = []
678 |         for hit in self.hits:
679 |             # if custom sorting has been applied, score is null
680 |             score = None if hit["score"] is None else float(hit["score"])
681 |             case_when_score.append(When(**{pk_field_name: hit["id"]}, then=score))
682 |         return Case(*case_when_score)
683 | 
684 |     def get_hit(self, doc_id: str) -> dict:
685 |         """
686 |         Return the hit with a give document id.
687 | 
688 |         Raises KeyError if the id does not exist.
689 | 
690 |         """
691 |         if hit := [h for h in self.hits if h["id"] == str(doc_id)]:
692 |             return hit[0]
693 |         raise KeyError("Document id not found in search results.")
694 | 
695 |     def get_doc_rank(self, doc_id: str) -> int:
696 |         """Return the position of a document in the results."""
697 |         return self.object_ids.index(str(doc_id))
698 | 
699 |     def get_doc_score(self, doc_id: str) -> float:
700 |         """Return specific document score."""
701 |         return self.get_hit(str(doc_id))["score"]
702 | 
703 |     def get_doc_highlights(self, doc_id: str) -> dict | None:
704 |         """Return specific document highlights."""
705 |         return self.get_hit(str(doc_id)).get("highlight")
706 | 
707 |     @classmethod
708 |     def do_search(
709 |         self,
710 |         index: str,
711 |         query: dict,
712 |         client: Elasticsearch = DEFAULT_CLIENT,
713 |         **search_kwargs: Any,
714 |     ) -> SearchQuery:
715 |         """Perform a search query and parse the response."""
716 |         # if "from" has been passed in we need to convert it to "from_"
717 |         # for the search method, ensuring that we don't overwrite
718 |         # "from_" if it's been passed in correctly.
719 |         from_ = search_kwargs.pop("from", DEFAULT_FROM)
720 |         search_kwargs.setdefault("from_", from_)
721 |         search_kwargs.setdefault("size", DEFAULT_PAGE_SIZE)
722 |         search_kwargs.setdefault("_source", DEFAULT_INCLUDE_SOURCE)
723 |         with stopwatch() as timer:
724 |             response = client.search(index=index, query=query, **search_kwargs)
725 |         parser = SearchResponseParser(response)
726 |         # HACK: we want the "query" that we store to be the raw wire query, which
727 |         # is a dict that contains query, aggs, highlights, from_, size, min_score,
728 |         # etc.
729 |         raw_query = {"query": copy.deepcopy(query)}
730 |         raw_query.update(**search_kwargs)
731 |         # now we need to replace "from_" with "from" for the stored
732 |         # JSON as this is what gets sent over the wire.
733 |         raw_query["from"] = raw_query.pop("from_")
734 |         return SearchQuery(
735 |             index=index,
736 |             query=raw_query,
737 |             query_type=SearchQuery.QueryType.SEARCH,
738 |             hits=parser.hits,
739 |             aggregations=parser.aggregations,
740 |             total_hits=parser.total_hits,
741 |             total_hits_relation=parser.total_hits_relation,
742 |             executed_at=timer.started_at,
743 |             duration=timer.elapsed,
744 |             query_response=response,
745 |         )
746 | 
747 |     @classmethod
748 |     def do_count(
749 |         self,
750 |         index: str,
751 |         query: dict,
752 |         client: Elasticsearch = DEFAULT_CLIENT,
753 |         **count_kwargs: Any,
754 |     ) -> SearchQuery:
755 |         """Perform a count query and parse the response."""
756 |         with stopwatch() as timer:
757 |             response = client.count(index=index, query=query, **count_kwargs)
758 |         parser = CountResponseParser(response)
759 |         return SearchQuery(
760 |             index=index,
761 |             query=query,
762 |             query_type=SearchQuery.QueryType.COUNT,
763 |             # hits=[],
764 |             # aggregations={},
765 |             total_hits=parser.total_hits,
766 |             total_hits_relation=parser.total_hits_relation,
767 |             executed_at=timer.started_at,
768 |             duration=timer.elapsed,
769 |         )
770 | 
771 | 
772 | class SearchResponseParser:
773 |     def __init__(self, response: ObjectApiResponse) -> None:
774 |         self.body = response.body
775 |         self._hits = self.body.get("hits", {})
776 | 
777 |     @property
778 |     def raw_hits(self) -> list[dict]:
779 |         return self._hits.get("hits", {})
780 | 
781 |     @property
782 |     def hits(self) -> list[dict]:
783 |         def _hit(hit: dict) -> dict:
784 |             retval = {
785 |                 "id": hit["_id"],
786 |                 "index": hit["_index"],
787 |                 "score": hit["_score"],
788 |             }
789 |             if highlight := hit.get("highlight"):
790 |                 retval["highlight"] = highlight
791 |             if fields := hit.get("fields"):
792 |                 retval["fields"] = fields
793 |             return retval
794 | 
795 |         return [_hit(h) for h in self.raw_hits]
796 | 
797 |     @property
798 |     def total(self) -> dict:
799 |         return self._hits.get("total", {})
800 | 
801 |     @property
802 |     def total_hits(self) -> int:
803 |         return self.total.get("value", 0)
804 | 
805 |     @property
806 |     def total_hits_relation(self) -> str:
807 |         return self.total.get("relation", "")
808 | 
809 |     @property
810 |     def aggregations(self) -> dict:
811 |         return self.body.get("aggregations", {})
812 | 
813 | 
814 | class CountResponseParser:
815 |     def __init__(self, response: ObjectApiResponse) -> None:
816 |         self.body = response.body
817 | 
818 |     @property
819 |     def total_hits(self) -> int:
820 |         return self.body.get("count", 0)
821 | 
822 |     @property
823 |     def total_hits_relation(self) -> str:
824 |         return str(SearchQuery.TotalHitsRelation.ACCURATE)
825 | 


--------------------------------------------------------------------------------
/elasticsearch_django/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yunojuno/elasticsearch-django/f9f82e5843e6b071cbd4b3a01ea63caa399db075/elasticsearch_django/py.typed


--------------------------------------------------------------------------------
/elasticsearch_django/settings.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Access to SEARCH_SETTINGS Django conf.
  3 | 
  4 | The SEARCH_SETTINGS dict in the Django conf contains three
  5 | major blocks - 'connections', 'indexes' and 'settings'.
  6 | 
  7 | This module contains helper functions to extract information
  8 | from the settings, as well as validation of settings.
  9 | 
 10 | """
 11 | from __future__ import annotations
 12 | 
 13 | import json
 14 | import os
 15 | from typing import Dict, Union
 16 | 
 17 | from django.apps import apps
 18 | from django.conf import settings as django_settings
 19 | from django.db.models import Model
 20 | from elasticsearch import Elasticsearch
 21 | 
 22 | SettingType = Union[list, dict, int, str, bool]
 23 | SettingsType = Dict[str, SettingType]
 24 | 
 25 | 
 26 | def get_client(connection: str = "default") -> Elasticsearch:
 27 |     """Return configured elasticsearch client."""
 28 |     conn_settings = get_connection_settings(connection)
 29 |     if isinstance(conn_settings, (str, list)):
 30 |         return Elasticsearch(conn_settings)
 31 |     return Elasticsearch(**conn_settings)
 32 | 
 33 | 
 34 | def get_settings() -> SettingsType:
 35 |     """Return settings from Django conf."""
 36 |     return django_settings.SEARCH_SETTINGS["settings"]
 37 | 
 38 | 
 39 | def get_setting(key, *default: str | int | bool | list | dict) -> SettingType:
 40 |     """Return specific search setting from Django conf."""
 41 |     if default:
 42 |         return get_settings().get(key, default[0])
 43 |     else:
 44 |         return get_settings()[key]
 45 | 
 46 | 
 47 | def set_setting(key: str, value: SettingType) -> None:
 48 |     """Set specific search setting in Django conf settings."""
 49 |     get_settings()[key] = value
 50 | 
 51 | 
 52 | def get_connection_settings(connection: str = "default") -> str | list | dict:
 53 |     """Return index settings from Django conf."""
 54 |     return django_settings.SEARCH_SETTINGS["connections"][connection]
 55 | 
 56 | 
 57 | def get_index_config(index: str) -> dict[str, list[str]]:
 58 |     """Return index settings from Django conf."""
 59 |     return django_settings.SEARCH_SETTINGS["indexes"][index]
 60 | 
 61 | 
 62 | def get_index_names() -> list[str]:
 63 |     """Return list of the names of all configured indexes."""
 64 |     return list(django_settings.SEARCH_SETTINGS["indexes"].keys())
 65 | 
 66 | 
 67 | def get_index_mapping(index: str) -> dict:
 68 |     """
 69 |     Return the JSON mapping file for an index.
 70 | 
 71 |     Mappings are stored as JSON files in the mappings subdirectory of this
 72 |     app. They must be saved as {{index}}.json.
 73 | 
 74 |     Args:
 75 |         index: string, the name of the index to look for.
 76 | 
 77 |     """
 78 |     # app_path = apps.get_app_config('elasticsearch_django').path
 79 |     mappings_dir = get_setting("mappings_dir")
 80 |     filename = "%s.json" % index
 81 |     path = os.path.join(mappings_dir, filename)
 82 |     with open(path) as f:
 83 |         return json.load(f)
 84 | 
 85 | 
 86 | def get_model_index_properties(instance: Model, index: str) -> list[str]:
 87 |     """Return the list of properties specified for a model in an index."""
 88 |     mapping = get_index_mapping(index)
 89 |     return list(mapping["mappings"]["properties"].keys())
 90 | 
 91 | 
 92 | def get_index_models(index: str) -> list[Model]:
 93 |     """Return list of models configured for a named index."""
 94 |     models: list[Model] = []
 95 |     for app_model in get_index_config(index).get("models"):
 96 |         app, model = app_model.split(".")
 97 |         models.append(apps.get_model(app, model))
 98 |     return models
 99 | 
100 | 
101 | def get_model_indexes(model: Model) -> list[str]:
102 |     """
103 |     Return list of all indexes in which a model is configured.
104 | 
105 |     A model may be configured to appear in multiple indexes. This function
106 |     will return the names of the indexes as a list of strings. This is
107 |     useful if you want to know which indexes need updating when a model
108 |     is saved.
109 | 
110 |     Args:
111 |         model: a Django model class.
112 | 
113 |     """
114 |     indexes: list[str] = []
115 |     for index in get_index_names():
116 |         for app_model in get_index_models(index):
117 |             if app_model == model:
118 |                 indexes.append(index)
119 |     return indexes
120 | 
121 | 
122 | def get_document_models() -> dict[str, Model]:
123 |     """Return dict of index.doc_type: model."""
124 |     mappings: dict[str, Model] = {}
125 |     for i in get_index_names():
126 |         for m in get_index_models(i):
127 |             mappings[f"{i}.{m._meta.model_name}"] = m
128 |     return mappings
129 | 
130 | 
131 | def get_document_model(index: str, doc_type: str) -> Model | None:
132 |     """Return model for a given index.doc_type combination."""
133 |     raise DeprecationWarning("Mapping types have been removed from ES7.x")
134 | 
135 | 
136 | def auto_sync(instance: Model) -> bool:
137 |     """Return True if auto_sync is on for the model (instance)."""
138 |     # this allows us to turn off sync temporarily - e.g. when doing bulk updates
139 |     if not get_setting("auto_sync"):
140 |         return False
141 |     model_name = f"{instance._meta.app_label}.{instance._meta.model_name}"
142 |     if model_name in get_setting("never_auto_sync", *[]):
143 |         return False
144 |     return True
145 | 


--------------------------------------------------------------------------------
/elasticsearch_django/signals.py:
--------------------------------------------------------------------------------
 1 | import django.dispatch
 2 | 
 3 | # Signal fired just before calling model.index_search_document.
 4 | # Documented arguments (providing_args): ["instance", "index"]
 5 | pre_index = django.dispatch.Signal()
 6 | 
 7 | # Signal fired just before calling model.update_search_document.
 8 | # Documented arguments (providing_args): ["instance", "index", "update_fields"]
 9 | pre_update = django.dispatch.Signal()
10 | 
11 | # Signal fired just before calling model.delete_search_document.
12 | # Documented arguments (providing_args): ["instance", "index"]
13 | pre_delete = django.dispatch.Signal()
14 | 


--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | import sys
 4 | 
 5 | if __name__ == "__main__":
 6 |     # Use the test project's settings unless the variable is already set.
 7 |     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tests.settings")
 8 | 
 9 |     # Imported here, after the settings module default has been applied.
10 |     from django.core.management import execute_from_command_line
11 | 
12 |     execute_from_command_line(sys.argv)
11 | 


--------------------------------------------------------------------------------
/mappings/examples.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "mappings": {
 3 |         "properties": {
 4 |             "simple_field_1": {
 5 |                 "type": "integer"
 6 |             },
 7 |             "simple_field_2": {
 8 |                 "type": "text",
 9 |                 "fields": {
10 |                     "keyword": {
11 |                         "type": "keyword",
12 |                         "ignore_above": 256
13 |                     }
14 |                 }
15 |             },
16 |             "complex_field": {
17 |                 "type": "text",
18 |                 "fields": {
19 |                     "keyword": {
20 |                         "type": "keyword",
21 |                         "ignore_above": 256
22 |                     }
23 |                 }
24 |             }
25 |         }
26 |     }
27 | }
28 | 


--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
 1 | [mypy]
 2 | strict_optional=True
 3 | ignore_missing_imports=True
 4 | follow_imports=silent
 5 | warn_redundant_casts=True
 6 | warn_unused_ignores = true
 7 | warn_unreachable = true
 8 | disallow_untyped_defs = true
 9 | disallow_incomplete_defs = true
10 | 
11 | # Disable mypy for admin.py files
12 | [mypy-*.admin]
13 | ignore_errors=True
14 | 
15 | # Disable mypy for migrations
16 | [mypy-*.migrations.*]
17 | ignore_errors=True
18 | 
19 | # Disable mypy for settings
20 | [mypy-*.settings.*]
21 | ignore_errors=True
22 | 
23 | # Disable mypy for tests
24 | [mypy-*.tests.*]
25 | ignore_errors=True
26 | 


--------------------------------------------------------------------------------
/poetry.toml:
--------------------------------------------------------------------------------
1 | [virtualenvs]
2 | create = true
3 | in-project = true
4 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "elasticsearch-django"
 3 | version = "8.5.2"
 4 | description = "Elasticsearch Django app."
 5 | license = "MIT"
 6 | authors = ["YunoJuno <code@yunojuno.com>"]
 7 | maintainers = ["YunoJuno <code@yunojuno.com>"]
 8 | readme = "README.md"
 9 | homepage = "https://github.com/yunojuno/elasticsearch-django"
10 | repository = "https://github.com/yunojuno/elasticsearch-django"
11 | classifiers = [
12 |     "Environment :: Web Environment",
13 |     "Framework :: Django :: 3.2",
14 |     "Framework :: Django :: 4.0",
15 |     "Framework :: Django :: 4.1",
16 |     "Framework :: Django :: 4.2",
17 |     "Framework :: Django :: 5.0",
18 |     "Operating System :: OS Independent",
19 |     "Programming Language :: Python :: 3 :: Only",
20 |     "Programming Language :: Python :: 3.8",
21 |     "Programming Language :: Python :: 3.9",
22 |     "Programming Language :: Python :: 3.10",
23 |     "Programming Language :: Python :: 3.11",
24 |     "Programming Language :: Python :: 3.12",
25 | ]
26 | 
27 | [tool.poetry.dependencies]
28 | python = "^3.8"
29 | django = "^3.2 || ^4.0 || ^5.0"
30 | elasticsearch = "^8.0"
31 | simplejson = "*"
32 | 
33 | [tool.poetry.dev-dependencies]
34 | black = "*"
35 | coverage = "*"
36 | mypy = "*"
37 | pre-commit = "*"
38 | pytest = "*"
39 | pytest-cov = "*"
40 | pytest-django = "*"
41 | ruff = "*"
42 | tox = "*"
43 | 
44 | [build-system]
45 | requires = ["poetry>=0.12"]
46 | build-backend = "poetry.masonry.api"
47 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | DJANGO_SETTINGS_MODULE=tests.settings
3 | 
4 | python_classes =
5 |     Test*
6 |     *Tests
7 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | default_config = "tests.apps.TestAppConfig"
2 | 


--------------------------------------------------------------------------------
/tests/admin.py:
--------------------------------------------------------------------------------
 1 | from django.contrib import admin
 2 | 
 3 | from .models import ExampleModel
 4 | 
 5 | 
 6 | class ExampleModelAdmin(admin.ModelAdmin):
 7 |     list_display = ("simple_field_1", "simple_field_2", "complex_field")
 8 | 
 9 | 
10 | admin.site.register(ExampleModel, ExampleModelAdmin)
11 | 


--------------------------------------------------------------------------------
/tests/apps.py:
--------------------------------------------------------------------------------
1 | from django.apps import AppConfig
2 | 
3 | 
4 | class TestAppConfig(AppConfig):
5 |     name = "tests"
6 |     verbose_name = "Test App"
7 |     default_auto_field = "django.db.models.AutoField"
8 | 


--------------------------------------------------------------------------------
/tests/migrations/0001_initial.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 3.0.8 on 2020-07-29 08:35
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | import elasticsearch_django.models
 6 | 
 7 | 
 8 | class Migration(migrations.Migration):
 9 |     # First migration of the tests app: creates ExampleModel.
10 |     initial = True
11 | 
12 |     dependencies = []
13 | 
14 |     operations = [
15 |         migrations.CreateModel(
16 |             name="ExampleModel",
17 |             fields=[
18 |                 (
19 |                     "id",
20 |                     models.AutoField(
21 |                         auto_created=True,
22 |                         primary_key=True,
23 |                         serialize=False,
24 |                         verbose_name="ID",
25 |                     ),
26 |                 ),
27 |                 ("simple_field_1", models.IntegerField()),
28 |                 ("simple_field_2", models.CharField(max_length=100)),
29 |                 ("complex_field", models.FileField(upload_to="")),
30 |             ],
31 |             # the model also inherits from the search document mixin
32 |             bases=(elasticsearch_django.models.SearchDocumentMixin, models.Model),
33 |         ),
34 |     ]
33 | 


--------------------------------------------------------------------------------
/tests/migrations/0002_examplemodel_user.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.1 on 2022-08-28 14:52
 2 | 
 3 | import django.db.models.deletion
 4 | from django.conf import settings
 5 | from django.db import migrations, models
 6 | 
 7 | 
 8 | class Migration(migrations.Migration):
 9 |     dependencies = [
10 |         migrations.swappable_dependency(settings.AUTH_USER_MODEL),
11 |         ("tests", "0001_initial"),
12 |     ]
13 | 
14 |     operations = [
15 |         migrations.AddField(
16 |             model_name="examplemodel",
17 |             name="user",
18 |             field=models.OneToOneField(
19 |                 blank=True,
20 |                 null=True,
21 |                 on_delete=django.db.models.deletion.CASCADE,
22 |                 to=settings.AUTH_USER_MODEL,
23 |             ),
24 |         ),
25 |     ]
26 | 


--------------------------------------------------------------------------------
/tests/migrations/0003_examplemodelwithcustomprimarykey.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.1.1 on 2022-09-27 15:23
 2 | 
 3 | import django.db.models.deletion
 4 | from django.conf import settings
 5 | from django.db import migrations, models
 6 | 
 7 | import elasticsearch_django.models
 8 | 
 9 | 
10 | class Migration(migrations.Migration):
11 |     dependencies = [
12 |         migrations.swappable_dependency(settings.AUTH_USER_MODEL),
13 |         ("tests", "0002_examplemodel_user"),
14 |     ]
15 | 
16 |     operations = [
17 |         migrations.CreateModel(
18 |             name="ExampleModelWithCustomPrimaryKey",
19 |             fields=[
20 |                 (
21 |                     "simple_field_1",
22 |                     models.IntegerField(primary_key=True, serialize=False),
23 |                 ),
24 |                 ("simple_field_2", models.CharField(max_length=100)),
25 |                 ("complex_field", models.FileField(upload_to="")),
26 |                 (
27 |                     "user",
28 |                     models.OneToOneField(
29 |                         blank=True,
30 |                         null=True,
31 |                         on_delete=django.db.models.deletion.CASCADE,
32 |                         to=settings.AUTH_USER_MODEL,
33 |                     ),
34 |                 ),
35 |             ],
36 |             bases=(elasticsearch_django.models.SearchDocumentMixin, models.Model),
37 |         ),
38 |     ]
39 | 


--------------------------------------------------------------------------------
/tests/migrations/0004_modela_modelb.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.1.1 on 2022-10-01 14:00
 2 | 
 3 | import uuid
 4 | 
 5 | import django.db.models.deletion
 6 | from django.db import migrations, models
 7 | 
 8 | import elasticsearch_django.models
 9 | 
10 | 
11 | class Migration(migrations.Migration):
12 |     dependencies = [
13 |         ("tests", "0003_examplemodelwithcustomprimarykey"),
14 |     ]
15 | 
16 |     operations = [
17 |         migrations.CreateModel(
18 |             name="ModelA",
19 |             fields=[
20 |                 (
21 |                     "id",
22 |                     models.AutoField(
23 |                         auto_created=True,
24 |                         primary_key=True,
25 |                         serialize=False,
26 |                         verbose_name="ID",
27 |                     ),
28 |                 ),
29 |                 ("field_1", models.UUIDField(default=uuid.uuid4)),
30 |                 ("field_2", models.CharField(max_length=100)),
31 |             ],
32 |         ),
33 |         migrations.CreateModel(
34 |             name="ModelB",
35 |             fields=[
36 |                 (
37 |                     "id",
38 |                     models.AutoField(
39 |                         auto_created=True,
40 |                         primary_key=True,
41 |                         serialize=False,
42 |                         verbose_name="ID",
43 |                     ),
44 |                 ),
45 |                 (
46 |                     "source",
47 |                     models.OneToOneField(
48 |                         on_delete=django.db.models.deletion.CASCADE, to="tests.modela"
49 |                     ),
50 |                 ),
51 |             ],
52 |             bases=(elasticsearch_django.models.SearchDocumentMixin, models.Model),
53 |         ),
54 |     ]
55 | 


--------------------------------------------------------------------------------
/tests/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yunojuno/elasticsearch-django/f9f82e5843e6b071cbd4b3a01ea63caa399db075/tests/migrations/__init__.py


--------------------------------------------------------------------------------
/tests/models.py:
--------------------------------------------------------------------------------
 1 | from uuid import uuid4
 2 | 
 3 | from django.conf import settings
 4 | from django.db import models
 5 | 
 6 | from elasticsearch_django.models import (
 7 |     SearchDocumentManagerMixin,
 8 |     SearchDocumentMixin,
 9 |     SearchResultsQuerySet,
10 | )
11 | 
12 | 
13 | class ExampleModelQuerySet(SearchResultsQuerySet):
14 |     pass
15 | 
16 | 
17 | class ExampleModelManager(SearchDocumentManagerMixin, models.Manager):
18 |     def get_search_queryset(self, index="_all"):
19 |         return self.all()
20 | 
21 | 
22 | class ExampleModel(SearchDocumentMixin, models.Model):
23 |     """Model class for use in tests."""
24 | 
25 |     user = models.OneToOneField(
26 |         settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True
27 |     )
28 |     simple_field_1 = models.IntegerField()
29 |     simple_field_2 = models.CharField(max_length=100)
30 |     complex_field = models.FileField()
31 | 
32 |     objects = ExampleModelManager.from_queryset(ExampleModelQuerySet)()
33 | 
34 |     def get_search_document_id(self) -> str:
35 |         return f"{self.simple_field_1}_{self.simple_field_2}"
36 | 
37 |     def user_name(self) -> str:
38 |         return self.user.get_full_name() if self.user else "Anonymous"
39 | 
40 |     def as_search_document(self, index="_all"):
41 |         return {
42 |             "simple_field_1": self.simple_field_1,
43 |             "simple_field_2": self.simple_field_2,
44 |             "complex_field": str(self.complex_field),
45 |             "user_name": self.user_name(),
46 |         }
47 | 
48 | 
49 | class ExampleModelWithCustomPrimaryKey(SearchDocumentMixin, models.Model):
50 |     """Model class with a custom primary key for use in tests."""
51 | 
52 |     user = models.OneToOneField(
53 |         settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True
54 |     )
55 |     simple_field_1 = models.IntegerField(primary_key=True)
56 |     simple_field_2 = models.CharField(max_length=100)
57 |     complex_field = models.FileField()
58 | 
59 |     objects = ExampleModelManager()
60 | 
61 |     def user_name(self) -> str:
62 |         return self.user.get_full_name() if self.user else "Anonymous"
63 | 
64 |     def as_search_document(self, index="_all"):
65 |         return {
66 |             "simple_field_1": self.simple_field_1,
67 |             "simple_field_2": self.simple_field_2,
68 |             "complex_field": str(self.complex_field),
69 |             "user_name": self.user_name(),
70 |         }
71 | 
72 | 
73 | # === Compound models ===
74 | 
75 | 
76 | class ModelAQuerySet(SearchResultsQuerySet):
77 |     # this is the field used as the ID for the search documents
78 |     search_document_id_field = "field_1"
79 | 
80 | 
81 | class ModelA(models.Model):
82 |     field_1 = models.UUIDField(default=uuid4)
83 |     field_2 = models.CharField(max_length=100)
84 |     objects = ModelAQuerySet.as_manager()
85 | 
86 | 
87 | class ModelB(SearchDocumentMixin, models.Model):
88 |     source = models.OneToOneField(ModelA, on_delete=models.CASCADE)
89 | 
90 |     def get_search_document_id(self) -> str:
91 |         return str(self.source.field_1)
92 | 
93 |     def as_search_document(self, *, index: str) -> dict:
94 |         return {"field_2": self.source.field_2, "extra_info": "some other data"}
95 | 


--------------------------------------------------------------------------------
/tests/settings.py:
--------------------------------------------------------------------------------
  1 | from os import getenv
  2 | 
  3 | from django.core.exceptions import ImproperlyConfigured
  4 | 
  5 | DEBUG = True
  6 | USE_TZ = True
  7 | 
  8 | DATABASES = {
  9 |     "default": {
 10 |         "ENGINE": "django.db.backends.sqlite3",
 11 |         "NAME": "elasticsearch_django.db",
 12 |     }
 13 | }
 14 | 
 15 | INSTALLED_APPS = (
 16 |     "django.contrib.admin",
 17 |     "django.contrib.auth",
 18 |     "django.contrib.sessions",
 19 |     "django.contrib.contenttypes",
 20 |     "django.contrib.sites",
 21 |     "django.contrib.staticfiles",
 22 |     "django.contrib.messages",
 23 |     "elasticsearch_django",
 24 |     "tests",
 25 | )
 26 | 
 27 | MIDDLEWARE = [
 28 |     "django.middleware.security.SecurityMiddleware",
 29 |     "django.contrib.sessions.middleware.SessionMiddleware",
 30 |     "django.middleware.common.CommonMiddleware",
 31 |     "django.middleware.csrf.CsrfViewMiddleware",
 32 |     "django.contrib.auth.middleware.AuthenticationMiddleware",
 33 |     "django.contrib.messages.middleware.MessageMiddleware",
 34 |     "django.middleware.clickjacking.XFrameOptionsMiddleware",
 35 | ]
 36 | 
 37 | TEMPLATES = [
 38 |     {
 39 |         "BACKEND": "django.template.backends.django.DjangoTemplates",
 40 |         "DIRS": [],
 41 |         "APP_DIRS": True,
 42 |         "OPTIONS": {
 43 |             "context_processors": [
 44 |                 "django.template.context_processors.debug",
 45 |                 "django.template.context_processors.request",
 46 |                 "django.contrib.auth.context_processors.auth",
 47 |                 "django.contrib.messages.context_processors.messages",
 48 |             ]
 49 |         },
 50 |     }
 51 | ]
 52 | 
 53 | SECRET_KEY = "elasticsearch_django"  # noqa: S105
 54 | 
 55 | ROOT_URLCONF = "tests.urls"
 56 | 
 57 | APPEND_SLASH = True
 58 | 
 59 | STATIC_URL = "/static/"
 60 | STATIC_ROOT = "staticfiles"
 61 | 
 62 | TIME_ZONE = "UTC"
 63 | 
 64 | SITE_ID = 1
 65 | 
 66 | ###########
 67 | # LOGGING #
 68 | ###########
 69 | LOGGING = {
 70 |     "version": 1,
 71 |     "disable_existing_loggers": False,
 72 |     "formatters": {"simple": {"format": "%(levelname)s %(message)s"}},
 73 |     "handlers": {
 74 |         "console": {
 75 |             "level": "DEBUG",
 76 |             "class": "logging.StreamHandler",
 77 |             "formatter": "simple",
 78 |         },
 79 |         "null": {"level": "DEBUG", "class": "logging.NullHandler"},
 80 |     },
 81 |     "loggers": {
 82 |         "": {"handlers": ["null"], "propagate": True, "level": "DEBUG"},
 83 |         "elasticsearch_django": {
 84 |             "handlers": ["console"],
 85 |             "level": "DEBUG",
 86 |             "propagate": False,
 87 |         },
 88 |         # 'django': {
 89 |         #     'handlers': ['console'],
 90 |         #     'level': getenv('LOGGING_LEVEL_DJANGO', 'WARNING'),
 91 |         #     'propagate': False,
 92 |         # },
 93 |         # 'django.db.backends': {
 94 |         #     'level': 'ERROR',
 95 |         #     'handlers': ['console'],
 96 |         #     'propagate': False,
 97 |         # },
 98 |         # 'elasticsearch': {
 99 |         #     'handlers': ['console'],
100 |         #     'level': getenv('LOGGING_LEVEL_SEARCH', 'WARNING'),
101 |         #     'propagate': False,
102 |         # },
103 |         # 'elasticsearch.trace': {
104 |         #     'handlers': ['console'],
105 |         #     'level': getenv('LOGGING_LEVEL_SEARCH', 'WARNING'),
106 |         #     'propagate': False,
107 |         # },
108 |         # 'requests': {
109 |         #     'handlers': ['console'],
110 |         #     'level': getenv('LOGGING_LEVEL_REQUESTS', 'WARNING'),
111 |         #     'propagate': False,
112 |         # },
113 |         # 'requests.packages.urllib3': {
114 |         #     'handlers': ['console'],
115 |         #     'level': getenv('LOGGING_LEVEL_REQUESTS', 'WARNING'),
116 |         #     'propagate': False,
117 |         # },
118 |         # 'urllib3': {
119 |         #     'handlers': ['console'],
120 |         #     'level': getenv('LOGGING_LEVEL_REQUESTS', 'WARNING'),
121 |         #     'propagate': False,
122 |         # },
123 |     },
124 | }
125 | 
126 | ELASTICSEARCH_URL = getenv("ELASTICSEARCH_URL", "https://localhost:9200")
127 | 
128 | SEARCH_SETTINGS = {
129 |     "connections": {
130 |         "default": ELASTICSEARCH_URL,
131 |         "custom": {
132 |             "hosts": "localhost",
133 |             "cloud_id": "foo",
134 |             "api_key": "bar",
135 |         },
136 |     },
137 |     "indexes": {
138 |         # name of the index
139 |         "examples": {
140 |             "models": [
141 |                 # model used to populate the index, in app.model format
142 |                 "tests.ExampleModel",
143 |             ]
144 |         },
145 |     },
146 |     "settings": {
147 |         # batch size for ES bulk api operations
148 |         "chunk_size": 500,
149 |         # default page size for search results
150 |         "page_size": 25,
151 |         # set to False to prevent automatic signal connections
152 |         "auto_sync": True,
153 |         # List of models which will never auto_sync even if auto_sync is True
154 |         # Use the same app.model format as in 'indexes' above.
155 |         "never_auto_sync": [],
156 |         # retry count used on update in case of a conflict
157 |         "retry_on_conflict": 0,
158 |         "update_strategy": "full",
159 |         # if True, raise ImproperlyConfigured if an index has no mapping file
160 |         "strict_validation": False,
161 |         # path/to/mappings/dir - where mapping files will be expected
162 |         "mappings_dir": "mappings",
163 |         # db alias to use for the SearchDocumentManagerMixin method
164 |         "in_search_queryset_db_alias": "foo",
165 |     },
166 | }
167 | 
168 | if not DEBUG:
169 |     raise ImproperlyConfigured("This project is only intended to be used for testing.")
170 | 


--------------------------------------------------------------------------------
/tests/test_apps.py:
--------------------------------------------------------------------------------
  1 | from unittest import mock
  2 | 
  3 | import pytest
  4 | from django.core.exceptions import ImproperlyConfigured
  5 | 
  6 | import elasticsearch_django
  7 | from elasticsearch_django.apps import (
  8 |     ElasticAppConfig,
  9 |     _connect_signals,
 10 |     _delete_from_search_index,
 11 |     _on_model_delete,
 12 |     _on_model_save,
 13 |     _update_search_index,
 14 |     _validate_config,
 15 |     _validate_mapping,
 16 |     _validate_model,
 17 | )
 18 | from elasticsearch_django.models import SearchDocumentMixin
 19 | 
 20 | from .models import ExampleModel
 21 | 
 22 | 
 23 | class SearchAppsConfigTests:
 24 |     """Tests for the apps module ready function."""
 25 | 
 26 |     @mock.patch("elasticsearch_django.apps.settings.get_setting", lambda x: False)
 27 |     @mock.patch("elasticsearch_django.apps._validate_config")
 28 |     @mock.patch("elasticsearch_django.apps._connect_signals")
 29 |     def test_ready(self, mock_signals, mock_config):
 30 |         """Test the AppConfig.ready method."""
 31 |         # mock_setting = True
 32 |         config = ElasticAppConfig("foo_bar", elasticsearch_django)
 33 |         config.ready()
 34 |         mock_config.assert_called_once_with(False)
 35 |         mock_signals.assert_called_once_with()
 36 | 
 37 | 
 38 | class SearchAppsValidationTests:
 39 |     """Tests for the apps module validation functions."""
 40 | 
 41 |     def test__validate_model(self):
 42 |         """Test _validate_model function."""
 43 |         # 1. model doesn't implement as_search_document
 44 |         with mock.patch("tests.models.ExampleModel") as tm:
 45 |             del tm.as_search_document
 46 |             with pytest.raises(ImproperlyConfigured):
 47 |                 _validate_model(tm)
 48 | 
 49 |         # 2. model.objects doesn't implement get_search_queryset
 50 |         with mock.patch("tests.models.ExampleModel") as tm:
 51 |             del tm.objects.get_search_queryset
 52 |             with pytest.raises(ImproperlyConfigured):
 53 |                 _validate_model(tm)
 54 | 
 55 |         # model should pass
 56 |         with mock.patch("tests.models.ExampleModel") as tm:
 57 |             _validate_model(tm)
 58 | 
 59 |     @mock.patch("elasticsearch_django.apps.settings")
 60 |     def test__validate_mapping(self, mock_settings):
 61 |         """Test _validate_mapping function."""
 62 |         _validate_mapping("foo", strict=True)
 63 |         mock_settings.get_index_mapping.assert_called_once_with("foo")
 64 |         mock_settings.get_index_mapping.side_effect = IOError()
 65 |         with pytest.raises(ImproperlyConfigured):
 66 |             _validate_mapping("foo", strict=True)
 67 |         # non-strict validation shouldn't raise, even though the lookup fails
 68 |         _validate_mapping("foo", strict=False)
 69 | 
 70 |     @mock.patch("elasticsearch_django.apps.settings")
 71 |     @mock.patch("elasticsearch_django.apps._validate_model")
 72 |     @mock.patch("elasticsearch_django.apps._validate_mapping")
 73 |     def test__validate_config(self, mock_mapping, mock_model, mock_settings):
 74 |         """Test _validate_config function."""
 75 |         mock_settings.get_index_names.return_value = ["foo"]
 76 |         mock_settings.get_setting.return_value = "full"
 77 |         mock_settings.get_index_models.return_value = [ExampleModel]
 78 |         _validate_config()
 79 |         mock_mapping.assert_called_once_with("foo", strict=False)
 80 |         mock_model.assert_called_once_with(ExampleModel)
 81 | 
 82 |     @mock.patch("elasticsearch_django.apps.settings")
 83 |     @mock.patch("elasticsearch_django.apps._validate_model")
 84 |     @mock.patch("elasticsearch_django.apps._validate_mapping")
 85 |     def test__validate_config_invalid_strategy(
 86 |         self, mock_mapping, mock_model, mock_settings
 87 |     ):
 88 |         """Test _validate_config function with an invalid update_strategy."""
 89 |         mock_settings.get_index_names.return_value = ["foo"]
 90 |         mock_settings.get_setting.return_value = "foo"
 91 |         mock_settings.get_index_models.return_value = [ExampleModel]
 92 |         with pytest.raises(ImproperlyConfigured):
 93 |             _validate_config()
 94 | 
 95 |     @mock.patch("elasticsearch_django.apps.signals")
 96 |     @mock.patch("elasticsearch_django.apps.settings")
 97 |     def test__connect_signals(self, mock_settings, mock_signals):
 98 |         """Test the _connect_signals function."""
 99 |         # this should connect up the signals once, for ExampleModel
100 |         mock_settings.get_index_names.return_value = ["foo"]
101 |         mock_settings.get_index_models.return_value = [ExampleModel]
102 |         _connect_signals()
103 |         mock_signals.post_save.connect.assert_called_once_with(
104 |             _on_model_save, sender=ExampleModel, dispatch_uid="examplemodel.post_save"
105 |         )
106 |         mock_signals.post_delete.connect.assert_called_once_with(
107 |             _on_model_delete,
108 |             sender=ExampleModel,
109 |             dispatch_uid="examplemodel.post_delete",
110 |         )
111 | 
112 |     @mock.patch("elasticsearch_django.apps._delete_from_search_index")
113 |     def test__on_model_delete(self, mock_delete):
114 |         """Test the _on_model_delete function."""
115 |         obj = mock.Mock(spec=SearchDocumentMixin, search_indexes=["foo", "bar"])
116 |         _on_model_delete(None, instance=obj)
117 |         assert mock_delete.call_count == 2
118 |         mock_delete.assert_called_with(instance=obj, index="bar")
119 | 
120 |     @mock.patch("elasticsearch_django.apps.settings.auto_sync")
121 |     @mock.patch("elasticsearch_django.apps.pre_delete")
122 |     def test__delete_from_search_index_True(self, mock_delete_signal, mock_auto_sync):
123 |         """Test the _delete_from_search_index function when AUTO_SYNC=True."""
124 |         mock_auto_sync.return_value = True
125 |         obj = mock.Mock(spec=SearchDocumentMixin)
126 |         _delete_from_search_index(instance=obj, index="foo")
127 |         mock_delete_signal.send.assert_called_once_with(
128 |             sender=obj.__class__, instance=obj, index="foo"
129 |         )
130 |         obj.delete_search_document.assert_called_once_with(index="foo")
131 | 
132 |     @mock.patch("elasticsearch_django.apps.settings.auto_sync")
133 |     @mock.patch("elasticsearch_django.apps.pre_delete")
134 |     def test__delete_from_search_index_False(self, mock_delete_signal, mock_auto_sync):
135 |         """Test the _delete_from_search_index function when AUTO_SYNC=False."""
136 |         obj = mock.Mock(spec=SearchDocumentMixin)
137 |         mock_auto_sync.return_value = False
138 |         _delete_from_search_index(instance=obj, index="foo")
139 |         mock_delete_signal.send.assert_called_once_with(
140 |             sender=obj.__class__, instance=obj, index="foo"
141 |         )
142 |         obj.delete_search_document.assert_not_called()
143 | 
144 |     @mock.patch("elasticsearch_django.apps._update_search_index")
145 |     def test__on_model_save__index(self, mock_update):
146 |         """Test the _on_model_save function without update_fields."""
147 |         obj = mock.Mock(spec=SearchDocumentMixin, search_indexes=["foo"])
148 |         _on_model_save(None, instance=obj, update_fields=None)
149 |         mock_update.assert_called_once_with(
150 |             instance=obj, index="foo", update_fields=None
151 |         )
152 | 
153 |     @mock.patch("elasticsearch_django.apps._update_search_index")
154 |     def test__on_model_save__update(self, mock_update):
155 |         """Test the _on_model_save function with update_fields."""
156 |         obj = mock.Mock(spec=SearchDocumentMixin, search_indexes=["foo"])
157 |         _on_model_save(None, instance=obj, update_fields=["bar"])
158 |         mock_update.assert_called_once_with(
159 |             instance=obj, index="foo", update_fields=["bar"]
160 |         )
161 | 
162 |     @mock.patch("elasticsearch_django.apps._in_search_queryset")
163 |     @mock.patch("elasticsearch_django.apps.settings.auto_sync")
164 |     def test__update_search_index__auto_sync(self, mock_auto_sync, mock_in_qs):
165 |         """Test _update_search_index is a no-op when _in_search_queryset is False."""
166 |         mock_auto_sync.return_value = True
167 |         mock_in_qs.return_value = False
168 |         obj = mock.Mock(spec=SearchDocumentMixin)
169 |         _update_search_index(instance=obj, index="foo", update_fields=None)
170 |         assert obj.index_search_document.call_count == 0
171 |         assert obj.update_search_document.call_count == 0
172 |         obj.index_search_document.assert_not_called()
173 |         obj.update_search_document.assert_not_called()
174 |         obj.delete_search_document.assert_not_called()
175 | 
176 |     @mock.patch("elasticsearch_django.apps._in_search_queryset")
177 |     @mock.patch("elasticsearch_django.apps.settings.auto_sync")
178 |     def test__update_search_index__not_in_qs(self, mock_auto_sync, mock_in_qs):
179 |         """Test _update_search_index indexes the object when update_fields is None."""
180 |         mock_auto_sync.return_value = True
181 |         mock_in_qs.return_value = True
182 |         obj = mock.Mock(spec=SearchDocumentMixin)
183 |         _update_search_index(instance=obj, index="foo", update_fields=None)
184 |         assert obj.index_search_document.call_count == 1
185 |         assert obj.update_search_document.call_count == 0
186 |         obj.index_search_document.assert_called_once_with(index="foo")
187 |         obj.update_search_document.assert_not_called()
188 |         obj.delete_search_document.assert_not_called()
189 | 
190 |     @mock.patch("elasticsearch_django.apps._in_search_queryset")
191 |     @mock.patch("elasticsearch_django.apps.settings.auto_sync")
192 |     def test__update_search_index__no_auto_sync(self, mock_auto_sync, mock_in_qs):
193 |         """Test _update_search_index is a no-op when auto_sync returns False."""
194 |         mock_auto_sync.return_value = False
195 |         mock_in_qs.return_value = True
196 |         obj = mock.Mock(spec=SearchDocumentMixin)
197 |         _update_search_index(instance=obj, index="foo", update_fields=None)
198 |         assert obj.index_search_document.call_count == 0
199 |         assert obj.update_search_document.call_count == 0
200 |         obj.index_search_document.assert_not_called()
201 |         obj.update_search_document.assert_not_called()
202 |         obj.delete_search_document.assert_not_called()
203 | 


--------------------------------------------------------------------------------
/tests/test_commands.py:
--------------------------------------------------------------------------------
  1 | from unittest import mock
  2 | 
  3 | from elasticsearch.exceptions import TransportError
  4 | 
  5 | from elasticsearch_django.management.commands import (
  6 |     BaseSearchCommand,
  7 |     create_search_index,
  8 |     delete_search_index,
  9 |     prune_search_index,
 10 |     rebuild_search_index,
 11 |     update_search_index,
 12 | )
 13 | 
 14 | 
 15 | class BaseSearchCommandTests:
 16 |     """Tests for the elasticsearch_django management commands base command."""
 17 | 
 18 |     @mock.patch("elasticsearch_django.management.commands.logger")
 19 |     @mock.patch.object(BaseSearchCommand, "do_index_command")
 20 |     def test_handle(self, mock_do, mock_log):
 21 |         """Test the main handle method calls do_index_command."""
 22 |         obj = BaseSearchCommand()
 23 |         obj.handle(indexes=["foo", "bar"])
 24 |         # this should have called the do_index_command twice
 25 |         mock_do.assert_has_calls([mock.call("foo"), mock.call("bar")])
 26 |         mock_do.reset_mock()
 27 |         mock_do.side_effect = TransportError(message="oops", errors=(Exception(),))
 28 |         obj.handle(indexes=["baz"])
 29 |         mock_do.assert_called_once_with("baz")
 30 |         mock_log.exception.assert_called_once()
 31 | 
 32 | 
 33 | class NamedCommandTests:
 34 |     """Test each named command."""
 35 | 
 36 |     @mock.patch(
 37 |         "elasticsearch_django.management.commands.create_search_index.create_index"
 38 |     )
 39 |     def test_create_search_index(self, mock_create):
 40 |         """Test the create_search_index command."""
 41 |         cmd = create_search_index.Command()
 42 |         cmd.do_index_command("foo")
 43 |         mock_create.assert_called_once_with("foo")
 44 | 
 45 |     @mock.patch(
 46 |         "elasticsearch_django.management.commands.delete_search_index.delete_index"
 47 |     )
 48 |     def test_delete_search_index(self, mock_delete):
 49 |         """Test the delete_search_index command."""
 50 |         cmd = delete_search_index.Command()
 51 |         retval = cmd.do_index_command(
 52 |             "foo", interactive=False
 53 |         )  # True would hang the tests
 54 |         assert retval == mock_delete.return_value.body
 55 |         mock_delete.assert_called_once_with("foo")
 56 |         mock_delete.reset_mock()
 57 | 
 58 |         # mock out the confirmation prompt so the test doesn't hang
 59 |         with mock.patch.object(
 60 |             delete_search_index.Command, "_confirm_action"
 61 |         ) as mock_confirm:
 62 |             mock_confirm.return_value = False
 63 |             retval = cmd.do_index_command("foo", interactive=True)
 64 |             mock_delete.assert_not_called()
 65 |             assert retval is None
 66 | 
 67 |     @mock.patch(
 68 |         "elasticsearch_django.management.commands.prune_search_index.prune_index"
 69 |     )
 70 |     def test_prune_search_index(self, mock_prune):
 71 |         """Test the prune_search_index command."""
 72 |         cmd = prune_search_index.Command()
 73 |         cmd.do_index_command("foo")
 74 |         mock_prune.assert_called_once_with("foo")
 75 | 
 76 |     @mock.patch(
 77 |         "elasticsearch_django.management.commands.update_search_index.update_index"
 78 |     )
 79 |     def test_update_search_index(self, mock_update):
 80 |         """Test the update_search_index command."""
 81 |         cmd = update_search_index.Command()
 82 |         cmd.do_index_command("foo")
 83 |         mock_update.assert_called_once_with("foo")
 84 | 
 85 |     @mock.patch(
 86 |         "elasticsearch_django.management.commands.rebuild_search_index.delete_index"
 87 |     )
 88 |     @mock.patch(
 89 |         "elasticsearch_django.management.commands.rebuild_search_index.create_index"
 90 |     )
 91 |     @mock.patch(
 92 |         "elasticsearch_django.management.commands.rebuild_search_index.update_index"
 93 |     )
 94 |     def test_rebuild_search_index(self, mock_update, mock_create, mock_delete):
 95 |         """Test the rebuild_search_index command."""
 96 |         cmd = rebuild_search_index.Command()
 97 |         result = cmd.do_index_command(
 98 |             "foo", interactive=False
 99 |         )  # True would hang the tests
100 |         mock_delete.assert_called_once_with("foo")
101 |         mock_create.assert_called_once_with("foo")
102 |         mock_update.assert_called_once_with("foo")
103 |         assert result["delete"] == mock_delete.return_value.body
104 |         assert result["create"] == mock_create.return_value.body
105 |         assert result["update"] == mock_update.return_value
106 |         # check that the delete is handled if the index does not exist
107 |         mock_delete.side_effect = TransportError("Index not found")
108 |         result = cmd.do_index_command(
109 |             "foo", interactive=False
110 |         )  # True would hang the tests
111 |         assert result["delete"] == {}
112 | 


--------------------------------------------------------------------------------
/tests/test_decorators.py:
--------------------------------------------------------------------------------
 1 | from django.db.models import signals
 2 | from django.test import TestCase
 3 | 
 4 | from elasticsearch_django.apps import _on_model_save
 5 | from elasticsearch_django.decorators import disable_search_updates
 6 | 
 7 | 
 8 | class DecoratorTests(TestCase):
 9 |     def setUp(self) -> None:
10 |         signals.post_save.connect(_on_model_save)
11 | 
12 |     def tearDown(self) -> None:
13 |         signals.post_save.disconnect(_on_model_save)
14 | 
15 |     def test_disable_updates(self) -> None:
16 |         """Check receivers are emptied inside the context and restored on exit."""
17 |         self.assertNotEqual(signals.post_save.receivers, [])
18 |         self.assertEqual(signals.post_save.receivers[0][1](), _on_model_save)
19 |         with disable_search_updates():
20 |             self.assertEqual(signals.post_save.receivers, [])
21 |         self.assertEqual(signals.post_save.receivers[0][1](), _on_model_save)
22 | 


--------------------------------------------------------------------------------
/tests/test_index_functions.py:
--------------------------------------------------------------------------------
  1 | from unittest import mock
  2 | 
  3 | import pytest
  4 | 
  5 | from elasticsearch_django.index import (
  6 |     _prune_hit,
  7 |     bulk_actions,
  8 |     create_index,
  9 |     delete_index,
 10 |     prune_index,
 11 |     scan_index,
 12 |     update_index,
 13 | )
 14 | 
 15 | from .models import ExampleModel, ExampleModelManager
 16 | 
 17 | 
 18 | class IndexFunctionTests:
 19 |     """Tests for the helper functions in elasticsearch_django.index."""
 20 | 
 21 |     @mock.patch("elasticsearch_django.index.get_client")
 22 |     @mock.patch("elasticsearch_django.index.get_index_mapping")
 23 |     def test_create_index(self, mock_mapping, mock_client):
 24 |         """Test create_index passes the index mapping and settings to the client."""
 25 |         mock_client.return_value = mock.Mock()
 26 |         create_index("foo")
 27 |         mock_client.assert_called_once_with()
 28 |         mock_mapping.assert_called_once_with("foo")
 29 |         mock_client.return_value.indices.create.assert_called_once_with(
 30 |             index="foo",
 31 |             mappings=mock_mapping.return_value["mappings"],
 32 |             settings=mock_mapping.return_value.get("settings"),
 33 |         )
 34 | 
 35 |     from django.db.models.query import QuerySet  # used by the patch just below
 36 | 
 37 |     @mock.patch.object(QuerySet, "iterator")
 38 |     @mock.patch("elasticsearch_django.index.get_client")
 39 |     @mock.patch("elasticsearch_django.index.bulk_actions")
 40 |     @mock.patch("elasticsearch_django.index.get_index_models")
 41 |     @mock.patch("elasticsearch.helpers.bulk")
 42 |     def test_update_index(
 43 |         self, mock_bulk, mock_models, mock_actions, mock_client, mock_qs
 44 |     ):
 45 |         """Test update_index returns one bulk response per indexed model."""
 46 |         mock_foo = mock.Mock()
 47 |         mock_foo.search_doc_type = mock.PropertyMock(return_value="bar")
 48 |         mock_foo.objects = mock.PropertyMock(return_value=mock.Mock())
 49 |         mock_models.return_value = [mock_foo]
 50 |         responses = update_index("foo")
 51 |         assert responses == [mock_bulk.return_value]
 52 | 
 53 |     @mock.patch("elasticsearch_django.index.get_client")
 54 |     def test_delete_index(self, mock_client):
 55 |         """Test delete_index calls indices.delete with ignore_unavailable=True."""
 56 |         delete_index("foo")
 57 |         mock_client.assert_called_once()
 58 |         mock_client.return_value.indices.delete.assert_called_once_with(
 59 |             index="foo", ignore_unavailable=True
 60 |         )
 61 | 
 62 |     @mock.patch("elasticsearch_django.index.helpers")
 63 |     @mock.patch("elasticsearch_django.index.scan_index")
 64 |     @mock.patch("elasticsearch_django.index._prune_hit")
 65 |     @mock.patch("elasticsearch_django.index.bulk_actions")
 66 |     @mock.patch("elasticsearch_django.index.get_index_models")
 67 |     @mock.patch("elasticsearch_django.index.get_client")
 68 |     def test_prune_index(
 69 |         self,
 70 |         mock_client,
 71 |         mock_models,
 72 |         mock_actions,
 73 |         mock_prune,
 74 |         mock_scan,
 75 |         mock_helpers,
 76 |     ):
 77 |         """Test prune_index only calls bulk when there is something to prune."""
 78 |         # this forces one single evaluation of the outer and inner for loop
 79 |         mock_models.return_value = [ExampleModel]
 80 |         mock_scan.return_value = ["hit"]
 81 | 
 82 |         # _prune_hit returns an object, so bulk should be called
 83 |         mock_prune.return_value = ExampleModel()
 84 |         # should return a list with one item in it
 85 |         assert prune_index("foo") == [mock_helpers.bulk.return_value]
 86 |         # should have called actions and bulk once each
 87 |         mock_actions.assert_called_once()
 88 |         mock_helpers.bulk.assert_called_once()
 89 | 
 90 |         mock_actions.reset_mock()
 91 |         mock_helpers.bulk.reset_mock()
 92 |         # if there are no objects to prune
 93 |         mock_prune.return_value = None
 94 |         # should return an empty list
 95 |         assert prune_index("foo") == []
 96 |         # shouldn't call either actions or bulk (as there's no need)
 97 |         mock_actions.assert_not_called()
 98 |         mock_helpers.bulk.assert_not_called()
 99 | 
100 |     @mock.patch.object(ExampleModelManager, "in_search_queryset")
101 |     def test__prune_hit(self, mock_qs):
102 |         """Test _prune_hit returns None, or an object when not in the queryset."""
103 |         hit = {"_id": 1, "_index": "foo"}
104 |         mock_qs.return_value = True
105 |         assert _prune_hit(hit, ExampleModel) is None
106 | 
107 |         mock_qs.return_value = False
108 |         # should now return an instance of ExampleModel
109 |         obj = _prune_hit(hit, ExampleModel)
110 |         assert isinstance(obj, ExampleModel)
111 |         assert obj.id == hit["_id"]
112 | 
113 |     @mock.patch("elasticsearch_django.index.get_client")
114 |     @mock.patch("elasticsearch_django.index.helpers")
115 |     def test_scan_index(self, mock_helpers, mock_client):
116 |         """Test scan_index delegates to helpers.scan with the client and index."""
117 |         # cast to list to force evaluation of the generator
118 |         response = list(scan_index("foo", ExampleModel))
119 |         mock_helpers.scan.assert_called_once_with(mock_client.return_value, index="foo")
120 |         assert response == list(mock_helpers.scan.return_value)
121 | 
122 |     @mock.patch.object(ExampleModel, "as_search_action")
123 |     def test_bulk_actions(self, mock_action):
124 |         """Test bulk_actions rejects '_all' and skips objects that fail."""
125 |         # passing '_all' as the second argument must raise ValueError
126 |         with pytest.raises(ValueError):
127 |             list(bulk_actions([], "_all", "index"))
128 | 
129 |         mock_action.return_value = "foo"
130 |         objects = [ExampleModel(), ExampleModel()]
131 | 
132 |         assert list(bulk_actions(objects, "foo", "update")) == ["foo", "foo"]
133 | 
134 |         # now let's add in a bad object, and check we still get the good one
135 |         assert list(bulk_actions([ExampleModel(), "bad"], "foo", "update")) == ["foo"]
136 | 


--------------------------------------------------------------------------------
/tests/test_models.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import decimal
  3 | from unittest import mock
  4 | from uuid import uuid4
  5 | 
  6 | import pytest
  7 | from django.core.cache import cache
  8 | from django.utils.timezone import now as tz_now
  9 | from elastic_transport import ObjectApiResponse
 10 | from elasticsearch import Elasticsearch
 11 | 
 12 | # from elasticsearch_django.api import Count, Search
 13 | from elasticsearch_django.models import (
 14 |     UPDATE_STRATEGY_FULL,
 15 |     UPDATE_STRATEGY_PARTIAL,
 16 |     SearchDocumentManagerMixin,
 17 |     SearchDocumentMixin,
 18 |     SearchQuery,
 19 |     SearchResponseParser,
 20 | )
 21 | 
 22 | from .models import (
 23 |     ExampleModel,
 24 |     ExampleModelManager,
 25 |     ExampleModelWithCustomPrimaryKey,
 26 |     ModelA,
 27 |     ModelB,
 28 | )
 29 | 
 30 | 
 31 | class SearchDocumentMixinTests:
 32 |     """Tests for the SearchDocumentMixin."""
 33 | 
 34 |     @pytest.fixture
 35 |     def test_obj(self) -> ExampleModel:
 36 |         return ExampleModel(pk=1, simple_field_1=99, simple_field_2="foo")
 37 | 
 38 |     @mock.patch("elasticsearch_django.models.get_model_indexes")
 39 |     def test_search_indexes(self, mock_indexes, test_obj: ExampleModel):
 40 |         """Test the search_indexes attribute delegates to get_model_indexes."""
 41 |         mock_indexes.return_value = "foo"
 42 |         assert test_obj.search_indexes == "foo", test_obj.search_indexes
 43 |         mock_indexes.assert_called_once_with(ExampleModel)
 44 | 
 45 |     def test_as_search_document(self):
 46 |         """Test the base as_search_document raises NotImplementedError."""
 47 |         obj = SearchDocumentMixin()
 48 |         with pytest.raises(NotImplementedError):
 49 |             obj.as_search_document(index="_all")
 50 | 
 51 |     @mock.patch("elasticsearch_django.models.get_model_index_properties")
 52 |     def test_clean_update_fields(self, mock_properties, test_obj: ExampleModel):
 53 |         """Test that update fields missing from the index mapping are dropped."""
 54 |         mock_properties.return_value = ["simple_field_1", "complex_field"]
 55 |         assert test_obj.clean_update_fields(
 56 |             index="", update_fields=["simple_field_1", "simple_field_2"]
 57 |         ) == ["simple_field_1"]
 58 | 
 59 |     @mock.patch("elasticsearch_django.models.get_model_index_properties")
 60 |     def test_clean_update_fields_related_field(
 61 |         self, mock_properties, test_obj: ExampleModel
 62 |     ):
 63 |         """Test that relation fields raise a ValueError."""
 64 |         test_obj = ExampleModel()  # shadows the injected fixture
 65 |         mock_properties.return_value = ["simple_field_1", "user"]
 66 |         with pytest.raises(ValueError):
 67 |             test_obj.clean_update_fields(
 68 |                 index="",
 69 |                 update_fields=["simple_field_1", "complex_field", "user"],
 70 |             )
 71 | 
 72 |     @mock.patch("elasticsearch_django.models.get_model_index_properties")
 73 |     def test_as_search_document_update_full(
 74 |         self, mock_properties, test_obj: ExampleModel
 75 |     ):
 76 |         """Test that the FULL strategy returns the complete search document."""
 77 |         test_obj = ExampleModel(simple_field_1=1, simple_field_2="foo")
 78 |         mock_properties.return_value = ["simple_field_1"]
 79 |         with mock.patch(
 80 |             "elasticsearch_django.models.UPDATE_STRATEGY", UPDATE_STRATEGY_FULL
 81 |         ):
 82 |             assert test_obj.as_search_document_update(
 83 |                 index="_all", update_fields=["simple_field_1"]
 84 |             ) == test_obj.as_search_document(index="_all")
 85 | 
 86 |     @mock.patch("elasticsearch_django.models.UPDATE_STRATEGY", UPDATE_STRATEGY_PARTIAL)
 87 |     @mock.patch("elasticsearch_django.models.get_model_index_properties")
 88 |     def test_as_search_document_update_partial(
 89 |         self, mock_properties, test_obj: ExampleModel
 90 |     ):
 91 |         """Test that the PARTIAL strategy returns only mapped update fields."""
 92 |         mock_properties.return_value = ["simple_field_1", "simple_field_2"]
 93 |         assert test_obj.as_search_document_update(
 94 |             index="_all", update_fields=["simple_field_1", "simple_field_2"]
 95 |         ) == {
 96 |             "simple_field_1": test_obj.simple_field_1,
 97 |             "simple_field_2": test_obj.simple_field_2,
 98 |         }
 99 | 
100 |         # remove simple_field_2 from the mapping - should no longer be included
101 |         mock_properties.return_value = ["simple_field_1"]
102 |         assert test_obj.as_search_document_update(
103 |             index="_all", update_fields=["simple_field_1", "simple_field_2"]
104 |         ) == {"simple_field_1": test_obj.simple_field_1}
105 | 
106 |     @mock.patch(
107 |         "elasticsearch_django.settings.get_connection_settings",
108 |         lambda: "http://testserver",
109 |     )
110 |     @mock.patch("elasticsearch_django.models.get_client")
111 |     def test_index_search_document(self, mock_client, test_obj: ExampleModel):
112 |         """Test the index_search_document sets the cache."""
113 |         # test_obj comes from the fixture defined at the top of this class
114 |         doc = test_obj.as_search_document(index="_all")
115 |         key = test_obj.search_document_cache_key
116 |         assert cache.get(key) is None
117 |         test_obj.index_search_document(index="_all")
118 |         assert cache.get(key) == doc
119 |         mock_client.return_value.index.assert_called_once_with(
120 |             index="_all",
121 |             document=doc,
122 |             id=test_obj.get_search_document_id(),
123 |         )
124 | 
125 |     @mock.patch(
126 |         "elasticsearch_django.settings.get_connection_settings",
127 |         lambda: "http://testserver",
128 |     )
129 |     @mock.patch("elasticsearch_django.models.get_client")
130 |     def test_index_search_document_cached(self, mock_client, test_obj: ExampleModel):
131 |         """Test the index_search_document does not update if doc is a duplicate."""
132 |         doc = test_obj.as_search_document(index="_all")
133 |         key = test_obj.search_document_cache_key
134 |         cache.set(key, doc, timeout=1)
135 |         assert cache.get(key) == doc
136 |         test_obj.index_search_document(index="_all")
137 |         assert mock_client.call_count == 0
138 | 
139 |     @mock.patch(
140 |         "elasticsearch_django.settings.get_connection_settings",
141 |         lambda: "http://testserver",
142 |     )
143 |     @mock.patch("elasticsearch_django.models.get_setting")
144 |     @mock.patch("elasticsearch_django.models.get_client")
145 |     def test_update_search_document(
146 |         self, mock_client, mock_setting, test_obj: ExampleModel
147 |     ):
148 |         """Test the update_search_document wraps up doc correctly."""
149 |         doc = test_obj.as_search_document_update(
150 |             index="_all", update_fields=["simple_field_1"]
151 |         )
152 |         test_obj.update_search_document(index="_all", update_fields=["simple_field_1"])
153 |         mock_client.return_value.update.assert_called_once_with(
154 |             index="_all",
155 |             id=test_obj.get_search_document_id(),
156 |             doc=doc,
157 |             retry_on_conflict=mock_setting.return_value,
158 |         )
159 |         mock_setting.assert_called_once_with("retry_on_conflict", 0)
160 | 
161 |     @mock.patch(
162 |         "elasticsearch_django.settings.get_connection_settings",
163 |         lambda: "http://testserver",
164 |     )
165 |     @mock.patch("elasticsearch_django.models.get_client")
166 |     def test_update_search_document_empty(self, mock_client, test_obj: ExampleModel):
167 |         """Test the update_search_document ignores empty updates."""
168 |         with mock.patch.object(
169 |             ExampleModel, "as_search_document_update"
170 |         ) as mock_update:
171 |             mock_update.return_value = {}
172 |             # the patched method yields an empty partial update doc
173 |             test_obj.update_search_document(index="_all", update_fields=[])
174 |             mock_client.return_value.update.assert_not_called()
175 | 
176 |     @mock.patch(
177 |         "elasticsearch_django.settings.get_connection_settings",
178 |         lambda: "http://testserver",
179 |     )
180 |     @mock.patch("elasticsearch_django.models.get_client")
181 |     def test_delete_search_document(self, mock_client, test_obj: ExampleModel):
182 |         """Test the delete_search_document clears the cache."""
183 |         doc = test_obj.as_search_document(index="_all")
184 |         key = test_obj.search_document_cache_key
185 |         cache.set(key, doc)
186 |         assert cache.get(key) is not None
187 |         test_obj.delete_search_document(index="_all")
188 |         assert cache.get(key) is None
189 |         mock_client.return_value.delete.assert_called_once_with(
190 |             index="_all", id=test_obj.get_search_document_id()
191 |         )
192 | 
193 |     def test_as_search_action(self, test_obj: ExampleModel):
194 |         """Test as_search_action output for index, update and delete actions."""
195 |         # invalid action 'bar'
196 |         with pytest.raises(ValueError):
197 |             test_obj.as_search_action(index="foo", action="bar")
198 | 
199 |         assert test_obj.as_search_action(index="foo", action="index") == {
200 |             "_index": "foo",
201 |             "_op_type": "index",
202 |             "_id": test_obj.get_search_document_id(),
203 |             "_source": test_obj.as_search_document(),
204 |         }
205 | 
206 |         assert test_obj.as_search_action(index="foo", action="update") == {
207 |             "_index": "foo",
208 |             "_op_type": "update",
209 |             "_id": test_obj.get_search_document_id(),
210 |             "doc": test_obj.as_search_document(),
211 |         }
212 | 
213 |         assert test_obj.as_search_action(index="foo", action="delete") == {
214 |             "_index": "foo",
215 |             "_op_type": "delete",
216 |             "_id": test_obj.get_search_document_id(),
217 |         }
218 | 
219 |     @mock.patch("elasticsearch_django.models.get_client")
220 |     def test_fetch_search_document(self, mock_client):
221 |         """Test fetch_search_document needs an id and then calls client.get."""
222 |         obj = ExampleModel()
223 |         # obj has no id
224 |         with pytest.raises(ValueError):
225 |             obj.fetch_search_document(index="foo")
226 | 
227 |         # should now call the ES get method
228 |         obj.id = 1
229 |         response = obj.fetch_search_document(index="foo")
230 |         mock_get = mock_client.return_value.get
231 |         mock_get.assert_called_once_with(index="foo", id=obj.get_search_document_id())
232 |         assert response == mock_get.return_value
234 | 
235 | class SearchDocumentManagerMixinTests:
236 |     """Tests for the SearchDocumentManagerMixin."""
237 | 
238 |     def test_get_search_queryset(self):
239 |         """Test the get_search_queryset method."""
240 |         obj = SearchDocumentManagerMixin()
241 |         with pytest.raises(NotImplementedError):
242 |             obj.get_search_queryset()
243 | 
244 |     @mock.patch.object(ExampleModelManager, "get_search_queryset", autospec=True)
245 |     def test_in_search_queryset(self, mock_qs):
246 |         """Test the in_search_queryset method."""
247 |         obj = ExampleModel(id=1, simple_field_1=1, simple_field_2="foo")
248 |         ExampleModel.objects.in_search_queryset(obj.get_search_document_id())
249 |         mock_qs.assert_called_once_with(ExampleModel.objects, index="_all")
250 |         mock_qs.return_value.filter.assert_called_once_with(
251 |             pk=obj.get_search_document_id()
252 |         )
253 |         mock_qs.return_value.filter.return_value.using.assert_called_once_with("foo")
254 |         mock_qs.return_value.filter.return_value.using.return_value.exists.assert_called_once_with()
255 | 
256 |     @mock.patch.object(ExampleModelManager, "get_search_queryset", autospec=True)
257 |     def test_in_search_queryset_with_a_model_using_custom_primary_key(self, mock_qs):
258 |         """Test the in_search_queryset method."""
259 |         obj = ExampleModelWithCustomPrimaryKey(simple_field_1=1)
260 |         ExampleModelWithCustomPrimaryKey.objects.in_search_queryset(
261 |             obj.get_search_document_id()
262 |         )
263 |         mock_qs.assert_called_once_with(
264 |             ExampleModelWithCustomPrimaryKey.objects, index="_all"
265 |         )
266 |         mock_qs.return_value.filter.assert_called_once_with(pk="1")
267 |         mock_qs.return_value.filter.return_value.using.assert_called_once_with("foo")
268 |         mock_qs.return_value.filter.return_value.using.return_value.exists.assert_called_once_with()
269 | 
270 |     @mock.patch("django.db.models.query.QuerySet", autospec=True)
271 |     def test_from_search_query(self, mock_qs):
272 |         """Test the from_search_query method."""
273 |         self.maxDiff = None
274 |         sq = SearchQuery(
275 |             query={"query": {"match_all": {}}},
276 |             hits=[{"id": "1", "score": 1.0}, {"id": "2", "score": 2.0}],
277 |         )
278 |         qs = ExampleModel.objects.all().from_search_results(sq)
279 | 
280 |         # These two produce equivalent queries, but one uses field name
281 |         # and the other uses field index. I thought this was a clean
282 |         # update in Django 50, but it appears not to be the case so I'm
283 |         # checking both variants. They are both 'correct' SQL and will
284 |         # get the same results.
285 |         def order_by_name(query: str) -> str:
286 |             return query + "ORDER BY 'search_rank' ASC"
287 | 
288 |         def order_by_index(query: str) -> str:
289 |             return query + "ORDER BY 6 ASC"
290 | 
291 |         query1 = (
292 |             'SELECT "tests_examplemodel"."id", "tests_examplemodel"."user_id", "tests_examplemodel"."simple_field_1", '  # noqa: S608
293 |             '"tests_examplemodel"."simple_field_2", "tests_examplemodel"."complex_field", '
294 |             'CASE WHEN "tests_examplemodel"."id" = 1 THEN 1 WHEN "tests_examplemodel"."id" = 2 '
295 |             'THEN 2 ELSE NULL END AS "search_rank", CASE WHEN "tests_examplemodel"."id" = 1 '
296 |             'THEN 1.0 WHEN "tests_examplemodel"."id" = 2 THEN 2.0 ELSE NULL END AS "search_score" '
297 |             'FROM "tests_examplemodel" WHERE "tests_examplemodel"."id" IN (1, 2) '
298 |         )
299 |         assert str(qs.query) == order_by_name(query1) or order_by_index(query1)
300 | 
301 |         # test with a null score - new in v5
302 |         sq = SearchQuery(
303 |             query={"query": {"match_all": {}}},
304 |             hits=[{"id": 1, "score": None}, {"id": 2, "score": 2}],
305 |         )
306 |         qs = ExampleModel.objects.all().from_search_results(sq)
307 |         query2 = (
308 |             'SELECT "tests_examplemodel"."id", "tests_examplemodel"."user_id", "tests_examplemodel"."simple_field_1", '  # noqa: S608
309 |             '"tests_examplemodel"."simple_field_2", "tests_examplemodel"."complex_field", '
310 |             'CASE WHEN "tests_examplemodel"."id" = 1 THEN 1 WHEN "tests_examplemodel"."id" = 2 '
311 |             'THEN 2 ELSE NULL END AS "search_rank", CASE WHEN "tests_examplemodel"."id" = 1 '
312 |             'THEN 1.0 WHEN "tests_examplemodel"."id" = 2 THEN 2.0 ELSE NULL END AS "search_score" '
313 |             'FROM "tests_examplemodel" WHERE "tests_examplemodel"."id" IN (1, 2) '
314 |         )
315 |         assert str(qs.query) == order_by_name(query2) or order_by_index(query2)
316 | 
317 | 
318 | @pytest.mark.django_db
319 | class SearchQueryTests:
320 |     """Tests for the SearchQuery model."""
321 | 
322 |     hits = [
323 |         {"id": "1", "doc_type": "foo"},
324 |         {"id": "2", "doc_type": "foo"},
325 |         {"id": "3", "doc_type": "bar"},
326 |     ]
327 | 
328 |     hits_with_highlights = [
329 |         {"id": "1", "doc_type": "foo", "highlight": {"field1": ["bar"]}},
330 |         {"id": "2", "doc_type": "foo"},
331 |         {"id": "3", "doc_type": "bar"},
332 |     ]
333 | 
334 |     def test__hit_values(self):
335 |         """Test _hit_values yields the named value from every hit."""
336 |         obj = SearchQuery(hits=self.hits)
337 |         assert set(obj._hit_values("id")) == {"1", "2", "3"}
338 | 
339 |     def test_object_ids(self):
340 |         """Test the object_ids property."""
341 |         obj = SearchQuery(hits=self.hits)
342 |         assert set(obj.object_ids) == {"1", "2", "3"}
343 | 
344 |     def test_save(self):
345 |         """Test that non-JSON-native values (date, Decimal) are converted on save."""
346 |         today = datetime.date.today()
347 |         sq = SearchQuery(
348 |             user=None,
349 |             index="foo",
350 |             query={"today": today},
351 |             hits={"hits": decimal.Decimal("1.0")},
352 |             total_hits=100,
353 |             reference="bar",
354 |             executed_at=tz_now(),
355 |             duration=0,
356 |         )
357 |         sq.save()
358 |         sq.refresh_from_db()
359 |         # the date and Decimal values come back as strings
360 |         assert sq.search_terms == ""
361 |         assert sq.query["today"] == today.isoformat()
362 |         assert sq.hits["hits"] == "1.0"
363 |         assert sq.query_type == SearchQuery.QueryType.SEARCH
364 |         assert sq.aggregations is None
365 | 
366 |     def test_paging(self):
367 |         """Test the paging properties."""
368 |         sq = SearchQuery()
369 |         assert sq.page_slice is None
370 | 
371 |         # no hits, so page_from, page_to and page_size should all be 0
372 |         sq.query = {"from": 0, "size": 25}
373 |         assert sq.page_slice == (0, 25)
374 |         assert sq.page_from == 0
375 |         assert sq.page_to == 0
376 |         assert sq.page_size == 0
377 | 
378 |         # three hits
379 |         sq.hits = [1, 2, 3]  # random list of size = 3
380 |         sq.query = {"from": 0, "size": 25}
381 |         assert sq.page_from == 1
382 |         assert sq.page_to == 3
383 |         assert sq.page_size == 3
384 | 
385 |     def test_scores(self):
386 |         """Test the max_score and min_score properties."""
387 |         sq = SearchQuery()
388 |         assert sq.max_score == 0
389 |         assert sq.min_score == 0
390 | 
391 |         sq.hits = [{"score": 1}, {"score": 2}]
392 |         assert sq.max_score == 2
393 |         assert sq.min_score == 1
394 | 
395 |     def test_has_highlights(self):
396 |         sq = SearchQuery(query={"highlight": {}})
397 |         assert sq.has_highlights  # query has a "highlight" key
398 |         sq = SearchQuery(query={"query": {"match_all": {}}})
399 |         assert not sq.has_highlights  # query has no "highlight" key
400 | 
401 |     def test_get_doc_highlights(self):
402 |         sq = SearchQuery(query={"highlight": {}}, hits=self.hits_with_highlights)
403 |         assert sq.get_doc_highlights(1) == {"field1": ["bar"]}
404 | 
405 | 
406 | @pytest.mark.django_db
407 | class SearchResultsQuerySetTests:
408 |     def hits(self):
409 |         return [
410 |             {"id": str(uuid4()), "score": 3.0},
411 |             {"id": str(uuid4()), "score": 2.0},
412 |             {"id": str(uuid4()), "score": 1.0},
413 |         ]
414 | 
415 |     def test_from_search_results(self) -> None:
416 |         hits = self.hits()
417 |         model_a1 = ModelA.objects.create(field_1=hits[0]["id"], field_2="foo")
418 |         model_b = ModelB.objects.create(source=model_a1)
419 |         assert model_b.as_search_document(index="") == {
420 |             "field_2": "foo",
421 |             "extra_info": "some other data",
422 |         }
423 |         sq = SearchQuery(hits=hits)
424 |         qs = ModelA.objects.from_search_results(sq)
425 |         obj = qs.get()
426 |         assert obj == model_a1
427 |         assert obj.search_rank == 1
428 |         assert obj.search_score == 3.0
429 | 
430 | 
431 | @pytest.mark.django_db
432 | class SearchResponseParserTests:
433 |     def test_parse(self) -> None:
434 |         response = mock.MagicMock(spec=ObjectApiResponse)
435 |         response.body = {
436 |             "took": 31,
437 |             "timed_out": False,
438 |             "_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
439 |             "hits": {
440 |                 "total": {"value": 190, "relation": "eq"},
441 |                 "max_score": 7.563781,
442 |                 "hits": [
443 |                     {
444 |                         "_index": "foo",
445 |                         "_id": "123",
446 |                         "_score": 7.563781,
447 |                         "_source": {"country": "GB", "city": "London", "name": "Fred"},
448 |                         "fields": {"country": ["gb"], "city": ["london"]},
449 |                         "highlight": {"country": ["<em>gb</em>"]},
450 |                     }
451 |                 ],
452 |             },
453 |             "aggregations": {
454 |                 "countries": {
455 |                     "doc_count_error_upper_bound": 0,
456 |                     "sum_other_doc_count": 0,
457 |                     "buckets": [{"key": "gb", "doc_count": 100}],
458 |                 }
459 |             },
460 |         }
461 |         parser = SearchResponseParser(response)
462 |         assert parser.total_hits == 190
463 |         assert parser.total_hits_relation == "eq"
464 |         assert parser.hits == [
465 |             {
466 |                 "id": "123",
467 |                 "index": "foo",
468 |                 "score": 7.563781,
469 |                 "fields": {"city": ["london"], "country": ["gb"]},
470 |                 "highlight": {"country": ["<em>gb</em>"]},
471 |             }
472 |         ]
473 |         assert parser.aggregations == {
474 |             "countries": {
475 |                 "doc_count_error_upper_bound": 0,
476 |                 "sum_other_doc_count": 0,
477 |                 "buckets": [{"key": "gb", "doc_count": 100}],
478 |             }
479 |         }
480 | 
481 | 
482 | @pytest.mark.django_db
483 | class ExecuteFunctionTests:
484 |     raw_hits = [
485 |         {"_id": "1", "_index": "foo", "_score": 1.1},
486 |         {"_id": "2", "_index": "foo", "_score": 1.2},
487 |         {"_id": "3", "_index": "bar", "_score": 1.3},
488 |     ]
489 |     clean_hits = [
490 |         {"id": "1", "index": "foo", "score": 1.1},
491 |         {"id": "2", "index": "foo", "score": 1.2},
492 |         {"id": "3", "index": "bar", "score": 1.3},
493 |     ]
494 | 
495 |     aggregations = {
496 |         "test_percentiles": {
497 |             "values": {
498 |                 "1.0": 10.0,
499 |                 "5.0": 15.0,
500 |                 "25.0": 200.0,
501 |                 "50.0": 350.0,
502 |                 "75.0": 400.0,
503 |                 "95.0": 600.0,
504 |                 "99.0": 1500.0,
505 |             }
506 |         }
507 |     }
508 | 
509 |     @mock.patch.object(Elasticsearch, "count")
510 |     def test_execute_count__no_save(self, mock_count: mock.MagicMock) -> None:
511 |         mock_count.return_value = mock.Mock(
512 |             spec=ObjectApiResponse,
513 |             body={
514 |                 "count": 562,
515 |                 "_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
516 |             },
517 |         )
518 |         count = SearchQuery.do_count(index="index", query={"match_all": {}})
519 |         assert mock_count.call_count == 1  # patched count API called once
520 |         mock_count.assert_called_with(index="index", query={"match_all": {}})
521 |         assert count.total_hits == 562  # taken from body["count"]
522 |         assert count.hits is None
523 |         assert count.aggregations is None
524 | 
525 |     @mock.patch.object(Elasticsearch, "count")
526 |     def test_execute_count(self, mock_count: mock.MagicMock) -> None:
527 |         mock_count.return_value = mock.Mock(
528 |             spec=ObjectApiResponse,
529 |             body={
530 |                 "count": 562,
531 |                 "_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
532 |             },
533 |         )
534 |         sq = SearchQuery.do_count(index="index", query={"match_all": {}}).save(
535 |             search_terms="foo",
536 |             user=None,
537 |             reference="bar",
538 |         )
539 |         sq.refresh_from_db()  # just to confirm it saves in / out
540 |         assert sq.id is not None
541 |         assert sq.search_terms == "foo"
542 |         assert sq.reference == "bar"
543 |         assert sq.query == {"match_all": {}}
544 |         assert sq.index == "index"
545 |         assert sq.hits is None
546 |         assert sq.total_hits == 562
547 |         assert sq.total_hits_relation == SearchQuery.TotalHitsRelation.ACCURATE
548 |         assert sq.query_type == SearchQuery.QueryType.COUNT
549 |         assert sq.aggregations is None
550 |         assert sq.duration > 0
551 |         assert sq.query_response is None
552 | 
553 |     @mock.patch.object(Elasticsearch, "search")
554 |     def test_do_search__no_save(self, mock_search: mock.MagicMock) -> None:
555 |         mock_search.return_value = mock.Mock(
556 |             spec=ObjectApiResponse,
557 |             body={
558 |                 "hits": {
559 |                     "total": {"value": 168, "relation": "gte"},
560 |                     "max_score": 1.3,
561 |                     "hits": self.raw_hits,
562 |                 },
563 |                 "aggregations": self.aggregations,
564 |             },
565 |         )
566 |         search = SearchQuery.do_search(index="index", query={"match_all": {}})
567 |         assert mock_search.call_count == 1
568 |         mock_search.assert_called_with(
569 |             index="index",
570 |             query={"match_all": {}},
571 |             from_=0,
572 |             size=25,
573 |             _source=True,
574 |         )
575 |         assert search.total_hits == 168
576 |         assert search.max_score == 1.3
577 |         assert search.hits[0] == {"index": "foo", "id": "1", "score": 1.1}
578 |         assert search.query_response == mock_search.return_value  # raw response
579 | 
580 |     @mock.patch.object(Elasticsearch, "search")
581 |     def test_do_search(self, mock_search):
582 |         # lots of mocking to get around lack of ES server during tests
583 | 
584 |         mock_search.return_value = mock.Mock(
585 |             spec=ObjectApiResponse,
586 |             body={
587 |                 "hits": {
588 |                     "total": {"value": 168, "relation": "gte"},
589 |                     "max_score": 1.1,
590 |                     "hits": self.raw_hits,
591 |                 },
592 |                 "aggregations": self.aggregations,
593 |             },
594 |         )
595 |         sq = SearchQuery.do_search(index="index", query={"match_all": {}}).save(
596 |             search_terms="foo",
597 |             user=None,
598 |             reference="bar",
599 |         )
600 |         sq.refresh_from_db()  # just to confirm it saves in / out
601 |         assert sq.id is not None
602 |         assert sq.search_terms == "foo"
603 |         assert sq.reference == "bar"
604 |         assert sq.query == {
605 |             "query": {"match_all": {}},
606 |             "from": 0,
607 |             "size": 25,
608 |             "_source": True,
609 |         }
610 |         assert sq.index == "index"
611 |         assert sq.hits == self.clean_hits
612 |         assert sq.total_hits == 168
613 |         assert sq.total_hits_relation == SearchQuery.TotalHitsRelation.ESTIMATE
614 |         assert sq.query_type == SearchQuery.QueryType.SEARCH
615 |         assert sq.aggregations == self.aggregations
616 |         assert sq.duration > 0
617 |         # the raw response - this is not saved to the db
618 |         assert sq.query_response == mock_search.return_value
619 |         assert sq.query_type == SearchQuery.QueryType.SEARCH
620 |         assert sq.aggregations == self.aggregations
621 |         assert sq.duration > 0
622 |         # the raw response - this is not saved to the db
623 |         assert sq.query_response == mock_search.return_value
624 |         assert sq.aggregations == self.aggregations
625 |         assert sq.duration > 0
626 |         # the raw response - this is not saved to the db
627 |         assert sq.query_response == mock_search.return_value
628 | 


--------------------------------------------------------------------------------
/tests/test_settings.py:
--------------------------------------------------------------------------------
  1 | from unittest import mock
  2 | 
  3 | import pytest
  4 | from django.apps import apps
  5 | from django.test.utils import override_settings
  6 | from elasticsearch import Elasticsearch
  7 | 
  8 | from elasticsearch_django.settings import (
  9 |     auto_sync,
 10 |     get_client,
 11 |     get_connection_settings,
 12 |     get_document_models,
 13 |     get_index_config,
 14 |     get_index_models,
 15 |     get_index_names,
 16 |     get_model_indexes,
 17 |     get_setting,
 18 |     get_settings,
 19 | )
 20 | 
 21 | from .models import ExampleModel
 22 | 
 23 | TEST_SETTINGS = {  # passed as SEARCH_SETTINGS via override_settings in the tests below
 24 |     "connections": {  # "default" is a URL string, "backup" a dict of client kwargs
 25 |         "default": "https://foo",
 26 |         "backup": {"hosts": "https://bar.baz:123", "api_key": ("id", "secret")},
 27 |     },
 28 |     "indexes": {"baz": {"models": ["tests.ExampleModel"]}},  # index name -> models
 29 |     "settings": {"foo": "bar", "auto_sync": True, "never_auto_sync": []},
 30 | }  # test_auto_sync edits the nested "settings" dict in place
 31 | 
 32 | 
 33 | class SettingsFunctionTests:
 34 |     """Tests for the settings functions."""
 35 | 
 36 |     @mock.patch("elasticsearch_django.settings.get_connection_settings")
 37 |     def test_get_client(self, mock_conn):
 38 |         """Test the get_client function."""
 39 |         mock_conn.return_value = "http://foo:9200"
 40 |         client = get_client()
 41 |         assert len(client.transport.node_pool.all()) == 1
 42 |         assert client.transport.node_pool.all()[0].base_url == mock_conn()
 43 | 
 44 |     @override_settings(SEARCH_SETTINGS=TEST_SETTINGS)
 45 |     def test_get_client__init(self):
 46 |         """Test the get_client function initialises with correct settings."""
 47 | 
 48 |         def check_init(*args, **kwargs):
 49 |             assert kwargs == TEST_SETTINGS["connections"]["backup"]
 50 | 
 51 |         with mock.patch.object(Elasticsearch, "__init__", check_init):
 52 |             _ = get_client("backup")
 53 | 
 54 |     @override_settings(SEARCH_SETTINGS=TEST_SETTINGS)
 55 |     def test_get_settings(self):
 56 |         """Test the get_settings method."""
 57 |         assert get_settings() == TEST_SETTINGS["settings"]
 58 | 
 59 |     @override_settings(SEARCH_SETTINGS=TEST_SETTINGS)
 60 |     def test_get_setting(self):
 61 |         """Test the get_setting method."""
 62 |         assert get_setting("foo") == "bar"
 63 | 
 64 |     @override_settings(SEARCH_SETTINGS=TEST_SETTINGS)
 65 |     def test_get_setting_with_default(self):
 66 |         """Test the get_setting method."""
 67 |         with pytest.raises(KeyError):
 68 |             get_setting("bar")
 69 |         assert get_setting("bar", "baz") == "baz"
 70 | 
 71 |     @override_settings(SEARCH_SETTINGS=TEST_SETTINGS)
 72 |     def test_get_connection_settings(self):
 73 |         """Test the get_connection_settings method."""
 74 |         assert get_connection_settings() == TEST_SETTINGS["connections"]["default"]
 75 |         assert (
 76 |             get_connection_settings("backup") == TEST_SETTINGS["connections"]["backup"]
 77 |         )
 78 | 
 79 |     @override_settings(SEARCH_SETTINGS=TEST_SETTINGS)
 80 |     def test_get_index_config(self):
 81 |         """Test the get_index_config method."""
 82 |         assert get_index_config("baz") == TEST_SETTINGS["indexes"]["baz"]
 83 | 
 84 |     @override_settings(SEARCH_SETTINGS=TEST_SETTINGS)
 85 |     def test_get_index_names(self):
 86 |         """Test the get_index_names method."""
 87 |         assert get_index_names() == list(TEST_SETTINGS["indexes"].keys())
 88 | 
 89 |     @override_settings(SEARCH_SETTINGS=TEST_SETTINGS)
 90 |     def test_get_index_models(self):
 91 |         """Test the get_index_models function."""
 92 |         models = get_index_models("baz")
 93 |         assert models == [apps.get_model("tests", "ExampleModel")]
 94 | 
 95 |     @override_settings(SEARCH_SETTINGS=TEST_SETTINGS)
 96 |     def test_get_model_indexes(self):
 97 |         """Test the get_model_indexes function."""
 98 |         # ExampleModel is in the TEST_SETTINGS
 99 |         assert get_model_indexes(ExampleModel) == ["baz"]
100 |         # plain old object isn't in any indexes
101 |         assert get_model_indexes(object) == []
102 | 
103 |     def test_get_index_mapping(self):
104 |         """Test the get_index_mapping function."""
105 |         # this interacts with the file system, not going to bother to test
106 |         # as it just opens a file and loads in into a dict - there's no 'logic'
107 |         pass
108 | 
109 |     @override_settings(SEARCH_SETTINGS=TEST_SETTINGS)
110 |     def test_get_document_models(self):
111 |         """Test the get_document_models function."""
112 |         assert get_document_models() == {"baz.examplemodel": ExampleModel}
113 | 
114 |     @override_settings(SEARCH_SETTINGS=TEST_SETTINGS)
115 |     def test_auto_sync(self):
116 |         """Test the auto_sync function."""
117 |         obj = ExampleModel()
118 |         assert auto_sync(obj) is True
119 |         # Check that if the auto_sync is False, the function also returns false.
120 |         TEST_SETTINGS["settings"]["auto_sync"] = False
121 |         assert auto_sync(obj) is False
122 |         TEST_SETTINGS["settings"]["auto_sync"] = True
123 |         assert auto_sync(obj) is True
124 |         # Check that if a model is in never_auto_sync, then auto_sync returns false
125 |         TEST_SETTINGS["settings"]["never_auto_sync"].append("tests.examplemodel")
126 |         assert auto_sync(obj) is False
127 | 


--------------------------------------------------------------------------------
/tests/urls.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 | from django.urls import path
3 | 
4 | admin.autodiscover()  # explicit admin discovery; may be redundant - TODO confirm
5 | 
6 | urlpatterns = [path("admin/", admin.site.urls)]  # only route: the Django admin
7 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | # tox configuration: formatting, linting, type checking and the test matrix.
 2 | [tox]
 3 | isolated_build = True
 4 | # one env per tool, then one per Python / Django combination
 5 | envlist = fmt, lint, mypy,
 6 |     py38-django{32,40,41}
 7 |     py39-django{32,40,41}
 8 |     py310-django{32,40,41,42,50,main}
 9 |     py311-django{41,42,50,main}
10 |     py312-django{41,42,50,main}
11 | 
12 | # default test environment; the djangoNN factor selects the Django requirement
13 | [testenv]
14 | deps =
15 |     pytest
16 |     pytest-cov
17 |     pytest-django
18 |     django32: Django>=3.2,<3.3
19 |     django40: Django>=4.0,<4.1
20 |     django41: Django>=4.1,<4.2
21 |     django42: Django>=4.2,<4.3
22 |     django50: https://github.com/django/django/archive/stable/5.0.x.tar.gz
23 |     djangomain: https://github.com/django/django/archive/main.tar.gz
24 | 
25 | commands =
26 |     pytest --ds=tests.settings --cov=elasticsearch_django --verbose tests
27 | 
28 | [testenv:fmt]
29 | description = 'Source file formatting'
30 | deps =
31 |     black
32 | 
33 | # black is run without --check, so it rewrites files in place
34 | commands =
35 |     black elasticsearch_django
36 | 
37 | [testenv:lint]
38 | description = 'Source file linting'
39 | deps =
40 |     ruff
41 | 
42 | # ruff is unpinned above; use the explicit "check" subcommand, as the bare
43 | # "ruff <path>" form is not accepted by current releases
44 | commands =
45 |     ruff check elasticsearch_django
46 | 
47 | [testenv:mypy]
48 | deps =
49 |     mypy
50 | 
51 | commands =
52 |     mypy elasticsearch_django
53 | 


--------------------------------------------------------------------------------