├── .gitignore
├── .project
├── .pydevproject
├── .settings
└── org.eclipse.core.resources.prefs
├── .travis.yml
├── LICENSE
├── README.rst
├── bungiesearch
├── __init__.py
├── aliases.py
├── fields.py
├── indices.py
├── logger.py
├── management
│ ├── __init__.py
│ └── commands
│ │ ├── __init__.py
│ │ ├── _utils.py
│ │ ├── clear_index.py
│ │ ├── rebuild_index.py
│ │ └── search_index.py
├── managers.py
├── signals.py
└── utils.py
├── requirements.txt
├── runtests.sh
├── setup.cfg
├── setup.py
├── setup.sh
└── tests
├── __init__.py
├── core
├── __init__.py
├── analysis.py
├── bungie_signal.py
├── models.py
├── search_aliases.py
├── search_indices.py
├── search_indices_bis.py
├── templates
│ └── article.txt
├── test_bungiesearch.py
└── test_settings.py
├── manage.py
├── pytest.ini
└── settings.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.egg
3 |
4 | /venv
5 | /build/
6 | /dist/
7 | /cache/
8 | /.cache/
9 | /bungiesearch.egg-info/
10 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | bungiesearch
4 |
5 |
6 |
7 |
8 |
9 | org.python.pydev.PyDevBuilder
10 |
11 |
12 |
13 |
14 |
15 | org.python.pydev.pythonNature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.pydevproject:
--------------------------------------------------------------------------------
1 |
2 |
3 | bungiesearch
4 | python 2.7
5 |
6 | /${PROJECT_DIR_NAME}/bungiesearch
7 | /${PROJECT_DIR_NAME}/tests
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding/setup.py=utf-8
3 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | language: python
3 | env:
4 | global:
5 | - TRAVIS=true
6 | - ELASTIC_SEARCH_URL=localhost
7 | matrix:
8 | include:
9 | - python: "2.7"
10 | env: DJANGO_VERSION=">=1.8,<1.9"
11 | - python: "2.7"
12 | env: DJANGO_VERSION=">=1.9,<1.10"
13 | - python: "2.7"
14 | env: DJANGO_VERSION=">=1.10,<1.11"
15 | - python: "3.4"
16 | env: DJANGO_VERSION=">=1.8,<1.9"
17 | - python: "3.4"
18 | env: DJANGO_VERSION=">=1.9,<1.10"
19 | - python: "3.5"
20 | env: DJANGO_VERSION=">=1.8,<1.9"
21 | - python: "3.5"
22 | env: DJANGO_VERSION=">=1.9,<1.10"
23 | - python: "3.5"
24 | env: DJANGO_VERSION=">=1.10,<1.11" COVERAGE=true
25 | install:
26 | - pip install Django$DJANGO_VERSION
27 | - pip install -r requirements.txt
28 | - wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-2.3.0.zip
29 | - unzip -o elasticsearch-2.3.0.zip &> /dev/null
30 | script:
31 | - ./runtests.sh --cluster
32 | after_success:
33 | test -n "$COVERAGE" && coveralls
34 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014, Sparrho
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | * Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | * Neither the name of Sparrho nor the names of its
15 | contributors may be used to endorse or promote products derived from
16 | this software without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | WARNING: UNMAINTAINED
2 | =====================
3 | This package is no longer maintained. You may want to check out the `elasticsearch-dsl-py `__ or `django-haystack `__.
4 |
5 | Bungiesearch
6 | ============
7 |
8 | |Build Status| |Coverage Status|
9 |
10 | .. contents:: Table of contents
11 | :depth: 2
12 |
13 | Purpose
14 | =======
15 |
16 | Bungiesearch is a Django wrapper for
17 | `elasticsearch-dsl-py `__.
18 | It inherits from elasticsearch-dsl-py's ``Search`` class, so all the
19 | fabulous features developed by the elasticsearch-dsl-py team are also
20 | available in Bungiesearch. In addition, just like ``Search``,
21 | Bungiesearch is a lazy searching class (and iterable), meaning you can
22 | call functions in a row, or do something like the following.
23 |
24 | .. code:: python
25 |
26 | lazy = Article.objects.search.query('match', _all='Description')
27 | print len(lazy) # Prints the number of hits by only fetching the number of items.
28 | for item in lazy[5:10]:
29 | print item
30 |
31 | Features
32 | ========
33 |
34 | - Core Python friendly
35 |
36 | - Iteration (``[x for x in lazy_search]``)
37 | - Get items (``lazy_search[10]``)
38 | - Number of hits via ``len`` (``len(lazy_search)``)
39 |
40 | - Index management
41 |
42 | - Creating and deleting an index.
43 | - Creating, updating and deleting doctypes and their mappings.
44 | - Update index doctypes.
45 |
46 | - Django Model Mapping
47 |
48 | - Very easy mapping (no lies).
49 | - Automatic model mapping (and supports undefined models by
50 | returning a ``Result`` instance of ``elasticsearch-dsl-py``).
51 | - Efficient database fetching:
52 |
53 | - One fetch for all items of a given model.
54 | - Fetches only desired fields.
55 |
56 | - Django Manager
57 |
58 | - Easy model integration:
59 | ``MyModel.search.query("match", _all="something to search")``.
60 | - Search aliases (search shortcuts with as many parameters as
61 | wanted): ``Tweet.objects.bungie_title_search("bungie")`` or
62 | ``Article.objects.bungie_title_search("bungie")``, where
63 | ``bungie_title_search`` is uniquely defined.
64 |
65 | - Django signals
66 |
67 | - Connect to post save and pre delete signals for the elasticsearch
68 | index to correctly reflect the database (almost) at all times.
69 |
70 | - Requirements
71 |
72 | - Django >= 1.8
73 | - Python 2.7, 3.4, 3.5
74 |
75 | Feature examples
76 | ----------------
77 |
78 | See section "Full example" at the bottom of page to see the code needed
79 | to perform these following examples. ### Query a word (or list thereof)
80 | on a managed model.
81 |
82 | ``Article.objects.search.query('match', _all='Description')``
83 |
84 | Use a search alias on a model's manager.
85 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
86 |
87 | ``Article.objects.bsearch_title_search('title')``
88 |
89 | Use a search alias on a bungiesearch instance.
90 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
91 |
92 | ``Article.objects.search.bsearch_title_search('title').bsearch_titlefilter('filter this title')``
93 |
94 | Iterate over search results
95 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
96 |
97 | .. code:: python
98 |
99 | # Will print the Django model instance.
100 | for result in Article.objects.search.query('match', _all='Description'):
101 | print result
102 |
103 | Fetch a single item
104 | ~~~~~~~~~~~~~~~~~~~
105 |
106 | .. code:: python
107 |
108 | Article.objects.search.query('match', _all='Description')[0]
109 |
110 | Get the number of returned items
111 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
112 |
113 | .. code:: python
114 |
115 | print len(Article.objects.search.query('match', _all='Description'))
116 |
117 | Deferred model instantiation
118 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
119 |
120 | .. code:: python
121 |
122 | # Will print the Django model instance's primary key. Will only fetch the `pk` field from the database.
123 | for result in Article.objects.search.query('match', _all='Description').only('pk'):
124 | print result.pk
125 |
126 | Elasticsearch limited field fetching
127 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
128 |
129 | .. code:: python
130 |
131 | # Will print the Django model instance. However, elasticsearch's response only has the `_id` field.
132 | for result in Article.objects.search.query('match', _all='Description').fields('_id'):
133 | print result
134 |
135 | Get a specific number of items with an offset.
136 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137 |
138 | This is actually elasticsearch-dsl-py functionality, but it's
139 | demonstrated here because we can iterate over the results via
140 | Bungiesearch.
141 |
142 | .. code:: python
143 |
144 | for item in Article.objects.bsearch_title_search('title').only('pk').fields('_id')[5:7]:
145 | print item
146 |
147 | Lazy objects
148 | ~~~~~~~~~~~~
149 |
150 | .. code:: python
151 |
152 | lazy = Article.objects.bsearch_title_search('title')
153 | print len(lazy)
154 | for item in lazy.filter('range', effective_date={'lte': '2014-09-22'}):
155 | print item
156 |
157 | Installation
158 | ============
159 |
160 | Unless noted otherwise, each step is required.
161 |
162 | Install the package
163 | -------------------
164 |
165 | The easiest way is to install the package from PyPi:
166 |
167 | ``pip install bungiesearch``
168 |
169 | **Note:** Check your version of Django after installing bungiesearch. It
170 | was reported to me directly that installing bungiesearch may upgrade
171 | your version of Django, although I haven't been able to confirm that
172 | myself. Bungiesearch depends on Django 1.8 and above.
173 |
174 | In Django
175 | ---------
176 |
177 | Updating your Django models
178 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
179 |
180 | **Note:** this part is only needed if you want to be able to use search
181 | aliases, which allow you to define shortcuts to complex queries,
182 | available directly from your Django models. I think it's extremely
183 | practical.
184 |
185 | 1. Open your ``models.py`` file.
186 | 2. Add the bungiesearch manager import:
187 | ``from bungiesearch.managers import BungiesearchManager``
188 | 3. Find the model, or models, you wish to index on Elasticsearch and set
189 | them to be managed by Bungiesearch by adding the objects field to
190 | them, as such: ``objects = BungiesearchManager()``. You should now
191 | have a Django model `similar to
192 | this `__.
193 |
194 | Creating bungiesearch search indexes
195 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 |
197 | The search indexes define how bungiesearch should serialize each of the
198 | model's objects. It effectively defines how your object is serialized
199 | and how the ES index should be structured. These are referred to as
200 | `ModelIndex `__\ es.
201 |
202 | A good practice here is to have all the bungiesearch stuff in its own
203 | package. For example, for the section of the Sparrho platform that uses
204 | Django, we have a package called ``search`` where we define the search
205 | indexes, and a subpackage called ``aliases`` which has the many aliases
206 | we use (more on that later).
207 |
208 | 1. Create a subclass of ``ModelIndex``, which you can import with
209 | ``from bungiesearch.indices import ModelIndex``, in a new module
210 | preferably.
211 | 2. In this class, define a class called ``Meta``: it will hold meta
212 | information of this search index for bungiesearch's internal working.
213 | 3. Import the Django model you want to index (from your models file)
214 | and, in the Meta class, define a field called ``model``, which must
215 | be set to the model you want indexed.
216 | 4. By default, bungiesearch will index every field of your model. This
217 | may not always be desired, so you can define which fields must be
218 | excluded in this ``Meta`` class, via the exclude field.
219 | 5. There are plenty of options, so definitely have a read through the
220 | documentation for
221 | `ModelIndex `__.
222 |
223 | Here's `an
224 | example `__ of a
225 | search index. There can be many such definitions in a file.
226 |
227 | Django settings
228 | ~~~~~~~~~~~~~~~
229 |
230 | This is the final required step. Here's the `full
231 | documentation `__ of
232 | this step.
233 |
234 | 1. Open your settings file and add a ``BUNGIESEARCH`` variable, which
235 | must be a dictionary.
236 | 2. Define ``URLS`` as a list of URLs (which can contain only one) of
237 | your ES servers.
238 | 3. Define the ``INDICES`` key as a dictionary where the key is the name
239 | of the index on ES that you want, and the value is the full Python
240 | path to the module which has all the ModelIndex classes to be
241 | indexed on that index name.
242 | 4. Set ``ALIASES`` to an empty dictionary (until you define any search
243 | aliases).
244 | 5. You can keep other values as their defaults.
245 |
246 | In your shell
247 | -------------
248 |
249 | Create the ES indexes
250 | ~~~~~~~~~~~~~~~~~~~~~
251 |
252 | From your shell, in the Django environment, run the following:
253 |
254 | ``python manage.py search_index --create``
255 |
256 | Start populating the index
257 | --------------------------
258 |
259 | Run the following which will take each of the objects in your model,
260 | serialize them, and add them to the elasticsearch index.
261 |
262 | ``python manage.py search_index --update``
263 |
264 | **Note:** With additional parameters, you can limit the number of
265 | documents to be indexed, as well as set conditions on whether they
266 | should be indexed based on updated time for example.
267 |
268 | In Elasticsearch
269 | ----------------
270 |
271 | You can now open your elasticsearch dashboard, such as Elastic HQ, and
272 | see that your index is created with the appropriate mapping and has
273 | items that are indexed.
274 |
275 | Quick start example
276 | ===================
277 |
278 | This example is from the ``test`` folder. It may be partially out-dated,
279 | so please refer to the ``test`` folder for the latest version.
280 |
281 | Procedure
282 | ---------
283 |
284 | 1. In your models.py file (or your managers.py), import bungiesearch and
285 | use it as a model manager.
286 | 2. Define one or more ModelIndex subclasses which define the mapping
287 | between your Django model and elasticsearch.
288 | 3. (Optional) Define SearchAlias subclasses which make it trivial to
289 | call complex elasticsearch-dsl-py functions.
290 | 4. Add a BUNGIESEARCH variable in your Django settings, which must
291 | contain the elasticsearch URL(s), the modules for the indices, the
292 | modules for the search aliases and the signal definitions.
293 |
294 | Example
295 | -------
296 |
297 | Here's the code which is applicable to the previous examples. ### Django
298 | Model
299 |
300 | .. code:: python
301 |
302 | from django.db import models
303 | from bungiesearch.managers import BungiesearchManager
304 |
305 | class Article(models.Model):
306 | title = models.TextField(db_index=True)
307 | authors = models.TextField(blank=True)
308 | description = models.TextField(blank=True)
309 | link = models.URLField(max_length=510, unique=True, db_index=True)
310 | published = models.DateTimeField(null=True)
311 | created = models.DateTimeField(auto_now_add=True)
312 | updated = models.DateTimeField(null=True)
313 | tweet_count = models.IntegerField()
314 | raw = models.BinaryField(null=True)
315 | source_hash = models.BigIntegerField(null=True)
316 | missing_data = models.CharField(blank=True, max_length=255)
317 | positive_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
318 | negative_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
319 | popularity_index = models.IntegerField(default=0)
320 |
321 | objects = BungiesearchManager()
322 |
323 | class Meta:
324 | app_label = 'core'
325 |
326 | ModelIndex
327 | ~~~~~~~~~~
328 |
329 | The following ModelIndex will generate a mapping containing all fields
330 | from ``Article``, minus those defined in ``ArticleIndex.Meta.exclude``.
331 | When the mapping is generated, each field will be given the most appropriate
332 | `elasticsearch core
333 | type `__,
334 | with default attributes (as defined in bungiesearch.fields).
335 |
336 | These default attributes can be overwritten with
337 | ``ArticleIndex.Meta.hotfixes``: each dictionary key must be field
338 | defined either in the model or in the ModelIndex subclass
339 | (``ArticleIndex`` in this case).
340 |
341 | .. code:: python
342 |
343 | from core.models import Article
344 | from bungiesearch.fields import DateField, StringField
345 | from bungiesearch.indices import ModelIndex
346 |
347 |
348 | class ArticleIndex(ModelIndex):
349 | effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
350 | meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
351 |
352 | class Meta:
353 | model = Article
354 | exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
355 | hotfixes = {'updated': {'null_value': '2013-07-01'},
356 | 'title': {'boost': 1.75},
357 | 'description': {'boost': 1.35},
358 | 'full_text': {'boost': 1.125}}
359 |
360 | SearchAlias
361 | ~~~~~~~~~~~
362 |
363 | Defines a search alias for one or more models (in this case only for
364 | ``core.models.Article``).
365 |
366 | .. code:: python
367 |
368 | from core.models import Article
369 | from bungiesearch.aliases import SearchAlias
370 |
371 |
372 | class SearchTitle(SearchAlias):
373 | def alias_for(self, title):
374 | return self.search_instance.query('match', title=title)
375 |
376 | class Meta:
377 | models = (Article,)
378 | alias_name = 'title_search' # This is optional. If none is provided, the name will be the class name in lower case.
379 |
380 | class InvalidAlias(SearchAlias):
381 | def alias_for_does_not_exist(self, title):
382 | return title
383 |
384 | class Meta:
385 | models = (Article,)
386 |
387 | Django settings
388 | ~~~~~~~~~~~~~~~
389 |
390 | .. code:: python
391 |
392 | BUNGIESEARCH = {
393 | 'URLS': [os.getenv('ELASTIC_SEARCH_URL')],
394 | 'INDICES': {'bungiesearch_demo': 'core.search_indices'},
395 | 'ALIASES': {'bsearch': 'myproject.search_aliases'},
396 | 'SIGNALS': {'BUFFER_SIZE': 1} # uses BungieSignalProcessor
397 | }
398 |
399 | Documentation
400 | =============
401 |
402 | ModelIndex
403 | ----------
404 |
405 | A ``ModelIndex`` defines mapping and object extraction for indexing of a
406 | given Django model.
407 |
408 | Any Django model to be managed by bungiesearch must have a defined
409 | ModelIndex subclass. This subclass must contain a subclass called
410 | ``Meta`` which must have a ``model`` attribute (sets the model which it
411 | represents).
412 |
413 | Class attributes
414 | ~~~~~~~~~~~~~~~~
415 |
416 | As detailed below, the doc type mapping will contain fields from the
417 | model it relates to. However, one may often need to index fields which
418 | correspond to either a concatenation of fields of the model or some
419 | logical operation.
420 |
421 | Bungiesearch makes this very easy: simply define a class attribute as
422 | whichever core type, and set the ``eval_as`` constructor parameter to
423 | a one line Python statement. The object is referenced as ``obj`` (not
424 | ``self`` nor ``object``, just ``obj``).
425 |
426 | Example
427 | ^^^^^^^
428 |
429 | This is a partial example as the Meta subclass is not defined, yet
430 | mandatory (cf. below).
431 |
432 | .. code:: python
433 |
434 | from bungiesearch.fields import DateField, StringField
435 | from bungiesearch.indices import ModelIndex
436 |
437 | class ArticleIndex(ModelIndex):
438 | effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
439 | meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
440 |
441 | Here, both ``effective_date`` and ``meta_data`` will be part of the doc
442 | type mapping, but won't be reverse mapped since those fields do not
443 | exist in the model.
444 |
445 | This can also be used to index foreign keys:
446 |
447 | .. code:: python
448 |
449 | some_field_name = StringField(eval_as='",".join([item for item in obj.some_foreign_relation.values_list("some_field", flat=True)]) if obj.some_foreign_relation else ""')
450 |
451 | Class methods
452 | ~~~~~~~~~~~~~
453 |
454 | matches\_indexing\_condition
455 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
456 |
457 | Override this function to specify whether an item should be indexed or
458 | not. This is useful when defining multiple indices (and ModelIndex
459 | classes) for a given model. This method's signature and super class code
460 | is as follows, and allows indexing of all items.
461 |
462 | .. code:: python
463 |
464 | def matches_indexing_condition(self, item):
465 | return True
466 |
467 | For example, if a given elasticsearch index should contain only items
468 | whose title starts with ``"Awesome"``, then this method can be
469 | overridden as follows.
470 |
471 | .. code:: python
472 |
473 | def matches_indexing_condition(self, item):
474 | return item.title.startswith("Awesome")
475 |
476 | Meta subclass attributes
477 | ~~~~~~~~~~~~~~~~~~~~~~~~
478 |
479 | **Note**: in the following, any variable defined as being a ``list``
480 | could also be a ``tuple``. ##### model *Required:* defines the Django
481 | model for which this ModelIndex is applicable.
482 |
483 | fields
484 | ^^^^^^
485 |
486 | *Optional:* list of fields (or columns) which must be fetched when
487 | serializing the object for elasticsearch, or when reverse mapping the
488 | object from elasticsearch back to a Django Model instance. By default,
489 | all fields will be fetched. Setting this *will* restrict which fields
490 | can be fetched and may lead to errors when serializing the object. It is
491 | recommended to use the ``exclude`` attribute instead (cf. below).
492 |
493 | exclude
494 | ^^^^^^^
495 |
496 | *Optional:* list of fields (or columns) which must not be fetched when
497 | serializing or deserializing the object.
498 |
499 | hotfixes
500 | ^^^^^^^^
501 |
502 | *Optional:* a dictionary whose keys are index fields and whose values
503 | are dictionaries which define `core type
504 | attributes `__.
505 | By default, there aren't any special settings, apart for String fields,
506 | where the
507 | `analyzer `__
508 | is set to
509 | ```snowball`` `__
510 | (``{'analyzer': 'snowball'}``).
511 |
512 | additional\_fields
513 | ^^^^^^^^^^^^^^^^^^
514 |
515 | *Optional:* additional fields to fetch for mapping, may it be for
516 | ``eval_as`` fields or when returning the object from the database.
517 |
518 | id\_field
519 | ^^^^^^^^^
520 |
521 | *Optional:* the model field to use as a unique ID for elasticsearch's
522 | metadata ``_id``. Defaults to ``id`` (also called
523 | ```pk`` `__).
524 |
525 | updated\_field
526 | ^^^^^^^^^^^^^^
527 |
528 | *Optional:* set the model's field which can be filtered on dates in
529 | order to find when objects have been updated. Note, this is *mandatory*
530 | to use ``--start`` and/or ``--end`` when updating index (with
531 | ``search_index --update``).
532 |
533 | optimize\_queries
534 | ^^^^^^^^^^^^^^^^^
535 |
536 | *Optional:* set to True to make efficient queries when automatically
537 | mapping to database objects. This will *always* restrict fetching to the
538 | fields set in ``fields`` and in ``additional_fields``. *Note:* You can
539 | also perform an optimal database query with ``.only('__model')``, which
540 | will use the same fields as ``optimize_queries``, or
541 | ``.only('__fields')``, which will use the fields provided in the
542 | ``.fields()`` call.
543 |
544 | indexing\_query
545 | ^^^^^^^^^^^^^^^
546 |
547 | *Optional:* set to a QuerySet instance to specify the query used when
548 | the search\_index command is run to index. This **does not** affect how
549 | each piece of content is indexed.
550 |
551 | default
552 | ^^^^^^^
553 |
554 | Enables support for a given model to be indexed on several elasticsearch
555 | indices. Set to ``False`` on all but the default index. **Note**: if all
556 | managed models are set with ``default=False`` then Bungiesearch will
557 | fail to find and index that model.
558 |
559 | Example
560 | ~~~~~~~
561 |
562 | Indexes all objects of ``Article``, as long as their ``updated``
563 | datetime is less than `21 October 2015
564 | 04:29 `__.
565 |
566 | .. code:: python
567 |
568 | from core.models import Article
569 | from bungiesearch.indices import ModelIndex
570 | from datetime import datetime
571 |
572 | class ArticleIndex(ModelIndex):
573 |
574 | def matches_indexing_condition(self, item):
575 | return item.updated < datetime(2015, 10, 21, 4, 29)
576 |
577 | class Meta:
578 | model = Article
579 | id_field = 'id' # That's actually the default value, so it's not really needed.
580 | exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
581 | hotfixes = {'updated': {'null_value': '2013-07-01'},
582 | 'title': {'boost': 1.75},
583 | 'description': {'boost': 1.35},
584 | 'full_text': {'boost': 1.125}}
585 | optimize_queries = True
586 | indexing_query = Article.objects.defer(*exclude).select_related().all().prefetch_related('tags')
587 |
588 | SearchAlias
589 | -----------
590 |
591 | A ``SearchAlias`` defines search shortcuts (somewhat similar to `Django
592 | managers `__).
593 | Often times, a given search will be used in multiple parts of the code.
594 | SearchAliases allow you to define those queries, filters, or any
595 | bungiesearch/elasticsearch-dsl-py calls as an alias.
596 |
597 | A search alias is either applicable to a ``list`` (or ``tuple``) of
598 | managed models, or to any bungiesearch instance. It's very simple, so
599 | here's an example which is detailed right below.
600 |
601 | Example
602 | ~~~~~~~
603 |
604 | The most simple implementation of a SearchAlias is as follows. This
605 | search alias can be called via ``Article.objects.bungie_title`` (or
606 | ``Article.objects.search.bungie_title``), supposing that the namespace
607 | is set to ``None`` in the settings (cf. below).
608 |
609 | Definition
610 | ^^^^^^^^^^
611 |
612 | .. code:: python
613 |
614 | from bungiesearch.aliases import SearchAlias
615 |
616 | class Title(SearchAlias):
617 | def alias_for(self, title):
618 | return self.search_instance.query('match', title=title)
619 |
620 | Usage
621 | ^^^^^
622 |
623 | .. code:: python
624 |
625 | Article.objects.bungie_title('title')
626 |
627 | Method overwrite
628 | ~~~~~~~~~~~~~~~~
629 |
630 | Any implementation needs to inherit from
631 | ``bungiesearch.aliases.SearchAlias`` and overwrite ``alias_for``. You
632 | can set as many or as few parameters as you want for that function
633 | (since bungiesearch only returns the pointer to that function without
634 | actually calling it).
635 |
636 | Since each managed model has its own doc type, ``self.search_instance``
637 | is a bungiesearch instance set to search the specific doctype.
638 |
639 | Meta subclass attributes
640 | ~~~~~~~~~~~~~~~~~~~~~~~~
641 |
642 | Although not mandatory, the ``Meta`` subclass enables custom naming and
643 | model restrictions for a search alias.
644 |
645 | models
646 | ^^^^^^
647 |
648 | *Optional:* ``list`` (or ``tuple``) of Django models which are allowed
649 | to use this search alias. If a model which is not allowed to use this
650 | SearchAlias tries it, a ``ValueError`` will be raised.
651 |
652 | alias\_name
653 | ^^^^^^^^^^^
654 |
655 | *Optional:* A string corresponding to the suffix name of this search alias.
656 | Defaults to the lower case class name.
657 |
658 | **WARNING**: As explained in the "Settings" section below, all search
659 | aliases in a given module share the prefix (or namespace). This is to
660 | prevent aliases from accidentally overwriting Django manager functions
661 | (e.g. ``update`` or ``get``). In other words, if you define the
662 | ``alias_name`` to ``test``, then it must be called as
663 | ``model_obj.objects.$prefix$_test`` where ``$prefix$`` is the prefix
664 | defined in the settings. This prefix is also applicable to search
665 | aliases which are available via bungiesearch instances directly. Hence,
666 | one can define in one module search utilities (e.g. ``regex`` and
667 | ``range``) and define model specific aliases (e.g. ``title``) in another
668 | module, and use both in conjunction as such:
669 | ``Article.objects.search.bungie_title('search title').utils_range(field='created', gte='2014-05-20', as_query=True)``.
670 | These aliases can be concatenated ad vitam aeternam.
671 |
672 | Sophisticated example
673 | ~~~~~~~~~~~~~~~~~~~~~
674 |
675 | This example shows that we can have some fun with search aliases. In
676 | this case, we define a Range alias which is applicable to any field on
677 | any model.
678 |
679 | .. code:: python
680 |
681 | class Range(SearchAlias):
682 | def alias_for(self, field, gte=None, lte=None, boost=None, as_query=False):
683 | body = {field: {}}
684 | if gte:
685 | body[field]['gte'] = gte
686 | if lte:
687 | body[field]['lte'] = lte
688 | if boost:
689 | if not as_query:
690 | logging.warning('Boost is not applicable to search alias Range when not used as a query.')
691 | else:
692 | body[field]['boost'] = boost
693 | if as_query:
694 | return self.search_instance.query({'range': body})
695 | return self.search_instance.filter({'range': body})
696 |
697 | We can use it as such
698 | ``Article.objects.bungie_range(field='created', gte='2014-05-20', as_query=True)``.
699 |
700 | Settings
701 | --------
702 | Add 'bungiesearch' to INSTALLED_APPS.
703 |
704 | You must define ``BUNGIESEARCH`` in your Django settings in order for
705 | bungiesearch to know elasticsearch URL(s) and which index name contains
706 | mappings for each ModelIndex.
707 |
708 | .. code:: python
709 |
710 | BUNGIESEARCH = {
711 | 'URLS': ['localhost'], # No leading http:// or the elasticsearch client will complain.
712 | 'INDICES': {'main_index': 'myproject.myapp.myindices'}, # Must be a module path.
713 | 'ALIASES': {'bsearch': 'myproject.search_aliases'},
714 | 'SIGNALS': {'BUFFER_SIZE': 1},
715 | 'TIMEOUT': 5
716 | }
717 |
718 | URLS
719 | ~~~~
720 |
721 | *Required:* must be a list of URLs which host elasticsearch instance(s).
722 | This is directly sent to elasticsearch-dsl-py, so any issue with
723 | multiple URLs should be referred to them.
724 |
725 | INDICES
726 | ~~~~~~~
727 |
728 | *Required:* must be a dictionary where each key is the name of an
729 | elasticsearch index and each value is a path to a Python module
730 | containing classes which inherit from
731 | ``bungiesearch.indices.ModelIndex`` (cf. below).
732 |
733 | ALIASES
734 | ~~~~~~~
735 |
736 | *Optional:* a dictionary whose key is the alias namespace and whose
737 | value is the Python module containing classes which inherit from
738 | ``bungiesearch.aliases.SearchAlias``. If the namespace is ``None``, then
739 | the alias will be named ``bungie``. If the namespace is an empty string,
740 | there will be no alias namespace. An underscore is appended to the
741 | provided namespace. In the example above, each search alias
742 | defined in ``myproject.search_aliases`` will be referenced as
743 | ``$ModelObj$.objects.bsearch_$alias$``, where ``$ModelObj$`` is a Django
744 | model and ``$alias$`` is the name of the search alias.
745 |
746 | The purpose is to not accidentally overwrite Django's default manager
747 | functions with search aliases.
748 |
749 | SIGNALS
750 | ~~~~~~~
751 |
752 | *Optional:* if it exists, it must be a dictionary (even empty), and will
753 | connect to the ``post_save`` and ``pre_delete`` signals of *all*
754 | models using ``bungiesearch.managers.BungiesearchManager`` as a manager.
755 | One may also define a signal processor class for more custom
756 | functionality by placing the string value of the module path under a key
757 | called ``SIGNAL_CLASS`` in the dictionary value of ``SIGNALS`` and
758 | defining ``setup`` and ``teardown`` methods, which take ``model`` as the
759 | only parameter. These methods connect and disconnect the signal
760 | processing class to django signals (signals are connected to each model
761 | which uses a BungiesearchManager).
762 |
763 | If ``SIGNALS`` is not defined in the settings, *none* of the models
764 | managed by BungiesearchManager will automatically update the index when
765 | a new item is created or deleted.
766 |
767 | BUFFER\_SIZE
768 | ^^^^^^^^^^^^
769 |
770 | *Optional:* an integer representing the number of items to buffer before
771 | making a bulk index update, defaults to ``100``.
772 |
773 | **WARNING**: if your application is shut down before the buffer is
774 | emptied, then any buffered instance *will not* be indexed on
775 | elasticsearch. Hence, a possibly better implementation is wrapping
776 | ``post_save_connector`` and ``pre_delete_connector`` from
777 | ``bungiesearch.signals`` in a celery task. It is not implemented as such
778 | here in order to not require ``celery``.
779 |
780 | TIMEOUT
781 | ~~~~~~~
782 |
783 | *Optional:* Elasticsearch connection timeout in seconds. Defaults to
784 | ``5``.
785 |
786 | Testing
787 | =======
788 |
789 | The easiest way to run the tests is to install all dev dependencies using
790 | ``./setup.sh`` then run ``./runtests.sh``
791 |
792 | All Bungiesearch tests are in ``tests/core/test_bungiesearch.py``. You
793 | can run the tests by creating a Python virtual environment, installing
794 | the requirements from ``requirements.txt``, installing the package
795 | (``pip install .``) and running ``python tests/manage.py test``. Make
796 | sure to update ``tests/settings.py`` to use your own elasticsearch URLs,
797 | or update the ELASTIC\_SEARCH\_URL environment variable.
798 |
799 | .. |Build Status| image:: https://travis-ci.org/ChristopherRabotin/bungiesearch.svg?branch=master
800 | :target: https://travis-ci.org/ChristopherRabotin/bungiesearch
801 | .. |Coverage Status| image:: https://coveralls.io/repos/ChristopherRabotin/bungiesearch/badge.svg?branch=master&service=github
802 | :target: https://coveralls.io/github/ChristopherRabotin/bungiesearch?branch=master
803 |
--------------------------------------------------------------------------------
/bungiesearch/__init__.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from importlib import import_module
3 |
4 | from django.conf import settings
5 | from elasticsearch.client import Elasticsearch
6 | from elasticsearch_dsl.search import Search
7 | from six import iteritems, itervalues, string_types
8 |
9 | from .aliases import SearchAlias
10 | from .indices import ModelIndex
11 | from .logger import logger
12 |
13 |
class Bungiesearch(Search):
    '''
    This object is used to read Django settings and initialize the elasticsearch connection.

    It extends the elasticsearch-dsl-py `Search` object: on top of the regular search DSL, it
    caches the low level elasticsearch clients, keeps class level maps between index names,
    models and model indices (as declared in `settings.BUNGIESEARCH`), maps raw hits back to
    Django model instances, and exposes search aliases as attributes.
    '''
    DEFAULT_TIMEOUT = 5
    BUNGIE = settings.BUNGIESEARCH

    # The following code loads each model index_name module (as defined in the settings) and stores
    # index_name name to model index_name, and index_name name to model. Settings shouldn't change between
    # subsequent calls to Search(), which is why this is static code.

    # Low level elasticsearch clients, keyed by the tuple returned from `_build_key`.
    _cached_es_instances = {}
    # Let's go through the settings in order to map each defined Model/ModelIndex to the elasticsearch index_name.
    # NOTE: most of these maps are defaultdicts, hence looking up an unknown key returns (and stores) an empty value.
    _model_to_index, _model_name_to_index, _model_name_to_model_idx = defaultdict(list), defaultdict(list), defaultdict(list)
    _index_to_model, _idx_name_to_mdl_to_mdlidx = defaultdict(list), defaultdict(dict)
    _model_name_to_default_index, _alias_hooks = {}, {}
    _managed_models = []
    __loaded_indices__ = False

    @classmethod
    def __load_settings__(cls):
        '''
        Populates the class level maps from `BUNGIESEARCH['INDICES']` and `BUNGIESEARCH['ALIASES']`.
        Only the first call does any work: the loaded flag is set before the modules are imported.

        :raise AttributeError: if two model indices of the same model are both marked as default.
        '''
        if cls.__loaded_indices__:
            return
        cls.__loaded_indices__ = True

        # Loading indices.
        for index_name, module_str in iteritems(cls.BUNGIE['INDICES']):
            index_module = import_module(module_str)
            for index_obj in itervalues(index_module.__dict__):
                try:
                    if issubclass(index_obj, ModelIndex) and index_obj != ModelIndex:
                        index_instance = index_obj()
                        assoc_model = index_instance.get_model()
                        cls._index_to_model[index_name].append(assoc_model)
                        cls._model_name_to_model_idx[assoc_model.__name__].append(index_instance)
                        cls._idx_name_to_mdl_to_mdlidx[index_name][assoc_model.__name__] = index_instance
                        if index_instance.is_default:
                            if assoc_model.__name__ in cls._model_name_to_default_index:
                                raise AttributeError('ModelIndex {} on index {} is marked as default, but {} was already set as default.'.format(index_instance, index_name, cls._model_name_to_default_index[assoc_model.__name__]))
                            cls._model_name_to_default_index[assoc_model.__name__] = index_instance
                except TypeError:
                    pass  # Oops, just attempted to get subclasses of a non-class.

        # Create reverse maps in order to have O(1) access.
        for index_name, models in iteritems(cls._index_to_model):
            for model in models:
                cls._model_to_index[model].append(index_name)
                cls._model_name_to_index[model.__name__].append(index_name)

        # Loading aliases.
        for alias_prefix, module_str in iteritems(cls.BUNGIE.get('ALIASES', {})):
            if alias_prefix is None:
                alias_prefix = 'bungie'
            if alias_prefix != '':
                alias_prefix += '_'
            alias_module = import_module(module_str)
            for alias_obj in itervalues(alias_module.__dict__):
                try:
                    if issubclass(alias_obj, SearchAlias) and alias_obj != SearchAlias:
                        alias_instance = alias_obj()
                        cls._alias_hooks[alias_prefix + alias_instance.alias_name] = alias_instance
                except TypeError:
                    pass  # Oops, just attempted to get subclasses of a non-class.

    @classmethod
    def _build_key(cls, urls, timeout, **settings):
        '''
        Builds the hashable key under which an elasticsearch client is cached.

        :param urls: a single URL string or an iterable of URLs.
        :param timeout: connection timeout.
        :param **settings: additional elasticsearch client settings.
        :return: a tuple `(urls as a tuple, timeout, repr of the settings sorted by key)`.
        '''
        # Order the settings by key and then turn it into a string with
        # repr. There are a lot of edge cases here, but the worst that
        # happens is that the key is different and so you get a new
        # Elasticsearch. We'll probably have to tweak this.
        settings = sorted(settings.items(), key=lambda item: item[0])
        settings = repr([(k, v) for k, v in settings])
        # elasticsearch allows URLs to be a string, so we make sure to
        # account for that when converting whatever it is into a tuple.
        if isinstance(urls, string_types):
            urls = (urls,)
        else:
            urls = tuple(urls)
        # Generate a tuple of all the bits and return that as the key
        # because that's hashable.
        key = (urls, timeout, settings)
        return key

    @classmethod
    def get_index(cls, model, via_class=False):
        '''
        Returns the list of index names for the given model as a class or a string.
        :param model: model name or model class if via_class set to True.
        :param via_class: set to True if parameter model is a class.
        :raise KeyError: If the provided model does not have any index associated.
        '''
        # NOTE: both maps are defaultdicts, so an unknown model currently yields an empty list instead of a KeyError.
        try:
            return cls._model_to_index[model] if via_class else cls._model_name_to_index[model]
        except KeyError:
            raise KeyError('Could not find any index defined for model {}. Is the model in one of the model index modules of BUNGIESEARCH["INDICES"]?'.format(model))

    @classmethod
    def get_model_index(cls, model, default=True):
        '''
        Returns the default model index for the given model, or the list of indices if default is False.
        :param model: model name as a string.
        :param default: set to False to get every model index of the model instead of the default one.
        :raise KeyError: If the provided model does not have any index associated.
        '''
        # NOTE: only the default lookup is backed by a plain dict; the non-default map is a defaultdict
        # and returns an empty list for an unknown model.
        try:
            if default:
                return cls._model_name_to_default_index[model]
            return cls._model_name_to_model_idx[model]
        except KeyError:
            raise KeyError('Could not find any model index defined for model {}.'.format(model))

    @classmethod
    def get_indices(cls):
        '''
        Returns the index names defined in the settings (keys of the index to model index map).
        '''
        return cls._idx_name_to_mdl_to_mdlidx.keys()

    @classmethod
    def get_models(cls, index, as_class=False):
        '''
        Returns the list of models defined for this index.
        :param index: index name.
        :param as_class: set to True to return the model as a model object instead of as a string.
        '''
        # NOTE: both maps are defaultdicts, so an unknown index currently yields an empty result instead of a KeyError.
        try:
            return cls._index_to_model[index] if as_class else cls._idx_name_to_mdl_to_mdlidx[index].keys()
        except KeyError:
            raise KeyError('Could not find any index named {}. Is this index defined in BUNGIESEARCH["INDICES"]?'.format(index))

    @classmethod
    def get_model_indices(cls, index):
        '''
        Returns the list of model indices (i.e. ModelIndex objects) defined for this index.
        :param index: index name.
        '''
        # NOTE: the map is a defaultdict, so an unknown index currently yields an empty result instead of a KeyError.
        try:
            return cls._idx_name_to_mdl_to_mdlidx[index].values()
        except KeyError:
            raise KeyError('Could not find any index named {}. Is this index defined in BUNGIESEARCH["INDICES"]?'.format(index))

    @classmethod
    def map_raw_results(cls, raw_results, instance=None):
        '''
        Maps raw results to database model objects.
        :param raw_results: list raw results as returned from elasticsearch-dsl-py.
        :param instance: Bungiesearch instance if you want to make use of `.only()` or `optimize_queries` as defined in the ModelIndex.
        :return: list of mapped results in the *same* order as returned by elasticsearch. Hits whose doc type or index is
                 not known from the settings are returned unmapped; hits without a matching database row are left as None.
        '''
        # Let's iterate over the results and determine the appropriate mapping.
        model_results = defaultdict(list)
        # Initializing the list to the number of returned results. This allows us to restore each item in its position.
        if hasattr(raw_results, 'hits'):
            results = [None] * len(raw_results.hits)
        else:
            results = [None] * len(raw_results)
        found_results = {}
        for pos, result in enumerate(raw_results):
            model_name = result.meta.doc_type
            if model_name not in Bungiesearch._model_name_to_index or result.meta.index not in Bungiesearch._model_name_to_index[model_name]:
                logger.warning('Returned object of type {} ({}) is not defined in the settings, or is not associated to the same index as in the settings.'.format(model_name, result))
                results[pos] = result
            else:
                model_results['{}.{}'.format(result.meta.index, model_name)].append(result.meta.id)
                found_results['{1.meta.index}.{0}.{1.meta.id}'.format(model_name, result)] = (pos, result.meta)

        # Now that we have model ids per model name, let's fetch everything at once.
        for ref_name, ids in iteritems(model_results):
            # Splitting from the right: the model name never contains a dot, but an index name may.
            index_name, model_name = ref_name.rsplit('.', 1)
            model_idx = Bungiesearch._idx_name_to_mdl_to_mdlidx[index_name][model_name]
            model_obj = model_idx.get_model()
            items = model_obj.objects.filter(pk__in=ids)
            # Explicit None check: the truth value of a Bungiesearch instance falls back to `__len__`, i.e. a count query.
            if instance is not None:
                if instance._only == '__model' or model_idx.optimize_queries:
                    desired_fields = model_idx.fields_to_fetch
                elif instance._only == '__fields':
                    desired_fields = instance._fields
                else:
                    desired_fields = instance._only

                if desired_fields:  # Prevents setting the database fetch to __fields but not having specified any field to elasticsearch.
                    items = items.only(
                        *[field.name
                          for field in model_obj._meta.get_fields()
                          # For complete backwards compatibility, you may want to exclude
                          # GenericForeignKey from the results.
                          if field.name in desired_fields and
                          not (field.many_to_one and field.related_model is None)
                          ]
                    )
            # Let's reposition each item in the results and set the _searchmeta meta information.
            for item in items:
                pos, meta = found_results['{}.{}.{}'.format(index_name, model_name, item.pk)]
                item._searchmeta = meta
                results[pos] = item

        return results

    def __init__(self, urls=None, timeout=None, force_new=False, raw_results=False, **kwargs):
        '''
        Creates a new ElasticSearch DSL object. Grabs the ElasticSearch connection from the pool
        if it has already been initialized. Otherwise, creates a new one.

        If no parameters are passed, everything is determined from the Django settings.

        :param urls: A list of URLs, or a single string of URL (without leading `http://`), or None to read from settings.
        :param timeout: Timeout used in the connection. Falls back to `BUNGIESEARCH['TIMEOUT']`, then to `DEFAULT_TIMEOUT`.
        :param force_new: Set to `True` to force a new elasticsearch connection. Otherwise will aggressively use any connection with the exact same settings.
        :param raw_results: Set to `True` to return the elasticsearch-dsl-py results without mapping them to model instances.
        :param **kwargs: `using`, `index`, `doc_type` and `extra` are passed to elasticsearch-dsl-py's Search; any other
                         keyword is passed to the low level elasticsearch client (defaults to `BUNGIESEARCH['ES_SETTINGS']` if none is given).
        '''

        Bungiesearch.__load_settings__()

        urls = urls or Bungiesearch.BUNGIE['URLS']
        if not timeout:
            timeout = Bungiesearch.BUNGIE.get('TIMEOUT', Bungiesearch.DEFAULT_TIMEOUT)

        search_keys = ['using', 'index', 'doc_type', 'extra']
        search_settings, es_settings = {}, {}
        for k, v in iteritems(kwargs):
            if k in search_keys:
                search_settings[k] = v
            else:
                es_settings[k] = v

        if not es_settings:
            # If there aren't any provided elasticsearch settings, let's see if it's defined in the settings.
            es_settings = Bungiesearch.BUNGIE.get('ES_SETTINGS', {})

        # Building a caching key to cache the es_instance for later use (and retrieved a previously cached es_instance).
        cache_key = Bungiesearch._build_key(urls, timeout, **es_settings)
        es_instance = None
        if not force_new:
            if cache_key in Bungiesearch._cached_es_instances:
                es_instance = Bungiesearch._cached_es_instances[cache_key]

        if not es_instance:
            es_instance = Elasticsearch(urls, timeout=timeout, **es_settings)
            Bungiesearch._cached_es_instances[cache_key] = es_instance

        if 'using' not in search_settings:
            search_settings['using'] = es_instance

        super(Bungiesearch, self).__init__(**search_settings)

        # Creating instance attributes.
        self._only = []  # Stores the exact fields to fetch from the database when mapping.
        self.results = []  # Store the mapped and unmapped results.
        self._raw_results_only = raw_results

    def _clone(self):
        '''
        Must clone additional fields to those cloned by elasticsearch-dsl-py.
        '''
        instance = super(Bungiesearch, self)._clone()
        instance._raw_results_only = self._raw_results_only
        # Carry the `.only()` restriction over, otherwise it is lost as soon as another call is chained after it.
        instance._only = self._only
        return instance

    def get_es_instance(self):
        '''
        Returns the low level elasticsearch instance to perform low level operations.
        '''
        return self._using

    def execute_raw(self):
        '''
        Executes the search through elasticsearch-dsl-py and stores the response in `self.raw_results`.
        '''
        self.raw_results = super(Bungiesearch, self).execute()

    def execute(self, return_results=True):
        '''
        Executes the query and attempts to create model objects from results.

        Non-empty results from a previous execution are reused instead of querying again.

        :param return_results: set to False to only execute and store the results.
        :return: the results if `return_results` is True, None otherwise.
        '''
        if self.results:
            return self.results if return_results else None

        self.execute_raw()

        if self._raw_results_only:
            self.results = self.raw_results
        else:
            self.map_results()

        if return_results:
            return self.results

    def map_results(self):
        '''
        Maps raw results and store them.
        '''
        self.results = Bungiesearch.map_raw_results(self.raw_results, self)

    def only(self, *fields):
        '''
        Restricts the fields to be fetched when mapping. Set to `__model` to fetch all fields define in the ModelIndex.

        :return: a clone of this search carrying the restriction.
        '''
        s = self._clone()
        if len(fields) == 1 and fields[0] == '__model':
            s._only = '__model'
        else:
            s._only = fields
        return s

    def __iter__(self):
        '''
        Allows iterating on the response.
        '''
        self.execute()
        return iter(self.results)

    def __len__(self):
        '''
        Return elasticsearch-dsl-py count.
        '''
        return self.count()

    def __getitem__(self, key):
        '''
        Overwriting the step in slice. It is used to set the results either as elasticsearch-dsl-py response object, or
        attempt to fetch the Django model instance.
        :warning: Getting an item will execute this search. Any search operation or field setting *must* be done prior to getting an item.
        :return: a single result for an integer key or a slice spanning exactly one item (an empty list if there is no
                 such result), the list of results otherwise.
        '''
        if isinstance(key, slice):
            if key.step is not None:
                # The slice step is hijacked as the raw results flag, and removed from the slice sent to elasticsearch-dsl-py.
                self._raw_results_only = key.step
                if key.start is not None and key.stop is not None:
                    single_item = key.start - key.stop == -1
                elif key.start is None and key.stop == 1:
                    single_item = True
                else:
                    single_item = False
                key = slice(key.start, key.stop)
            else:
                single_item = False
        else:
            single_item = True
        results = super(Bungiesearch, self).__getitem__(key).execute()
        if single_item:
            try:
                return results[0]
            except IndexError:
                return []
        return results

    def hook_alias(self, alias, model_obj=None):
        '''
        Returns the alias function, if it exists and if it can be applied to this model.

        :param alias: name of the search alias, including its namespace prefix.
        :param model_obj: optional model the alias is applied to.
        :raise AttributeError: if no alias of that name is defined.
        :raise ValueError: if the alias is restricted to models which match neither `model_obj` nor the doc types of this search.
        '''
        try:
            search_alias = self._alias_hooks[alias]
        except KeyError:
            raise AttributeError('Could not find search alias named {}. Is this alias defined in BUNGIESEARCH["ALIASES"]?'.format(alias))
        else:
            if search_alias._applicable_models and \
                ((model_obj and model_obj not in search_alias._applicable_models) or
                 not any([app_model_obj.__name__ in self._doc_type for app_model_obj in search_alias._applicable_models])):
                    raise ValueError('Search alias {} is not applicable to model/doc_types {}.'.format(alias, model_obj if model_obj else self._doc_type))
            return search_alias.prepare(self, model_obj).alias_for

    def __getattr__(self, alias):
        '''
        Shortcut for search aliases. As explained in the docs (https://docs.python.org/2/reference/datamodel.html#object.__getattr__),
        this is only called as a last resort in case the attribute is not found.
        '''
        return self.hook_alias(alias)
--------------------------------------------------------------------------------
/bungiesearch/aliases.py:
--------------------------------------------------------------------------------
class SearchAlias(object):
    '''
    Defines search aliases for specific models. Essentially works like Django Managers but for Bungiesearch.
    These work for both managers and bungiesearch instances. See the docs (and if they aren't clear, open an issue).

    An optional inner `Meta` class may define `models` (the models this alias is restricted to)
    and `alias_name` (defaults to the lower cased class name).
    '''
    def __init__(self):
        self._classname = type(self).__name__
        try:
            _meta = getattr(self, 'Meta')
        except AttributeError:
            # No Meta class: the alias is not restricted to any model and is named after the class.
            self._applicable_models = []
            self.alias_name = self._classname.lower()
        else:
            self._applicable_models = getattr(_meta, 'models', None)
            self.alias_name = getattr(_meta, 'alias_name', self._classname.lower())
        # Both are set by `prepare`, on a clone of this alias.
        self.search_instance = None
        self.model = None

    def _clone(self):
        '''
        Returns a new instance of this alias with the same name and applicable models,
        but without any search instance or model attached.
        '''
        s = self.__class__()
        s._classname = self._classname
        s._applicable_models = self._applicable_models
        s.alias_name = self.alias_name
        return s

    def prepare(self, search_instance, model_obj):
        '''
        Returns a clone of this alias bound to the provided search instance and model.

        :param search_instance: search instance the alias will be applied on.
        :param model_obj: model the alias is called for, or None.
        '''
        s = self._clone()
        s.search_instance = search_instance
        s.model = model_obj
        return s

    def alias_for(self, **kwargs):
        '''
        Must be implemented by subclasses: applies the alias to `self.search_instance`.

        :raise NotImplementedError: always, in this base class.
        '''
        raise NotImplementedError('{} does not provide an implementation for alias_for.'.format(self._classname))

    def get_model(self):
        '''
        Returns the model this alias applies to: the model it was prepared with, or the model
        associated to the single doc type of the search instance.

        :raise ValueError: if the search instance does not have exactly one doc type, if no model index
                           is known for that doc type, or if its model indices do not share the same model.
        '''
        if self.model:
            return self.model
        if self.search_instance._doc_type and len(self.search_instance._doc_type) == 1:
            doc_type = self.search_instance._doc_type[0]
            idxes = self.search_instance._model_name_to_model_idx[doc_type]
            if not idxes:
                # Would otherwise surface as an unrelated IndexError on `idxes[0]`.
                raise ValueError('SearchAlias {} could not find any model index for doc type {}.'.format(self._classname, doc_type))
            first_mdl = idxes[0].get_model()
            if all(mdlidx.get_model() == first_mdl for mdlidx in idxes[1:]):
                return first_mdl
            raise ValueError('SearchAlias {} is associated to more than one index, and the model differs between indices!'.format(self._classname))
        raise ValueError('Instance associated to zero doc types or more than one.')
--------------------------------------------------------------------------------
/bungiesearch/fields.py:
--------------------------------------------------------------------------------
1 | from django.template import Context, loader
2 | from django.template.defaultfilters import striptags
3 | from six import iteritems
4 |
5 | from elasticsearch_dsl.analysis import Analyzer
6 |
7 |
class AbstractField(object):
    '''
    Represents an elasticsearch index field and values from given objects.
    Currently does not support binary fields, but those can be created by manually providing a dictionary.

    Values are extracted using the `model_attr` or `eval_as` attribute.

    Subclasses are expected to override `coretype` and `fields` (and optionally `defaults`)
    with plain class attributes.
    '''
    meta_fields = ['_index', '_uid', '_type', '_id']
    common_fields = ['index_name', 'store', 'index', 'boost', 'null_value', 'copy_to', 'type', 'fields']

    @property
    def fields(self):
        '''
        Names of the attributes allowed for this field type; must be overridden by subclasses.
        '''
        raise NotImplementedError('Allowed fields are not defined.')

    @property
    def coretype(self):
        '''
        Elasticsearch core type (or list of allowed core types); must be overridden by subclasses.
        '''
        raise NotImplementedError('Core type is not defined!')

    @property
    def defaults(self):
        '''
        Stores default values.
        '''
        return {}

    def _display_name(self):
        '''
        Returns the name of this field type for error messages: the result of `__unicode__`
        when the subclass defines it, the class name otherwise.
        '''
        describe = getattr(self, '__unicode__', None)
        if callable(describe):
            return describe()
        return type(self).__name__

    def __init__(self, **args):
        '''
        Performs several checks to ensure that the provided attributes are valid. Will not check their values.

        :param coretype: required when the field type supports several core types.
        :param model_attr: name of the model attribute (or dictionary key) holding the value.
        :param eval_as: Python expression evaluated to compute the value.
        :param template: name of the template rendered to compute the value.
        :raise KeyError: if the core type is missing or unsupported, or if an attribute is not allowed for this field type.
        '''
        if isinstance(self.coretype, list):
            if 'coretype' not in args:
                raise KeyError('{} can be represented as one of the following types: {}. Specify which to select as the `coretype` parameter.'.format(self._display_name(), ', '.join(self.coretype)))
            if args['coretype'] not in self.coretype:
                raise KeyError('Core type {} is not supported by {}.'.format(args['coretype'], self._display_name()))
            self.type = args.pop('coretype')
        else:
            self.type = self.coretype

        self.model_attr = args.pop('model_attr', None)
        self.eval_func = args.pop('eval_as', None)
        self.template_name = args.pop('template', None)

        # Every remaining keyword becomes an instance attribute, and ends up in the mapping (see `json`).
        for attr, value in args.items():
            if attr not in self.fields and attr not in AbstractField.common_fields:
                raise KeyError('Attribute `{}` is not allowed for core type {}.'.format(attr, self.coretype))
            setattr(self, attr, value)

        # Defaults never overwrite an explicitly provided attribute.
        for attr, value in self.defaults.items():
            if not hasattr(self, attr):
                setattr(self, attr, value)

    def value(self, obj):
        '''
        Computes the value of this field to update the index.
        :param obj: object instance, as a dictionary or as a model instance.
        :raise KeyError: if none of `template`, `eval_as` and `model_attr` was provided.
        '''
        if self.template_name:
            t = loader.select_template([self.template_name])
            return t.render(Context({'object': obj}))

        if self.eval_func:
            try:
                # NOTE(review): `eval_as` is evaluated as Python code (with `obj` and `self` in scope);
                # it must only ever come from trusted, developer written index definitions.
                return eval(self.eval_func)
            except Exception as e:
                raise type(e)('Could not compute value of {} field (eval_as=`{}`): {}.'.format(self._display_name(), self.eval_func, e))

        elif self.model_attr:
            if isinstance(obj, dict):
                return obj[self.model_attr]
            current_obj = getattr(obj, self.model_attr)

            if callable(current_obj):
                return current_obj()
            else:
                return current_obj

        else:
            raise KeyError('{0} gets its value via a model attribute, an eval function, a template, or is prepared in a method '
                           'call but none of `model_attr`, `eval_as,` `template,` `prepare_{0}` is provided.'.format(self._display_name()))

    def json(self):
        '''
        Returns the mapping definition of this field as a dictionary: every instance attribute
        apart from those used to extract the value, with analyzer objects converted through `to_dict()`.
        '''
        mapping = {}
        for attr, val in self.__dict__.items():
            if attr in ('eval_func', 'model_attr', 'template_name'):
                continue
            elif attr in ('analyzer', 'index_analyzer', 'search_analyzer') and isinstance(val, Analyzer):
                mapping[attr] = val.to_dict()
            else:
                mapping[attr] = val

        return mapping
107 |
108 | # All the following definitions could probably be done with better polymorphism.
class StringField(AbstractField):
    '''
    Field of elasticsearch core type `string`, analyzed with the `snowball` analyzer by default.
    HTML tags are stripped from the extracted value.
    '''
    coretype = 'string'
    fields = ['doc_values', 'term_vector', 'norms', 'index_options', 'analyzer', 'index_analyzer', 'search_analyzer', 'include_in_all', 'ignore_above', 'position_offset_gap', 'fielddata', 'similarity']
    defaults = {'analyzer': 'snowball'}

    def value(self, obj):
        '''
        Returns the value computed by the parent class with its tags stripped, or None if there is no value.
        '''
        val = super(StringField, self).value(obj)
        if val is None:
            return None
        return striptags(val)

    def __unicode__(self):
        return 'StringField'

    # Python 3 never calls `__unicode__`: without this, `str()` falls back to the default object representation.
    __str__ = __unicode__
122 |
class NumberField(AbstractField):
    '''
    Numeric field. Several core types are supported, hence `coretype` must be provided on instantiation.
    '''
    coretype = ['float', 'double', 'byte', 'short', 'integer', 'long']
    fields = ['doc_values', 'precision_step', 'include_in_all', 'ignore_malformed', 'coerce']

    def __unicode__(self):
        return 'NumberField'

    # Python 3 never calls `__unicode__`: without this, `str()` falls back to the default object representation.
    __str__ = __unicode__
129 |
class DateField(AbstractField):
    '''
    Field of elasticsearch core type `date`.
    '''
    coretype = 'date'
    fields = ['format', 'doc_values', 'precision_step', 'include_in_all', 'ignore_malformed']

    def __unicode__(self):
        return 'DateField'

    # Python 3 never calls `__unicode__`: without this, `str()` falls back to the default object representation.
    __str__ = __unicode__
136 |
class BooleanField(AbstractField):
    '''
    Field of elasticsearch core type `boolean`.
    '''
    coretype = 'boolean'
    fields = []  # No specific fields.

    def __unicode__(self):
        return 'BooleanField'

    # Python 3 never calls `__unicode__`: without this, `str()` falls back to the default object representation.
    __str__ = __unicode__
143 |
144 | # Correspondence between a Django field and an elasticsearch field.
def django_field_to_index(field, **attr):
    '''
    Returns the index field type that would likely be associated with each Django type.

    :param field: Django model field; only its `get_internal_type()` is used.
    :param **attr: attributes passed as is to the created index field.
    :return: a DateField, BooleanField or NumberField for the matching Django types, a StringField for any other type.
    '''

    dj_type = field.get_internal_type()

    if dj_type in ('DateField', 'DateTimeField'):
        return DateField(**attr)
    if dj_type in ('BooleanField', 'NullBooleanField'):
        return BooleanField(**attr)

    # Exact match on the internal type name: the previous `in ('BigIntegerField')` test was a
    # substring check against a plain string, because of the missing trailing comma.
    number_coretypes = {
        'DecimalField': 'float',
        'FloatField': 'float',
        'PositiveSmallIntegerField': 'short',
        'SmallIntegerField': 'short',
        'IntegerField': 'integer',
        'PositiveIntegerField': 'integer',
        'AutoField': 'integer',
        'BigIntegerField': 'long',
    }
    coretype = number_coretypes.get(dj_type)
    if coretype is not None:
        return NumberField(coretype=coretype, **attr)

    return StringField(**attr)
166 |
--------------------------------------------------------------------------------
/bungiesearch/indices.py:
--------------------------------------------------------------------------------
1 | from six import iteritems, text_type
2 |
3 | from elasticsearch_dsl.analysis import Analyzer
4 |
5 | from .fields import AbstractField, django_field_to_index
6 | from .logger import logger
7 |
8 |
9 | class ModelIndex(object):
10 | '''
11 | Introspects a model to generate an indexable mapping and methods to extract objects.
12 | Supports custom fields, including Python code, and all elasticsearch field types (apart from binary type).
13 |
14 | ModelIndex does efficient querying by only fetching from the database fields which are to be indexed.
15 |
16 | How to create an index?
17 |
18 | 1. Create a class which inherits from ModelIndex.
19 | 2. Define custom indexed fields as class attributes. Values must be instances AbstractField. Important info in 3b.
20 | 3. Define a `Meta` subclass, which must contain at least `model` as a class attribute.
21 | a. Optional class attributes: `fields`, `excludes` and `additional_fields`.
22 | b. If custom indexed field requires model attributes which are not in the difference between `fields` and `excludes`, these must be defined in `additional_fields`.
23 | '''
24 | def __init__(self):
25 | # Introspect the model, adding/removing fields as needed.
26 | # Adds/Excludes should happen only if the fields are not already
27 | # defined in `self.fields`.
28 | try:
29 | _meta = getattr(self, 'Meta')
30 | except AttributeError:
31 | raise AttributeError('ModelIndex {} does not contain a Meta class.'.format(self.__class__.__name__))
32 |
33 | self.model = getattr(_meta, 'model', None)
34 | self.fields = {}
35 | fields = getattr(_meta, 'fields', [])
36 | excludes = getattr(_meta, 'exclude', [])
37 | hotfixes = getattr(_meta, 'hotfixes', {})
38 | additional_fields = getattr(_meta, 'additional_fields', [])
39 | id_field = getattr(_meta, 'id_field', 'id')
40 | self.updated_field = getattr(_meta, 'updated_field', None)
41 | self.optimize_queries = getattr(_meta, 'optimize_queries', False)
42 | self.is_default = getattr(_meta, 'default', True)
43 | self.indexing_query = getattr(_meta, 'indexing_query', None)
44 |
45 | # Add in fields from the model.
46 | self.fields.update(self._get_fields(fields, excludes, hotfixes))
47 | # Elasticsearch uses '_id' to identify items uniquely, so let's duplicate that field.
48 | # We're duplicating it in order for devs to still perform searches on `.id` as expected.
49 | self.fields_to_fetch = list(set(self.fields.keys()).union(additional_fields))
50 |
51 | # Adding or updating the fields which are defined at class level.
52 | for cls_attr, obj in iteritems(self.__class__.__dict__):
53 | if not isinstance(obj, AbstractField):
54 | continue
55 |
56 | if cls_attr in self.fields:
57 | logger.info('Overwriting implicitly defined model field {} ({}) its explicit definition: {}.'.format(cls_attr, text_type(self.fields[cls_attr]), text_type(obj)))
58 | self.fields[cls_attr] = obj
59 |
60 | self.fields['_id'] = self.fields[id_field]
61 |
62 | def matches_indexing_condition(self, item):
63 | '''
64 | Returns True by default to index all documents.
65 | '''
66 | return True
67 |
68 | def get_model(self):
69 | return self.model
70 |
71 | def get_mapping(self, meta_fields=True):
72 | '''
73 | Returns the mapping for the index as a dictionary.
74 |
75 | :param meta_fields: Also include elasticsearch meta fields in the dictionary.
76 | :return: a dictionary which can be used to generate the elasticsearch index mapping for this doctype.
77 | '''
78 | return {'properties': dict((name, field.json()) for name, field in iteritems(self.fields) if meta_fields or name not in AbstractField.meta_fields)}
79 |
def collect_analysis(self):
    '''
    Merges the analysis definitions of every `Analyzer` attached to the fields of this index.

    :return: a dictionary in which each key of the collected definitions maps to the merged
        dictionary of all the entries found under that key.
    '''
    def iter_definitions():
        # Yields the analysis definition of each Analyzer instance found on the fields.
        for field in self.fields.values():
            for attr_name in ('analyzer', 'index_analyzer', 'search_analyzer'):
                if not hasattr(field, attr_name):
                    continue
                candidate = getattr(field, attr_name)
                if isinstance(candidate, Analyzer):
                    yield candidate.get_analysis_definition()

    analysis = {}
    for definition in iter_definitions():
        if definition is None:
            continue
        for section in definition:
            analysis.setdefault(section, {}).update(definition[section])

    return analysis
103 |
def serialize_object(self, obj, obj_pk=None):
    '''
    Serializes an object for it to be added to the index.

    :param obj: Object to be serialized. When it is falsy, the object is looked up from `obj_pk` instead.
    :param obj_pk: Object primary key. Only used when `obj` is falsy.
    :return: A dictionary mapping every field name of this index to its serialized value.
    :raise ValueError: if the lookup by primary key fails for any reason.
    '''
    if not obj:
        try:
            # `filter` followed by `values` restricts the query to the fields listed in `fields_to_fetch`.
            matches = self.model.objects.filter(pk=obj_pk).values(*self.fields_to_fetch)
            obj = matches[0]
        except Exception as e:
            raise ValueError('Could not find object of primary key = {} in model {} (model index class {}). (Original exception: {}.)'.format(obj_pk, self.model, self.__class__.__name__, e))

    serialized_object = {}
    for name, field in iteritems(self.fields):
        preparer_name = "prepare_%s" % name
        # A `prepare_<field name>` attribute on the index takes precedence over the field's own value.
        if hasattr(self, preparer_name):
            serialized_object[name] = getattr(self, preparer_name)(obj)
        else:
            serialized_object[name] = field.value(obj)

    return serialized_object
130 |
def _get_fields(self, fields, excludes, hotfixes):
    '''
    Builds the index fields which are derived from the fields of `self.model`.

    A model field is skipped when it is already present in `self.fields`, when it is not part
    of a non-empty `fields` listing, when it is listed in `excludes`, or when it is a relation.

    :param fields: optional list of model field names to include; when empty or None, all are candidates.
    :param excludes: optional list of model field names to leave out.
    :param hotfixes: optional dictionary mapping a field name to keyword arguments which are merged
        over the computed ones before calling `django_field_to_index`.
    :return: a dictionary mapping each retained model field name to the result of `django_field_to_index`.
    '''
    final_fields = {}
    fields = fields or []
    excludes = excludes or []
    hotfixes = hotfixes or {}
    missing = object()

    for f in self.model._meta.fields:
        # If the field name is already present, skip
        if f.name in self.fields:
            continue

        # If field is not present in explicit field listing, skip
        if fields and f.name not in fields:
            continue

        # If field is in exclude list, skip
        if f.name in excludes:
            continue

        # If field is a relation, skip. `rel` is only consulted when `remote_field` does not
        # exist, so that fields lacking the legacy `rel` attribute no longer raise AttributeError.
        relation = getattr(f, 'remote_field', missing)
        if relation is missing:
            relation = getattr(f, 'rel', None)
        if relation:
            continue

        attr = {'model_attr': f.name}
        if f.has_default():
            attr['null_value'] = f.default

        if f.name in hotfixes:
            attr.update(hotfixes[f.name])

        final_fields[f.name] = django_field_to_index(f, **attr)

    return final_fields
167 |
def __str__(self):
    '''
    :return: `<IndexClassName:ModelName>`, built from the class name of this instance and the name of its model.
    '''
    template = '<{0.__class__.__name__}:{0.model.__name__}>'
    return template.format(self)
170 |
--------------------------------------------------------------------------------
/bungiesearch/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
# Logger registered under the name 'bungiesearch'; other modules of the package import it from here.
logger = logging.getLogger('bungiesearch')
4 |
--------------------------------------------------------------------------------
/bungiesearch/management/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChristopherRabotin/bungiesearch/13768342bc2698b214eb0003c2d113b6e273c30d/bungiesearch/management/__init__.py
--------------------------------------------------------------------------------
/bungiesearch/management/commands/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Commands allow you to manage the index.
3 | '''
--------------------------------------------------------------------------------
/bungiesearch/management/commands/_utils.py:
--------------------------------------------------------------------------------
1 |
2 |
def add_arguments(obj, parser):
    '''
    Registers the `--noinput`, `--guilty-as-charged` and `--timeout` options on a parser.

    :param obj: unused here; the command classes assign this function as their `add_arguments`
        attribute, so it receives the command instance.
    :param parser: object exposing `add_argument`, on which the three options are declared.
    '''
    shared_options = (
        ('--noinput', {
            'action': 'store_false',
            'dest': 'interactive',
            'default': True,
            'help': 'If provided, no prompts will be issued to the user and the data will be wiped out',
        }),
        ('--guilty-as-charged', {
            'action': 'store_true',
            'dest': 'confirmed',
            'default': False,
            'help': 'Flag needed to confirm the clear index.',
        }),
        ('--timeout', {
            'action': 'store',
            'dest': 'timeout',
            'default': None,
            'type': int,
            'help': 'Specify the timeout in seconds for each operation.',
        }),
    )

    for flag, settings in shared_options:
        parser.add_argument(flag, **settings)
26 |
--------------------------------------------------------------------------------
/bungiesearch/management/commands/clear_index.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | from django.core.management import call_command
4 | from django.core.management.base import BaseCommand
5 | from django.utils import six
6 |
7 | from ._utils import add_arguments
8 |
9 |
class Command(BaseCommand):
    help = 'Clears the search index of its contents.'
    add_arguments = add_arguments

    def handle(self, **options):
        '''
        Deletes and then re-creates the search index through the `search_index` command.

        When the `interactive` option is true (the default), the user must answer `y` to the
        confirmation prompt. In every case the `confirmed` option (`--guilty-as-charged`) must be
        set. When either check fails, a message is printed and the process exits without acting.

        :param options: the parsed command options, forwarded as is to `search_index`.
        '''
        if options.get('interactive', True):
            print('WARNING: This will irreparably remove EVERYTHING from your search index.')
            print('Your choices after this are to restore from backups or rebuild via the `rebuild_index` command.')

            yes_or_no = six.moves.input('Are you sure you wish to continue? [y/N] ')
            # Print the blank separator line explicitly: a bare `print` is a no-op expression on Python 3.
            print('')

            if yes_or_no not in ['y', 'N']:
                print('No action taken: please type either "y" or "N".')
                sys.exit()

            if yes_or_no == 'N':
                print('No action taken.')
                sys.exit()

        if not options['confirmed']:
            print('No action taken: you must provide the --guilty-as-charged flag.')
            sys.exit()

        call_command('search_index', action='delete', **options)
        call_command('search_index', action='create', **options)
36 |
--------------------------------------------------------------------------------
/bungiesearch/management/commands/rebuild_index.py:
--------------------------------------------------------------------------------
1 | from django.core.management import call_command
2 | from django.core.management.base import BaseCommand
3 |
4 | from ._utils import add_arguments
5 |
6 |
class Command(BaseCommand):
    help = "Rebuilds the search index by clearing the search index and then performing an update."
    add_arguments = add_arguments

    def handle(self, **options):
        '''
        Runs `clear_index`, then `search_index` with the `update` action.

        :param options: the parsed command options, forwarded unchanged to both commands.
        '''
        forwarded = options
        # Step 1: wipe and re-create the index.
        call_command('clear_index', **forwarded)
        # Step 2: index the documents again.
        call_command('search_index', action='update', **forwarded)
14 |
--------------------------------------------------------------------------------
/bungiesearch/management/commands/search_index.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 |
3 | from django.core.management.base import BaseCommand
4 | from six import iteritems
5 |
6 | from ... import Bungiesearch
7 | from ...logger import logger
8 | from ...utils import update_index
9 |
10 |
class Command(BaseCommand):
    '''
    Management command which creates, updates or deletes the search indices and their mappings.
    '''
    args = ''
    help = 'Manage search index.'

    def add_arguments(self, parser):
        '''
        Declares the command line options of this command.

        The five action flags all store their own name in the `action` option.

        :param parser: the argument parser on which the options are declared.
        '''
        action_flags = (
            ('create', 'Create the index specified in the settings with the mapping generating from the search indices.'),
            ('update', 'Update the index specified in the settings with the mapping generating from the search indices.'),
            ('update-mapping', 'Update the mapping of specified models (or all models) on the index specified in the settings.'),
            ('delete', 'Delete the index specified in the settings. Requires the "--guilty-as-charged" flag.'),
            ('delete-mapping', 'Delete the mapping of specified models (or all models) on the index specified in the settings. Requires the "--guilty-as-charged" flag.'),
        )
        for name, help_text in action_flags:
            parser.add_argument(
                '--' + name,
                action='store_const',
                dest='action',
                const=name,
                help=help_text)

        parser.add_argument(
            '--guilty-as-charged',
            action='store_true',
            dest='confirmed',
            default=False,
            help='Flag needed to delete an index.')
        parser.add_argument(
            '--models',
            action='store',
            dest='models',
            default=None,
            help='Models to be updated, separated by commas. If none are specified, then all models defined in the index will be updated.')
        parser.add_argument(
            '--index',
            action='store',
            dest='index',
            default=None,
            help='Specify the index for which to apply the action, as defined in BUNGIESEARCH.INDEXES of settings. Defaults to using all indices.')
        parser.add_argument(
            '--bulk-size',
            action='store',
            dest='bulk_size',
            default=100,
            type=int,
            help='Specify the number of items to be updated together.')
        parser.add_argument(
            '--num-docs',
            action='store',
            dest='num_docs',
            default=-1,
            type=int,
            help='Specify the maximum number of items to be indexed. By default will index the whole model.')
        parser.add_argument(
            '--start',
            action='store',
            dest='start_date',
            default=None,
            type=str,
            help='Specify the start date and time of documents to be indexed.')
        parser.add_argument(
            '--end',
            action='store',
            dest='end_date',
            default=None,
            type=str,
            help='Specify the end date and time of documents to be indexed.')
        parser.add_argument(
            '--timeout',
            action='store',
            dest='timeout',
            default=None,
            type=int,
            help='Specify the timeout in seconds for each operation.')

    def handle(self, *args, **options):
        '''
        Dispatches to the operation selected by the `action` option.

        Any action other than `create`, `update-mapping`, `delete` and `delete-mapping` triggers
        a document update.

        :raise ValueError: if no action is given, or if a delete action lacks the `confirmed` flag.
        '''
        src = Bungiesearch(timeout=options.get('timeout'))
        es = src.get_es_instance()

        action = options['action']
        if not action:
            raise ValueError('No action specified. Must be one of "create", "update" or "delete".')

        if action.startswith('delete'):
            if not options['confirmed']:
                raise ValueError('If you know what a delete operation does (on index or mapping), add the --guilty-as-charged flag.')
            if action == 'delete':
                self._delete_indices(src, es, options)
            else:
                self._delete_mappings(src, es, options)
        elif action == 'create':
            self._create_indices(src, es, options)
        elif action == 'update-mapping':
            self._update_mappings(src, es, options)
        else:
            self._update_documents(src, options)

    @staticmethod
    def _selected_indices(src, options):
        ''' Returns the index given in the options as a one-item list, or all the known indices. '''
        if options['index']:
            return [options['index']]
        return src.get_indices()

    @staticmethod
    def _selected_models(options):
        ''' Returns the comma separated model names of the `models` option as a list (empty if unset). '''
        if not options['models']:
            return []
        return [name.strip() for name in options['models'].split(',') if name.strip()]

    def _delete_indices(self, src, es, options):
        ''' Deletes the selected indices, ignoring the ones which do not exist (HTTP 404). '''
        for index in self._selected_indices(src, options):
            logger.warning('Deleting elastic search index {}.'.format(index))
            es.indices.delete(index=index, ignore=404)

    def _delete_mappings(self, src, es, options):
        ''' Deletes the mapping of the selected models (or of all the models) from their indices. '''
        index_to_doctypes = defaultdict(list)
        model_names = self._selected_models(options)
        if model_names:
            for model_name in model_names:
                for index in src.get_index(model_name):
                    index_to_doctypes[index].append(model_name)
            # Logged once the indices are known: no index name is available before the lookup above.
            logger.info('Deleting mapping for models {} on index {}.'.format(options['models'], list(index_to_doctypes.keys())))
        elif options['index']:
            index = options['index']
            logger.info('Deleting mapping for all models on index {}.'.format(index))
            index_to_doctypes[index] = src.get_models(index)
        else:
            for index in src.get_indices():
                index_to_doctypes[index] = src.get_models(index)
            logger.info('Deleting mapping for all models ({}) on all indices ({}).'.format(list(index_to_doctypes.values()), list(index_to_doctypes.keys())))

        # NOTE(review): Elasticsearch 2.x appears to have dropped the delete-mapping API; confirm that
        # the installed client still exposes `indices.delete_mapping`.
        for index, doctype_list in iteritems(index_to_doctypes):
            es.indices.delete_mapping(index, ','.join(doctype_list), params=None)

    def _create_indices(self, src, es, options):
        ''' Creates the selected indices with the mappings and analysis settings of their model indices. '''
        indices = self._selected_indices(src, options)
        for index in indices:
            mapping = {}
            analysis = {'analyzer': {}, 'tokenizer': {}, 'filter': {}}

            for mdl_idx in src.get_model_indices(index):
                mapping[mdl_idx.get_model().__name__] = mdl_idx.get_mapping(meta_fields=False)

                mdl_analysis = mdl_idx.collect_analysis()
                for key in analysis:
                    value = mdl_analysis.get(key)
                    if value is not None:
                        analysis[key].update(value)

            logger.info('Creating index {} with {} doctypes.'.format(index, len(mapping)))
            es.indices.create(index=index, body={'mappings': mapping, 'settings': {'analysis': analysis}})

        es.cluster.health(index=','.join(indices), wait_for_status='green', timeout='30s')

    def _update_mappings(self, src, es, options):
        ''' Puts the mapping of the selected models (or of all the models) on the selected indices. '''
        # `raw_input` only exists on Python 2: rely on the portable alias provided by six.
        from six.moves import input as prompt

        models = self._selected_models(options)
        for index in self._selected_indices(src, options):
            model_indices = src._idx_name_to_mdl_to_mdlidx[index]
            for model_name in model_indices:
                if models and model_name not in models:
                    continue
                logger.info('Updating mapping of model/doctype {} on index {}.'.format(model_name, index))
                try:
                    es.indices.put_mapping(model_name, model_indices[model_name].get_mapping(), index=index)
                except Exception as e:
                    print(e)
                    if prompt('Something terrible happened! Type "abort" to stop updating the mappings: ') == 'abort':
                        # Bare raise keeps the original traceback.
                        raise
                    print('Continuing.')

    def _update_documents(self, src, options):
        ''' Indexes the documents of the selected models (or of all the models of the selected indices). '''
        indices = self._selected_indices(src, options)
        if options['models']:
            model_names = self._selected_models(options)
        else:
            model_names = [model for index in indices for model in src.get_models(index)]

        logger.info('Updating models {} on indices {}.'.format(model_names, indices))

        for model_name in model_names:
            model_index = src.get_model_index(model_name)
            # Use the custom indexing query of the model index when there is one, all objects otherwise.
            items = model_index.indexing_query
            if items is None:
                items = model_index.get_model().objects.all()
            update_index(items, model_name, bulk_size=options['bulk_size'], num_docs=options['num_docs'], start_date=options['start_date'], end_date=options['end_date'])
205 |
--------------------------------------------------------------------------------
/bungiesearch/managers.py:
--------------------------------------------------------------------------------
1 | from django.conf import settings as dj_settings
2 | from django.db.models import Manager
3 |
4 | from .logger import logger
5 |
6 |
class BungiesearchManager(Manager):
    '''
    A Django manager for integrated search into models.
    '''
    model = None

    @property
    def search(self):
        '''
        :return: a `Bungiesearch` instance restricted to the indices of `self.model` and to its doctype.
        '''
        from bungiesearch import Bungiesearch
        return Bungiesearch().index(*Bungiesearch.get_index(self.model, via_class=True)).doc_type(self.model.__name__)

    def search_index(self, index):
        '''
        Searches the doctype of `self.model` on a specific index.

        A warning is logged when the model is not registered on that index.

        :param index: name of the index to search.
        :return: a `Bungiesearch` instance restricted to `index` and to the doctype of the model.
        '''
        from bungiesearch import Bungiesearch
        if index not in Bungiesearch.get_index(self.model, via_class=True):
            logger.warning('Model/doctype {} is not present on index {}: search may return no results.'.format(self.model.__name__, index))
        return Bungiesearch().index(index).doc_type(self.model.__name__)

    def custom_search(self, index, doc_type):
        '''
        Performs a search on a custom elasticsearch index and mapping. Will not attempt to map result objects.
        '''
        from bungiesearch import Bungiesearch
        return Bungiesearch(raw_results=True).index(index).doc_type(doc_type)

    def contribute_to_class(self, cls, name):
        '''
        Sets up the signal processor. Since self.model is not available
        in the constructor, we perform this operation here.
        '''
        super(BungiesearchManager, self).contribute_to_class(cls, name)

        from . import Bungiesearch
        from .signals import get_signal_processor
        settings = Bungiesearch.BUNGIE
        if 'SIGNALS' in settings:
            self.signal_processor = get_signal_processor()
            self.signal_processor.setup(self.model)

    def __getattr__(self, alias):
        '''
        Shortcut for search aliases. As explained in the docs (https://docs.python.org/2/reference/datamodel.html#object.__getattr__),
        this is only called as a last resort in case the attribute is not found.
        This function will check whether the given model is allowed to use the proposed alias and will raise an attribute error if not.

        :raise AttributeError: for names starting with an underscore, or when the `BUNGIESEARCH` setting is missing or empty.
        '''
        # Don't treat "private" attrs as possible aliases. This prevents an infinite recursion bug.
        # Similarly, if Bungiesearch is installed but not enabled, raise the expected error.
        # `startswith` also copes with an empty name, and the `getattr` default with an absent setting.
        if alias.startswith('_') or not getattr(dj_settings, 'BUNGIESEARCH', None):
            raise AttributeError("'{}' object has no attribute '{}'".format(type(self).__name__, alias))

        return self.search.hook_alias(alias, self.model)
57 |
--------------------------------------------------------------------------------
/bungiesearch/signals.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from importlib import import_module
3 | from threading import Lock
4 |
5 | from django.db.models import signals
6 |
7 | from . import Bungiesearch
8 | from .utils import delete_index_item, update_index
9 |
10 |
def get_signal_processor():
    '''
    Instantiates the signal processor configured under `SIGNALS` in `Bungiesearch.BUNGIE`.

    :return: an instance of the class designated by the dotted path stored under `SIGNAL_CLASS`
        when that key is present, an instance of `BungieSignalProcessor` otherwise.
    '''
    signal_settings = Bungiesearch.BUNGIE['SIGNALS']
    if 'SIGNAL_CLASS' not in signal_settings:
        return BungieSignalProcessor()

    # Everything before the last dot is the module path, the remainder is the class name.
    module_path, _, class_name = signal_settings['SIGNAL_CLASS'].rpartition('.')
    processor_class = getattr(import_module(module_path), class_name)
    return processor_class()
20 |
21 |
class BungieSignalProcessor(object):
    '''
    Connects the `post_save` and `pre_delete` signals of a model to the search index.

    Saved instances are buffered per sender and only sent for indexing once the buffer
    reaches the configured size; deletions are applied immediately.
    '''

    # Both attributes are defined on the class, hence shared by all the processor instances.
    __index_lock = Lock()
    __items_to_be_indexed = defaultdict(list)

    def post_save_connector(self, sender, instance, **kwargs):
        '''
        Buffers `instance` and indexes the whole buffer of `sender` once it is full.

        :param sender: the model class which sent the signal.
        :param instance: the saved instance.
        '''
        try:
            Bungiesearch.get_index(sender, via_class=True)
        except KeyError:
            return  # This model is not managed by Bungiesearch.

        try:
            buffer_size = Bungiesearch.BUNGIE['SIGNALS']['BUFFER_SIZE']
        except KeyError:
            buffer_size = 100

        flushed = None
        with self.__index_lock:
            pending = self.__items_to_be_indexed[sender]
            pending.append(instance)
            if len(pending) >= buffer_size:
                # Take the full buffer out and start a fresh one for this sender.
                flushed = pending
                self.__items_to_be_indexed[sender] = []

        if flushed:
            update_index(flushed, sender.__name__, bulk_size=buffer_size)

    def pre_delete_connector(self, sender, instance, **kwargs):
        '''
        Removes `instance` from the index.

        :param sender: the model class which sent the signal.
        :param instance: the instance about to be deleted.
        '''
        try:
            Bungiesearch.get_index(sender, via_class=True)
        except KeyError:
            return  # This model is not managed by Bungiesearch.

        delete_index_item(instance, sender.__name__)

    def setup(self, model):
        ''' Connects both connectors to the signals sent by `model`. '''
        signals.post_save.connect(self.post_save_connector, sender=model)
        signals.pre_delete.connect(self.pre_delete_connector, sender=model)

    def teardown(self, model):
        ''' Disconnects both connectors from the signals sent by `model`. '''
        signals.pre_delete.disconnect(self.pre_delete_connector, sender=model)
        signals.post_save.disconnect(self.post_save_connector, sender=model)
64 |
--------------------------------------------------------------------------------
/bungiesearch/utils.py:
--------------------------------------------------------------------------------
1 | from dateutil.parser import parse as parsedt
2 | from django.utils import timezone
3 |
4 | from elasticsearch.exceptions import NotFoundError
5 |
6 | from . import Bungiesearch
7 | from .logger import logger
8 |
9 | try:
10 | from elasticsearch.helpers import bulk_index
11 | except ImportError:
12 | from elasticsearch.helpers import bulk as bulk_index
13 |
14 |
def update_index(model_items, model_name, action='index', bulk_size=100, num_docs=-1, start_date=None, end_date=None, refresh=True):
    '''
    Updates the index for the provided model_items.
    :param model_items: a list of model_items (django Model instances, or proxy instances) which are to be indexed/updated or deleted.
    If action is 'index', the model_items must be serializable objects. If action is 'delete', the model_items must be primary keys
    corresponding to objects in the index.
    :param model_name: doctype, which must also be the model name.
    :param action: the action that you'd like to perform on this group of data. Must be in ('index', 'delete') and defaults to 'index.'
    :param bulk_size: bulk size for indexing. Defaults to 100.
    :param num_docs: maximum number of model_items from the provided list to be indexed. Defaults to -1, meaning all of them.
    :param start_date: start date for indexing. Must be as YYYY-MM-DD.
    :param end_date: end date for indexing. Must be as YYYY-MM-DD.
    :param refresh: a boolean that determines whether to refresh the index, making all operations performed since the last refresh
    immediately available for search, instead of needing to wait for the scheduled Elasticsearch execution. Defaults to True.
    :raise ValueError: if action is 'delete' and model_items is not iterable.
    :note: If model_items contain multiple models, then num_docs is applied to *each* model. For example, if bulk_size is set to 5,
    and item contains models Article and Article2, then 5 model_items of Article *and* 5 model_items of Article2 will be indexed.
    '''
    src = Bungiesearch()

    if action == 'delete' and not hasattr(model_items, '__iter__'):
        raise ValueError("If action is 'delete', model_items must be an iterable of primary keys.")

    # Remember whether the caller asked for a limit: `num_docs` is overwritten below when it did not.
    limit_requested = num_docs != -1

    logger.info('Getting index for model {}.'.format(model_name))
    for index_name in src.get_index(model_name):
        index_instance = src.get_model_index(model_name)
        model = index_instance.get_model()

        if num_docs == -1:
            if isinstance(model_items, (list, tuple)):
                num_docs = len(model_items)
            else:
                model_items = filter_model_items(index_instance, model_items, model_name, start_date, end_date)
                num_docs = model_items.count()

                if not model_items.ordered:
                    model_items = model_items.order_by('pk')
        elif limit_requested:
            # NOTE(review): the start and end dates are not applied on this path; confirm this is intended.
            logger.warning('Limiting the number of model_items to {} to {}.'.format(action, num_docs))

        logger.info('{} {} documents on index {}'.format(action, num_docs, index_name))
        prev_step = 0
        max_docs = num_docs + bulk_size if num_docs > bulk_size else bulk_size + 1
        for next_step in range(bulk_size, max_docs, bulk_size):
            # Cap the last batch so that no more than `num_docs` items are ever processed.
            upper = min(next_step, num_docs) if num_docs >= 0 else next_step
            logger.info('{}: documents {} to {} of {} total on index {}.'.format(action.capitalize(), prev_step, upper, num_docs, index_name))
            data = create_indexed_document(index_instance, model_items[prev_step:upper], action)
            bulk_index(src.get_es_instance(), data, index=index_name, doc_type=model.__name__, raise_on_error=True)
            prev_step = next_step

        if refresh:
            src.get_es_instance().indices.refresh(index=index_name)
65 |
66 |
def delete_index_item(item, model_name, refresh=True):
    '''
    Deletes an item from every index on which its model is registered.
    :param item: the object to remove; its elasticsearch id is computed by the `_id` field of the model index.
    :param model_name: doctype, which must also be the model name.
    :param refresh: a boolean that determines whether to refresh the index, making all operations performed since the last refresh
    immediately available for search, instead of needing to wait for the scheduled Elasticsearch execution. Defaults to True.
    '''
    src = Bungiesearch()

    logger.info('Getting index for model {}.'.format(model_name))
    for index_name in src.get_index(model_name):
        model_index = src.get_model_index(model_name)
        document_id = model_index.fields['_id'].value(item)
        try:
            src.get_es_instance().delete(index_name, model_name, document_id)
        except NotFoundError as e:
            # A missing document is only worth a warning.
            message = 'NotFoundError: could not delete {}.{} from index {}: {}.'.format(model_name, document_id, index_name, str(e))
            logger.warning(message)

        if refresh:
            src.get_es_instance().indices.refresh(index=index_name)
88 |
89 |
def create_indexed_document(index_instance, model_items, action):
    '''
    Creates the list of documents that will be passed into the bulk index function.

    :param index_instance: model index used to filter and serialize the items; unused when deleting.
    :param model_items: primary keys if action is 'delete', objects to serialize otherwise.
    :param action: 'delete' to build deletion entries; any other value builds serialized documents.
    :return: a list of dictionaries: either `_id`/`_op_type` deletion entries, or the serialized
        form of the items which match the indexing condition of `index_instance`.
    '''
    if action == 'delete':
        return [{'_id': pk, '_op_type': action} for pk in model_items]

    return [
        index_instance.serialize_object(doc)
        for doc in model_items
        if index_instance.matches_indexing_condition(doc)
    ]
104 |
105 |
def filter_model_items(index_instance, model_items, model_name, start_date, end_date):
    '''
    Filters the model items queryset based on start and end date.

    :param index_instance: model index whose `updated_field` names the field to filter on.
    :param model_items: the queryset to restrict.
    :param model_name: only used in the warning logged when there is no updated field.
    :param start_date: when truthy, lower bound (inclusive) as a date string.
    :param end_date: when truthy, upper bound (inclusive) as a date string.
    :return: the restricted queryset, or `model_items` itself when no restriction applies.
    '''
    updated_field = index_instance.updated_field
    if updated_field is None:
        logger.warning("No updated date field found for {} - not restricting with start and end date".format(model_name))
        return model_items

    for lookup_template, bound in (('{}__gte', start_date), ('{}__lte', end_date)):
        if bound:
            lookup = lookup_template.format(updated_field)
            model_items = model_items.filter(**{lookup: __str_to_tzdate__(bound)})

    return model_items
117 |
118 |
def __str_to_tzdate__(date_str):
    '''
    Parses a date string and makes the result aware, using the current timezone.

    :param date_str: the date, in a format understood by `dateutil.parser.parse`.
    :return: the value returned by `timezone.make_aware` for the parsed date.
    '''
    parsed = parsedt(date_str)
    current_tz = timezone.get_current_timezone()
    return timezone.make_aware(parsed, current_tz)
121 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | elasticsearch-dsl>=2.0.0,<3.0.0
2 | elasticsearch>=2.0.0,<3.0.0
3 | python-dateutil
4 | six
5 |
6 | bungiesearch
7 | coveralls
8 | pytz
9 |
--------------------------------------------------------------------------------
/runtests.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# pass in --cluster as an argument to start a cluster instead of a single node
set -e
# On exit, force-kill the background jobs started by this script.
trap 'jobs -p | xargs kill -9' EXIT

CLUSTER_URL=http://127.0.0.1:9200
ES_PATH=elasticsearch

# Explicit, quoted non-empty test: an unquoted expansion breaks as soon as the value contains spaces.
if [ -n "${TRAVIS:-}" ]; then
  ES_PATH=./elasticsearch-2.3.0/bin/elasticsearch
fi
12 |
# Succeeds when "$1" resolves to something the shell can run; prints nothing.
function has_command() {
  # Quoted so that a name containing spaces or glob characters is checked as a single word.
  type "$1" &> /dev/null
}
16 |
# Succeeds when an HTTP request to the URL "$1" completes without error; prints nothing.
function is_responding() {
  # Quoted: the health URLs passed in contain '?', which an unquoted expansion would treat as a glob.
  curl --output /dev/null --fail --silent "$1"
}
20 |
# Blocks until the cluster health endpoint answers for "$1" nodes with a green status,
# printing one dot per second while waiting.
function wait_for_cluster() {
  echo 'Waiting on elasticsearch to be ready on port 9200'
  while ! is_responding "$CLUSTER_URL/_cluster/health?wait_for_nodes=$1&wait_for_status=green"; do
    printf '.'
    sleep 1
  done
  echo
}
29 |
# Start elasticsearch ourselves unless something already answers on the cluster URL.
if ! is_responding "$CLUSTER_URL"; then
  # Check the binary which is actually going to be launched, not just a command named `elasticsearch`.
  if ! has_command "$ES_PATH"; then
    echo 'No elasticsearch command found and no server running'
    echo 'Elasticsearch cluster must be running on port 9200'
    exit 1
  else
    if [ "$1" != "--cluster" ]; then
      echo 'Starting single elasticsearch node'
      "$ES_PATH" -D es.index.number_of_replicas=0 &> /dev/null &
      wait_for_cluster 1
    else
      echo 'Starting elasticsearch cluster with 2 nodes'
      # Master-only node.
      "$ES_PATH" \
        -D es.cluster.name="mycluster" \
        -D es.node.name="mycluster-node1" \
        -D es.node.master=true \
        -D es.node.data=false \
        -D es.index.number_of_replicas=0 \
        -D es.network.host=127.0.0.1 \
        -D es.foreground=yes \
        -D es.discovery.zen.ping.multicast.enabled=false \
        -D es.discovery.zen.ping.unicast.hosts=127.0.0.1:9300,127.0.0.1:9301,127.0.0.1:9302 &> /dev/null &

      # Data-only node.
      "$ES_PATH" \
        -D es.cluster.name="mycluster" \
        -D es.node.name="mycluster-node2" \
        -D es.node.master=false \
        -D es.node.data=true \
        -D es.index.number_of_replicas=0 \
        -D es.network.host=127.0.0.1 \
        -D es.foreground=yes \
        -D es.discovery.zen.ping.multicast.enabled=false \
        -D es.discovery.zen.ping.unicast.hosts=127.0.0.1:9300,127.0.0.1:9301,127.0.0.1:9302 &> /dev/null &

      wait_for_cluster 2
    fi
  fi
fi

python -B tests/manage.py test

# only collect coverage in travis ci
if [ -n "${COVERAGE:-}" ]; then
  echo 'Starting to collect coverage...'
  coverage run --source=tests tests/manage.py test
fi
76 |
77 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | from os.path import dirname, join
4 |
5 | from setuptools import find_packages, setup
6 |
VERSION = (1, 3, 0)
__version__ = VERSION
__versionstr__ = '.'.join(map(str, VERSION))

# Placeholder, replaced right below by the content of the README file.
long_description = 'Should have been loaded from README.rst.'
with open(join(dirname(__file__), 'README.rst')) as f:
    long_description = f.read().strip()


install_requires = [
    'django>=1.8',
    'elasticsearch-dsl>=2.0.0,<3.0.0',
    'elasticsearch>=2.0.0,<3.0.0',
    'python-dateutil',
    'six',
]

tests_require = []

# use external unittest for 2.6
if sys.version_info[:2] == (2, 6):
    tests_require.append('unittest2')

setup(
    name="bungiesearch",
    description="A Django elasticsearch wrapper and helper using elasticsearch-dsl-py high level library.",
    license="BSD-3",
    url="https://github.com/ChristopherRabotin/bungiesearch",
    long_description=long_description,
    version=__versionstr__,
    author="Christopher Rabotin",
    author_email="christopher.rabotin@gmail.com",
    packages=find_packages(
        where='.',
        # `exclude` matches dotted package names, not paths: keep the top-level test packages
        # out of the distribution.
        exclude=('tests', 'tests.*')
    ),
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: BSD License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 2",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.4",
        "Programming Language :: Python :: 3.5",
        "Framework :: Django"
    ],
    keywords="elasticsearch haystack django bungiesearch",
    install_requires=install_requires,
    dependency_links=['https://github.com/elasticsearch/elasticsearch-dsl-py#egg=elasticsearch-dsl-py'],
)
58 |
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Installs the Python dependencies listed in requirements.txt.
# Refuses to run as root or outside a virtualenv unless --force is given as first argument.
set -e

# The command substitution is quoted so that the test stays well-formed whatever `whoami` prints.
if [ "$(whoami)" = "root" ] && [ "$1" != "--force" ]; then
  echo "It's not recommended to run setup with root"
  echo 'run with --force to ignore'
  exit 1
fi

if [ -z "$VIRTUAL_ENV" ] && [ "$1" != "--force" ]; then
  echo "$0 should be run inside a python virtualenv"
  echo 'run with --force to ignore'
  exit 1
fi

echo 'Installing Python dependencies'
pip install pip setuptools --upgrade
pip install -r requirements.txt
19 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChristopherRabotin/bungiesearch/13768342bc2698b214eb0003c2d113b6e273c30d/tests/__init__.py
--------------------------------------------------------------------------------
/tests/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChristopherRabotin/bungiesearch/13768342bc2698b214eb0003c2d113b6e273c30d/tests/core/__init__.py
--------------------------------------------------------------------------------
/tests/core/analysis.py:
--------------------------------------------------------------------------------
1 | from elasticsearch_dsl.analysis import analyzer, token_filter
2 |
# Custom analyzer: standard tokenizer, then lowercasing, then an edge n-gram filter
# producing grams of 2 to 20 characters.
edge_ngram_analyzer = analyzer(
    'edge_ngram_analyzer',
    type='custom',
    tokenizer='standard',
    filter=[
        'lowercase',
        token_filter('edge_ngram_filter', type='edgeNGram', min_gram=2, max_gram=20),
    ],
)
17 |
--------------------------------------------------------------------------------
/tests/core/bungie_signal.py:
--------------------------------------------------------------------------------
1 | '''
2 | This test signal acts as a proxy to BungieSignalProcessor. It allows us
3 | to test the functionality of the default signal processor while using a
4 | custom processor instead, hence testing that we can plug in and use a custom
5 | signal processor.
6 | '''
7 | from django.db.models import signals
8 |
9 | from bungiesearch.signals import BungieSignalProcessor
10 |
11 |
class BungieTestSignalProcessor(BungieSignalProcessor):
    '''
    Signal processor which relays Django's post_save and pre_delete signals
    to this object's post_save_connector and pre_delete_connector.
    '''

    def handle_save(self, sender, instance, **kwargs):
        '''Relay a post_save signal to post_save_connector.'''
        self.post_save_connector(sender, instance, **kwargs)

    def handle_delete(self, sender, instance, **kwargs):
        '''Relay a pre_delete signal to pre_delete_connector.'''
        self.pre_delete_connector(sender, instance, **kwargs)

    def setup(self, model):
        '''Connect both handlers for `model` and record that setup ran.'''
        wiring = ((signals.post_save, self.handle_save),
                  (signals.pre_delete, self.handle_delete))
        for signal, handler in wiring:
            signal.connect(handler, sender=model)
        self.setup_ran = True

    def teardown(self, model):
        '''Disconnect both handlers for `model` and record that teardown ran.'''
        wiring = ((signals.pre_delete, self.handle_delete),
                  (signals.post_save, self.handle_save))
        for signal, handler in wiring:
            signal.disconnect(handler, sender=model)
        self.teardown_ran = True
29 |
--------------------------------------------------------------------------------
/tests/core/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 |
3 | from bungiesearch.managers import BungiesearchManager
4 |
5 |
class Article(models.Model):
    '''
    Test model for an article. Its default manager is a BungiesearchManager.
    '''
    title = models.TextField(db_index=True)
    authors = models.TextField(blank=True)
    description = models.TextField(blank=True)
    text_field = models.TextField(null=True)
    link = models.URLField(max_length=510, unique=True, db_index=True)
    published = models.DateTimeField(null=True)
    created = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(null=True)
    tweet_count = models.IntegerField()
    raw = models.BinaryField(null=True)
    source_hash = models.BigIntegerField(null=True)
    missing_data = models.CharField(blank=True, max_length=255)
    positive_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
    negative_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
    popularity_index = models.IntegerField(default=0)

    objects = BungiesearchManager()

    class Meta:
        app_label = 'core'
27 |
28 |
class User(models.Model):
    '''
    Test model for a user. The primary key is the textual user_id field
    and the default manager is a BungiesearchManager.
    '''
    name = models.TextField(db_index=True)
    user_id = models.TextField(blank=True, primary_key=True)
    about = models.TextField(blank=True)
    created = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(null=True)

    objects = BungiesearchManager()

    class Meta:
        app_label = 'core'
40 |
41 |
class NoUpdatedField(models.Model):
    '''
    Test model which, unlike Article and User, declares no `updated` field.
    Its default manager is a BungiesearchManager.
    '''
    field_title = models.TextField(db_index=True)
    field_description = models.TextField(blank=True)

    objects = BungiesearchManager()

    class Meta:
        app_label = 'core'
50 |
51 |
class ManangedButEmpty(models.Model):
    '''
    Test model with a BungiesearchManager as default manager. The tests
    expect that none of its instances ever end up in the search index.
    '''
    field_title = models.TextField(db_index=True)
    field_description = models.TextField(blank=True)

    objects = BungiesearchManager()

    class Meta:
        app_label = 'core'
60 |
61 |
class Unmanaged(models.Model):
    '''
    Test model which does not declare a BungiesearchManager, so it keeps
    Django's default manager.
    '''
    field_title = models.TextField(db_index=True)
    field_description = models.TextField(blank=True)

    class Meta:
        app_label = 'core'
68 |
--------------------------------------------------------------------------------
/tests/core/search_aliases.py:
--------------------------------------------------------------------------------
1 | from bungiesearch.aliases import SearchAlias
2 | from core.models import Article, NoUpdatedField
3 |
4 |
class SearchTitle(SearchAlias):
    '''
    Alias adding a match query on `title` to the search instance.
    Declared for Article only, under the explicit alias name 'title_search'.
    '''
    def alias_for(self, title):
        return self.search_instance.query('match', title=title)

    class Meta:
        models = (Article,)
        alias_name = 'title_search'
12 |
class Title(SearchAlias):
    '''
    Alias adding a match query on `title` to the search instance.
    It has no Meta, hence no model restriction and no explicit alias name.
    '''
    def alias_for(self, title):
        return self.search_instance.query('match', title=title)
16 |
class InvalidAlias(SearchAlias):
    '''
    Alias declared for Article which deliberately does not define alias_for.
    The tests expect calling it to raise NotImplementedError.
    '''
    class Meta:
        models = (Article,)
20 |
class TitleFilter(SearchAlias):
    '''
    Alias adding a term filter on `title` to the search instance.
    It has no Meta, hence no model restriction and no explicit alias name.
    '''
    def alias_for(self, title):
        return self.search_instance.filter('term', title=title)
24 |
class NoUpdatedMdlOnly(SearchAlias):
    '''
    Alias adding a term filter on `title` to the search instance.
    Declared for the NoUpdatedField model only.
    '''
    def alias_for(self, title):
        return self.search_instance.filter('term', title=title)

    class Meta:
        models = (NoUpdatedField,)
31 |
class ReturningSelfAlias(SearchAlias):
    '''
    Alias which returns the alias object itself instead of a search, so that
    tests can inspect it. Exposed under the name 'get_alias_for_test'.
    '''
    def alias_for(self):
        return self

    class Meta:
        alias_name = 'get_alias_for_test'
38 |
class BisIndex(SearchAlias):
    '''
    Alias which points the search instance at the 'bungiesearch_demo_bis'
    index. Declared for Article only, under the alias name 'bisindex'.
    '''
    def alias_for(self):
        # The search instance is modified in place and then returned.
        self.search_instance._index = 'bungiesearch_demo_bis'
        return self.search_instance

    class Meta:
        models = (Article,)
        alias_name = 'bisindex'
47 |
--------------------------------------------------------------------------------
/tests/core/search_indices.py:
--------------------------------------------------------------------------------
1 | from bungiesearch.fields import DateField, NumberField, StringField
2 | from bungiesearch.indices import ModelIndex
3 | from core.models import Article, NoUpdatedField, User
4 |
5 | from .analysis import edge_ngram_analyzer
6 |
7 |
class ArticleIndex(ModelIndex):
    '''
    Index definition for Article, flagged as the default one (Meta.default).
    It declares three extra fields: two computed from `eval_as` expressions
    and one rendered from the article.txt template.
    '''
    effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
    meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
    text = StringField(template='article.txt', analyzer=edge_ngram_analyzer)

    class Meta:
        model = Article
        updated_field = 'updated'
        exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
        # NOTE(review): 'full_text' matches neither a field declared here nor
        # an Article model field (the templated field is `text`) -- confirm.
        hotfixes = {'updated': {'null_value': '2013-07-01'},
                    'title': {'boost': 1.75},
                    'description': {'boost': 1.35},
                    'full_text': {'boost': 1.125}}
        default = True
22 |
23 |
class UserIndex(ModelIndex):
    '''
    Index definition for User, flagged as the default one (Meta.default).
    Documents are identified by the model's `user_id` field.
    '''
    effective_date = DateField(eval_as='obj.created if obj.created and obj.updated > obj.created else obj.updated')
    about = StringField(model_attr='about', analyzer=edge_ngram_analyzer)
    int_about = NumberField(coretype='integer')

    def prepare_int_about(self, obj):
        '''
        Returns `obj.about` converted to an integer, or 1 when the conversion
        raises a ValueError.
        '''
        try:
            return int(obj.about)
        except ValueError:
            return 1

    class Meta:
        model = User
        id_field = 'user_id'
        updated_field = 'updated'
        hotfixes = {
            'updated': {'null_value': '2013-07-01'},
            'about': {'boost': 1.35},
        }
        default = True
44 |
45 |
class NoUpdatedFieldIndex(ModelIndex):
    '''
    Index definition for NoUpdatedField which leaves out `field_description`.
    '''
    class Meta:
        model = NoUpdatedField
        exclude = ('field_description',)
        optimize_queries = True
        # The indexing queryset defers the excluded fields, reusing the
        # `exclude` tuple defined just above in this class body.
        indexing_query = NoUpdatedField.objects.defer(*exclude).select_related().all()
52 |
--------------------------------------------------------------------------------
/tests/core/search_indices_bis.py:
--------------------------------------------------------------------------------
1 | from bungiesearch.fields import DateField, StringField
2 | from bungiesearch.indices import ModelIndex
3 | from core.models import Article, ManangedButEmpty, User
4 |
5 |
class ArticleIndex(ModelIndex):
    '''
    Second index definition for Article, not flagged as default.
    It declares a constant `more_fields` value and has no templated field.
    '''
    effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
    meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
    more_fields = StringField(eval_as='"some value"')

    class Meta:
        model = Article
        updated_field = 'updated'
        exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
        # NOTE(review): 'full_text' matches neither a field declared here nor
        # an Article model field -- confirm whether this hotfix has an effect.
        hotfixes = {'updated': {'null_value': '2013-07-01'},
                    'title': {'boost': 1.75},
                    'description': {'boost': 1.35},
                    'full_text': {'boost': 1.125}}
        default = False
20 |
21 |
class UserIndex(ModelIndex):
    '''
    Second index definition for User, not flagged as default.
    Documents are identified by the model's `user_id` field.
    '''
    # NOTE(review): these expressions read obj.published, obj.link,
    # obj.tweet_count and obj.raw, none of which are declared on the User
    # model -- this looks copied from ArticleIndex; confirm it is intended.
    effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
    meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
    more_fields = StringField(eval_as='"some value"')

    class Meta:
        model = User
        id_field = 'user_id'
        updated_field = 'updated'
        # NOTE(review): the excluded names and the 'title' and 'full_text'
        # hotfixes are not User model fields either -- confirm.
        exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
        hotfixes = {'updated': {'null_value': '2013-07-01'},
                    'title': {'boost': 1.75},
                    'about': {'boost': 1.35},
                    'full_text': {'boost': 1.125}}
        default = False
37 |
38 |
class EmptyIndex(ModelIndex):
    '''
    Index definition for ManangedButEmpty whose indexing condition is never
    met; the tests expect this index to hold no item of that model.
    '''
    def matches_indexing_condition(self, item):
        '''Always returns False, whatever `item` is.'''
        return False

    class Meta:
        model = ManangedButEmpty
        exclude = ('field_description',)
        optimize_queries = True
47 |
--------------------------------------------------------------------------------
/tests/core/templates/article.txt:
--------------------------------------------------------------------------------
1 | {{ object.title }}
2 | {{ object.authors }}
3 | {{ object.description }}
4 | {{ object.text_field }}
--------------------------------------------------------------------------------
/tests/core/test_bungiesearch.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 |
3 | from django.core.management import call_command
4 | from django.test import TestCase, override_settings
5 | from six import iteritems
6 |
7 | import pytz
8 | from bungiesearch import Bungiesearch
9 | from bungiesearch.utils import update_index
10 | from core.bungie_signal import BungieTestSignalProcessor
11 | from core.models import (Article, ManangedButEmpty, NoUpdatedField, Unmanaged,
12 | User)
13 | from core.search_indices import ArticleIndex, UserIndex
14 |
15 |
16 | class CoreTestCase(TestCase):
17 | @classmethod
18 | def setUpClass(cls):
19 | # Let's start by creating the index and mapping.
20 | # If we create an object before the index, the index
21 | # will be created automatically, and we want to test the command.
22 | call_command('search_index', action='create')
23 |
24 | art_1 = {'title': 'Title one',
25 | 'description': 'Description of article 1.',
26 | 'text_field': '',
27 | 'link': 'http://example.com/article_1',
28 | 'published': pytz.UTC.localize(datetime(year=2020, month=9, day=15)),
29 | 'updated': pytz.UTC.localize(datetime(year=2014, month=9, day=10)),
30 | 'tweet_count': 20,
31 | 'source_hash': 159159159159,
32 | 'missing_data': '',
33 | 'positive_feedback': 50,
34 | 'negative_feedback': 5,
35 | }
36 |
37 | user_1 = {'user_id': 'bungie1',
38 | 'about': 'Description of user 1',
39 | 'created': pytz.UTC.localize(datetime(year=2015, month=1, day=1)),
40 | 'updated': pytz.UTC.localize(datetime(year=2015, month=6, day=1)),
41 | }
42 |
43 | Article.objects.create(**art_1)
44 | User.objects.create(**user_1)
45 |
46 | art_2 = dict((k, v) for k, v in iteritems(art_1))
47 | art_2['link'] += '/page2'
48 | art_2['title'] = 'Title two'
49 | art_2['description'] = 'This is a second article.'
50 | art_2['text_field'] = None
51 | art_2['published'] = pytz.UTC.localize(datetime(year=2010, month=9, day=15))
52 |
53 | user_2 = dict((k, v) for k, v in iteritems(user_1))
54 | user_2['user_id'] = 'bungie2'
55 | user_2['about'] = 'This is the second user'
56 | user_2['created'] = pytz.UTC.localize(datetime(year=2010, month=9, day=15))
57 |
58 | Article.objects.create(**art_2)
59 | User.objects.create(**user_2)
60 | NoUpdatedField.objects.create(field_title='My title', field_description='This is a short description.')
61 |
62 | call_command('rebuild_index', interactive=False, confirmed='guilty-as-charged')
63 |
64 | def test_count_after_clear(self):
65 | # can flake because elasticsearch create API is asynchronous
66 | self.assertEqual(Article.objects.search_index('bungiesearch_demo').count(), 2)
67 | call_command('rebuild_index', interactive=False, confirmed='guilty-as-charged')
68 | self.assertEqual(Article.objects.search_index('bungiesearch_demo').count(), 2)
69 |
    @classmethod
    def tearDownClass(cls):
        '''
        Runs the search_index command with the delete action once all the
        tests of this class have run.
        '''
        call_command('search_index', action='delete', confirmed='guilty-as-charged')
73 |
74 | def test_model_index_generation(self):
75 | '''
76 | Check that the mapping is the expected one.
77 | '''
78 | expected_article = {'properties': {'updated': {'type': 'date', 'null_value': '2013-07-01'},
79 | 'description': {'type': 'string', 'boost': 1.35, 'analyzer': 'snowball'},
80 | 'text': {'type': 'string', 'analyzer': 'edge_ngram_analyzer'},
81 | 'text_field': {'type': 'string', 'analyzer': 'snowball'},
82 | 'created': {'type': 'date'},
83 | 'title': {'type': 'string', 'boost': 1.75, 'analyzer': 'snowball'},
84 | 'authors': {'type': 'string', 'analyzer': 'snowball'},
85 | 'meta_data': {'type': 'string', 'analyzer': 'snowball'},
86 | 'link': {'type': 'string', 'analyzer': 'snowball'},
87 | 'effective_date': {'type': 'date'},
88 | 'tweet_count': {'type': 'integer'},
89 | 'id': {'type': 'integer'},
90 | '_id': {'type': 'integer'}, # This is the elastic search index.
91 | 'published': {'type': 'date'}}
92 | }
93 | expected_user = {'properties': {'updated': {'type': 'date', 'null_value': '2013-07-01'},
94 | 'about': {'type': 'string', 'analyzer': 'edge_ngram_analyzer'},
95 | 'int_about': {'type': 'integer'},
96 | 'user_id': {'analyzer': 'snowball', 'type': 'string'},
97 | 'effective_date': {'type': 'date'},
98 | 'created': {'type': 'date'},
99 | 'name': {'analyzer': 'snowball', 'type': 'string'},
100 | '_id': {'analyzer': 'snowball', 'type': 'string'}}
101 | }
102 |
103 | self.assertEqual(ArticleIndex().get_mapping(), expected_article)
104 | self.assertEqual(UserIndex().get_mapping(), expected_user)
105 |
106 | def test_fetch_item(self):
107 | '''
108 | Test searching and mapping.
109 | '''
110 | self.assertEqual(Article.objects.search.query('match', _all='Description')[0], Article.objects.get(title='Title one'), 'Searching for "Description" did not return just the first Article.')
111 | self.assertEqual(Article.objects.search.query('match', _all='second article')[0], Article.objects.get(title='Title two'), 'Searching for "second article" did not return the second Article.')
112 |
113 | self.assertEqual(User.objects.search.query('match', _all='Description')[0], User.objects.get(user_id='bungie1'), 'Searching for "About" did not return the User.')
114 | self.assertEqual(User.objects.search.query('match', _all='second user')[0], User.objects.get(user_id='bungie2'), 'Searching for "second user" did not return the User.')
115 |
116 | def test_raw_fetch(self):
117 | '''
118 | Test searching and mapping.
119 | '''
120 | item = Article.objects.search.query('match', _all='Description')[:1:True]
121 | self.assertTrue(hasattr(item, 'meta'), 'Fetching first raw results did not return an object with a meta attribute.')
122 |
123 | item = User.objects.search.query('match', _all='Description')[:1:True]
124 | self.assertTrue(hasattr(item, 'meta'), 'Fetching first raw results did not return an object with a meta attribute.')
125 |
126 | def test_iteration(self):
127 | '''
128 | Tests iteration on Bungiesearch items.
129 | '''
130 | lazy_search_article = Article.objects.search.query('match', title='title')
131 | db_items = list(Article.objects.all())
132 | self.assertTrue(all([result in db_items for result in lazy_search_article]), 'Searching for title "title" did not return all articles.')
133 | self.assertTrue(all([result in db_items for result in lazy_search_article[:]]), 'Searching for title "title" did not return all articles when using empty slice.')
134 | self.assertEqual(len(lazy_search_article[:1]), 1, 'Get item with start=None and stop=1 did not return one item.')
135 | self.assertEqual(len(lazy_search_article[:2]), 2, 'Get item with start=None and stop=2 did not return two item.')
136 |
137 | lazy_search_user = User.objects.search.query('match', about='user')
138 | db_items = list(User.objects.all())
139 | self.assertTrue(all([result in db_items for result in lazy_search_user]), 'Searching for description "user" did not return all articles.')
140 | self.assertTrue(all([result in db_items for result in lazy_search_user[:]]), 'Searching for description "user" did not return all articles when using empty slice.')
141 | self.assertEqual(len(lazy_search_user[:1]), 1, 'Get item with start=None and stop=1 did not return one item.')
142 | self.assertEqual(len(lazy_search_user[:2]), 2, 'Get item with start=None and stop=2 did not return two item.')
143 |
144 | def test_no_results(self):
145 | '''
146 | Test empty results.
147 | '''
148 | self.assertEqual(list(Article.objects.search.query('match', _all='nothing')), [], 'Searching for "nothing" did not return an empty list on iterator call.')
149 | self.assertEqual(Article.objects.search.query('match', _all='nothing')[:10], [], 'Searching for "nothing" did not return an empty list on get item call.')
150 |
151 | self.assertEqual(list(User.objects.search.query('match', _all='nothing')), [], 'Searching for "nothing" did not return an empty list on iterator call.')
152 | self.assertEqual(list(User.objects.search.query('match', _all='nothing')), [], 'Searching for "nothing" did not return an empty list on iterator call.')
153 |
154 | def test_custom_search(self):
155 | '''
156 | Test searching on custom index and doc_type.
157 | '''
158 | search = Article.objects.custom_search(index='bungiesearch_demo', doc_type='Article')
159 | es_art1 = search.query('match', _all='Description')[0]
160 | db_art1 = Article.objects.get(title='Title one')
161 | es_art2 = search.query('match', _all='second article')[0]
162 | db_art2 = Article.objects.get(title='Title two')
163 | self.assertTrue(all([es_art1.id == db_art1.id, es_art1.title == db_art1.title, es_art1.description == db_art1.description]), 'Searching for "Description" did not return the first Article.')
164 | self.assertTrue(all([es_art2.id == db_art2.id, es_art2.title == db_art2.title, es_art2.description == db_art2.description]), 'Searching for "second article" did not return the second Article.')
165 |
166 | search = User.objects.custom_search(index='bungiesearch_demo', doc_type='User')
167 | es_user1 = search.query('match', _all='Description')[0]
168 | db_user1 = User.objects.get(user_id='bungie1')
169 | self.assertRaises(AttributeError, getattr, es_user1, 'id')
170 | self.assertTrue(all([es_user1.user_id == db_user1.user_id, es_user1.about == db_user1.about]), 'Searching for "About" did not return the first User.')
171 |
172 | def test_get_model(self):
173 | '''
174 | Test model mapping.
175 | '''
176 | self.assertEqual(ArticleIndex().get_model(), Article, 'Model was not Article.')
177 | self.assertEqual(UserIndex().get_model(), User, 'Model was not User')
178 |
179 | def test_cloning(self):
180 | '''
181 | Tests that Bungiesearch remains lazy with specific function which should return clones.
182 | '''
183 | inst = Article.objects.search.query('match', _all='Description')
184 | self.assertIsInstance(inst.only('_id'), inst.__class__, 'Calling `only` does not return a clone of itself.')
185 |
186 | inst = User.objects.search.query('match', _all='Description')
187 | self.assertIsInstance(inst.only('_id'), inst.__class__, 'Calling `only` does not return a clone of itself.')
188 |
189 | def test_search_alias_exceptions(self):
190 | '''
191 | Tests that invalid aliases raise exceptions.
192 | '''
193 | self.assertRaises(AttributeError, getattr, Article.objects, 'bsearch_no_such_alias')
194 | self.assertRaises(NotImplementedError, Article.objects.bsearch_invalidalias)
195 | self.assertRaises(ValueError, getattr, Article.objects.search.bsearch_title('title query').bsearch_titlefilter('title filter'), 'bsearch_noupdatedmdlonly')
196 |
197 | @override_settings(BUNGIESEARCH={})
198 | def test_search_alias_not_setup(self):
199 | '''
200 | Tests that Bungiesearch is not instantiated when not set up
201 | This is its own test due to the override_settings decorator
202 | '''
203 | self.assertRaises(AttributeError, getattr, Article.objects, 'bsearch_no_such_alias')
204 | self.assertRaises(AttributeError, getattr, Article.objects, 'bsearch_title_search')
205 |
206 | def test_search_aliases(self):
207 | '''
208 | Tests search alias errors and functionality.
209 | '''
210 | title_alias = Article.objects.bsearch_title_search('title')
211 | db_items = list(Article.objects.all())
212 | self.assertEqual(title_alias.to_dict(), {'query': {'match': {'title': 'title'}}}, 'Title alias search did not return the expected JSON query.')
213 | self.assertTrue(all([result in db_items for result in title_alias]), 'Alias searching for title "title" did not return all articles.')
214 | self.assertTrue(all([result in db_items for result in title_alias[:]]), 'Alias searching for title "title" did not return all articles when using empty slice.')
215 | self.assertEqual(len(title_alias[:1]), 1, 'Get item on an alias search with start=None and stop=1 did not return one item.')
216 | self.assertEqual(len(title_alias[:2]), 2, 'Get item on an alias search with start=None and stop=2 did not return two item.')
217 | self.assertEqual(title_alias.to_dict(), Article.objects.bsearch_title('title').to_dict(), 'Alias applicable to all models does not return the same JSON request body as the model specific one.')
218 | self.assertEqual(NoUpdatedField.objects.search.filter('term', title='My title').to_dict(), NoUpdatedField.objects.bsearch_noupdatedmdlonly('My title').to_dict(), 'Alias applicable only to NoUpdatedField does not generate the correct filter.')
219 |
220 | def test_bungie_instance_search_aliases(self):
221 | alias_dictd = Article.objects.search.bsearch_title('title query').bsearch_titlefilter('title filter').to_dict()
222 | expected = {'query': {'bool': {'filter': [{'term': {'title': 'title filter'}}], 'must': [{'match': {'title': 'title query'}}]}}}
223 | self.assertEqual(alias_dictd, expected, 'Alias on Bungiesearch instance did not return the expected dictionary.')
224 |
225 | def test_search_alias_model(self):
226 | self.assertEqual(Article.objects.bsearch_get_alias_for_test().get_model(), Article, 'Unexpected get_model information on search alias.')
227 | self.assertEqual(Article.objects.search.bsearch_title('title query').bsearch_get_alias_for_test().get_model(), Article, 'Unexpected get_model information on search alias.')
228 | self.assertRaises(ValueError, Bungiesearch().bsearch_get_alias_for_test().get_model)
229 |
230 | def test_post_save(self):
231 | art = {'title': 'Title three',
232 | 'description': 'Postsave',
233 | 'link': 'http://example.com/sparrho',
234 | 'published': pytz.UTC.localize(datetime(year=2020, month=9, day=15)),
235 | 'updated': pytz.UTC.localize(datetime(year=2014, month=9, day=10)),
236 | 'tweet_count': 20,
237 | 'source_hash': 159159159159,
238 | 'missing_data': '',
239 | 'positive_feedback': 50,
240 | 'negative_feedback': 5}
241 | obj = Article.objects.create(**art)
242 | find_three = Article.objects.search.query('match', title='three')
243 | self.assertEqual(len(find_three), 2, 'Searching for "three" in title did not return exactly two items (got {}).'.format(find_three))
244 | # Let's check that both returned items are from different indices.
245 | self.assertNotEqual(find_three[0:1:True].meta.index, find_three[1:2:True].meta.index, 'Searching for "three" did not return items from different indices.')
246 | # Let's now delete this object to test the post delete signal.
247 | obj.delete()
248 |
249 | def test_bulk_delete(self):
250 | '''
251 | This tests that using the update_index function with 'delete' as the action performs a bulk delete operation on the data.
252 | '''
253 | bulk_art1 = {'title': 'Title four',
254 | 'description': 'Bulk delete first',
255 | 'link': 'http://example.com/bd1',
256 | 'published': pytz.UTC.localize(datetime(year=2015, month=7, day=13)),
257 | 'updated': pytz.UTC.localize(datetime(year=2015, month=7, day=20)),
258 | 'tweet_count': 20,
259 | 'source_hash': 159159159159,
260 | 'missing_data': '',
261 | 'positive_feedback': 50,
262 | 'negative_feedback': 5}
263 | bulk_art2 = {'title': 'Title five',
264 | 'description': 'Bulk delete second',
265 | 'link': 'http://example.com/bd2',
266 | 'published': pytz.UTC.localize(datetime(year=2015, month=7, day=13)),
267 | 'updated': pytz.UTC.localize(datetime(year=2015, month=7, day=20)),
268 | 'tweet_count': 20,
269 | 'source_hash': 159159159159,
270 | 'missing_data': '',
271 | 'positive_feedback': 50,
272 | 'negative_feedback': 5}
273 |
274 | bulk_obj1 = Article.objects.create(**bulk_art1)
275 | bulk_obj2 = Article.objects.create(**bulk_art2)
276 |
277 | find_five = Article.objects.search.query('match', title='five')
278 | self.assertEqual(len(find_five), 2, 'Searching for "five" in title did not return exactly two results (got {})'.format(find_five))
279 |
280 | model_items = [bulk_obj1.pk, bulk_obj2.pk]
281 | model_name = Article.__name__
282 | update_index(model_items, model_name, action='delete', bulk_size=2, num_docs=-1, start_date=None, end_date=None, refresh=True)
283 |
284 | find_four = Article.objects.search.query('match', title='four')
285 | self.assertEqual(len(find_four), 0, 'Searching for "four" in title did not return exactly zero results (got {})'.format(find_four))
286 | find_five = Article.objects.search.query('match', title='five')
287 | self.assertEqual(len(find_five), 0, 'Searching for "five" in title did not return exactly zero results (got {})'.format(find_five))
288 |
    def test_manager_interference(self):
        '''
        This tests that saving an object which is not managed by Bungiesearch won't try to update the index for that model.
        There is no explicit assertion: the test passes as long as creating the object does not raise.
        '''
        Unmanaged.objects.create(field_title='test', field_description='blah')
294 |
295 | def test_time_indexing(self):
296 | update_index(Article.objects.all(), 'Article', start_date=datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M'))
297 | update_index(NoUpdatedField.objects.all(), 'NoUpdatedField', end_date=datetime.strftime(datetime.now(), '%Y-%m-%d'))
298 |
299 | def test_optimal_queries(self):
300 | db_item = NoUpdatedField.objects.get(pk=1)
301 | src_item = NoUpdatedField.objects.search.query('match', field_title='My title')[0]
302 | self.assertEqual(src_item.id, db_item.id, 'Searching for the object did not return the expected object id.')
303 | self.assertEqual(src_item.get_deferred_fields(), {'field_description'}, 'Was expecting description in the set of deferred fields.')
304 |
305 | def test_concat_queries(self):
306 | items = Article.objects.bsearch_title_search('title')[::False] + NoUpdatedField.objects.search.query('match', field_title='My title')[::False]
307 | for item in items:
308 | model = item._meta.proxy_for_model if item._meta.proxy_for_model else type(item)
309 | self.assertIn(model, [Article, NoUpdatedField], 'Got an unmapped item ({}), or an item with an unexpected mapping.'.format(type(item)))
310 |
311 | def test_data_templates(self):
312 | # One article has a title that contains 'one'
313 | match_one = Article.objects.search.query('match', text='one')
314 | self.assertEqual(len(match_one), 2, 'Searching for "one" in text did not return exactly one item (got {}).'.format(match_one))
315 | self.assertEqual(match_one[0].title, 'Title one', 'Searching for "one" in text did not yield the first article (got {})'.format(match_one[0].title))
316 |
317 | # Two articles have a description that contain 'article'
318 | match_two = Article.objects.search.query('match', text='article')
319 | self.assertEqual(len(match_two), 4, 'Searching for "article" in text did not return exactly two items (got {})'.format(match_two))
320 |
321 | # Two articles have a link with 'example,' but since link isn't in the template, there should be zero results
322 | match_zero = Article.objects.search.query('match', text='example')
323 | self.assertEqual(len(match_zero), 0, 'Searching for "article" in text did not return exactly zero items (got {})'.format(match_zero))
324 |
325 | def test_fields(self):
326 | '''
327 | Checking that providing a specific field will correctly fetch these items from elasticsearch.
328 | '''
329 | for mdl, id_field in [(Article, 'id'), (User, 'user_id')]:
330 | raw_items = mdl.objects.search.fields('_id')[:5:True]
331 | self.assertTrue(all([dir(raw) == ['meta'] for raw in raw_items]), 'Requesting only _id returned more than just meta info from ES for model {}.'.format(mdl))
332 | items = mdl.objects.search.fields('_id')[:5]
333 | self.assertTrue(all([dbi in items for dbi in mdl.objects.all()]), 'Mapping after fields _id only search did not return all results for model {}.'.format(mdl))
334 | items = mdl.objects.search.fields([id_field, '_id', '_source'])[:5]
335 | self.assertTrue(all([dbi in items for dbi in mdl.objects.all()]), 'Mapping after fields _id, id and _source search did not return all results for model {}.'.format(mdl))
336 |
337 | def test_prepare_field(self):
338 | '''
339 | Check that providing a method to calculate the value of a field will yield correct results in the search index.
340 | '''
341 | user_int_description = {'user_id': 'bungie3',
342 | 'about': '123',
343 | 'created': pytz.UTC.localize(datetime(year=2015, month=1, day=1)),
344 | 'updated': pytz.UTC.localize(datetime(year=2015, month=6, day=1)),
345 | }
346 | User.objects.create(**user_int_description)
347 |
348 | find_one = User.objects.search.filter('term', int_about=1)
349 | self.assertEqual(len(find_one), 4, 'Searching for users with default int description did not return exactly 4 items (got {})'.format(find_one))
350 |
351 | find_123 = User.objects.search.filter('term', int_about=123)
352 | self.assertEqual(len(find_one), 4, 'Searching for users with int description 123 did not return exactly 2 items (got {})'.format(find_123))
353 |
354 | find_zero = User.objects.search.filter('term', int_about=0)
355 | self.assertEqual(len(find_zero), 0, 'Searching for users with int description zero did not return exactly 0 items (got {})'.format(find_zero))
356 |
357 | def test_fun(self):
358 | '''
359 | Test fun queries.
360 | '''
361 | lazy = Article.objects.bsearch_title_search('title').only('pk').fields('_id')
362 | print(len(lazy)) # Returns the total hits computed by elasticsearch.
363 | assert all([type(item) == Article for item in lazy.filter('range', effective_date={'lte': '2014-09-22'})[5:7]])
364 |
365 | def test_meta(self):
366 | '''
367 | Test search meta is set.
368 | '''
369 | lazy = Article.objects.bsearch_title_search('title').only('pk').fields('_id')
370 | assert all([hasattr(item._searchmeta) for item in lazy.filter('range', effective_date={'lte': '2014-09-22'})[5:7]])
371 |
372 | def test_manangedbutempty(self):
373 | '''
374 | Tests that the indexing condition controls indexing properly.
375 | '''
376 | mbeo = ManangedButEmpty.objects.create(field_title='Some time', field_description='This should never be indexed.')
377 | idxi = len(ManangedButEmpty.objects.search)
378 | self.assertEquals(idxi, 0, 'ManagedButEmpty has {} indexed items instead of zero.'.format(idxi))
379 | mbeo.delete()
380 |
def test_specify_index(self):
    '''
    Tests that a search can be restricted to one index, by name or through an alias.

    Article must have as many indexed items as database rows in both
    bungiesearch_demo and bungiesearch_demo_bis; NoUpdatedField must match
    its database count in bungiesearch_demo and have nothing indexed in
    bungiesearch_demo_bis.
    '''
    article_count = Article.objects.count()
    self.assertEqual(article_count, Article.objects.search_index('bungiesearch_demo').count(), 'Indexed items on bungiesearch_demo for Article does not match number in database.')
    self.assertEqual(article_count, Article.objects.search_index('bungiesearch_demo_bis').count(), 'Indexed items on bungiesearch_demo_bis for Article does not match number in database.')
    self.assertEqual(article_count, Article.objects.bsearch_bisindex().count(), 'Indexed items on bungiesearch_demo_bis for Article does not match number in database, using alias.')
    self.assertEqual(NoUpdatedField.objects.count(), NoUpdatedField.objects.search_index('bungiesearch_demo').count(), 'Indexed items on bungiesearch_demo for NoUpdatedField does not match number in database.')
    # The message is shown on failure, i.e. precisely when the count is NOT zero.
    self.assertEqual(NoUpdatedField.objects.search_index('bungiesearch_demo_bis').count(), 0, 'Indexed items on bungiesearch_demo_bis for NoUpdatedField is not zero.')
387 |
def test_None_as_missing(self):
    '''
    Tests that a ``missing`` filter on text_field over bungiesearch_demo
    yields exactly one article, and that this article has text_field=None.
    '''
    demo_index = Article.objects.search_index('bungiesearch_demo')
    without_text = demo_index.filter('missing', field='text_field')

    hit_count = len(without_text)
    self.assertEqual(hit_count, 1, 'Filtering by missing text_field does not return exactly one item.')

    first_hit = without_text[0]
    self.assertEqual(first_hit.text_field, None, 'The item with missing text_field does not have text_field=None.')
392 |
def test_signal_setup_teardown(self):
    '''
    Tests that setup and teardown can be run on the test signal processor,
    and that each call raises the corresponding ``*_ran`` flag.
    '''
    processor = BungieTestSignalProcessor()

    # Hook the processor onto Article, then check the flag it exposes.
    processor.setup(Article)
    setup_flag = processor.setup_ran
    self.assertTrue(setup_flag, 'Calling setup on the signal processor did not set it up.')

    # Unhook it again and check the matching flag.
    processor.teardown(Article)
    teardown_flag = processor.teardown_ran
    self.assertTrue(teardown_flag, 'Calling teardown on the signal processor did not tear it down.')
402 |
--------------------------------------------------------------------------------
/tests/core/test_settings.py:
--------------------------------------------------------------------------------
1 | from django.conf import settings
2 | from django.test import TestCase
3 |
4 | from bungiesearch import Bungiesearch
5 |
6 |
class SettingsTestCase(TestCase):
    '''
    Checks that values placed in ``settings.BUNGIESEARCH`` are visible on a Bungiesearch instance.
    '''

    def test_timeout_used(self):
        '''
        Sets ``TIMEOUT`` to 29 in the bungiesearch settings and asserts that a
        new Bungiesearch instance reports it both in ``BUNGIE`` and in the
        keyword arguments of its connection transport.
        '''
        bungie_settings = settings.BUNGIESEARCH
        # The dictionary is mutated in place, so the previous state must be
        # put back once the test is over; otherwise the override leaks into
        # every test that runs afterwards.
        if 'TIMEOUT' in bungie_settings:
            self.addCleanup(bungie_settings.__setitem__, 'TIMEOUT', bungie_settings['TIMEOUT'])
        else:
            self.addCleanup(bungie_settings.pop, 'TIMEOUT', None)
        bungie_settings['TIMEOUT'] = 29

        search = Bungiesearch()

        self.assertEqual(search.BUNGIE['TIMEOUT'], 29)
        self.assertEqual(search._using.transport.kwargs['timeout'], 29)
15 |
--------------------------------------------------------------------------------
/tests/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 |
if __name__ == "__main__":
    # Fall back to the local ``settings`` module when the environment does
    # not already name a settings module.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")

    # Imported only at this point, once the settings module has been selected.
    from django.core import management

    management.execute_from_command_line(sys.argv)
9 |
--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | DJANGO_SETTINGS_MODULE=tests.settings
3 |
--------------------------------------------------------------------------------
/tests/settings.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
# Django settings used to run the bungiesearch test suite.
DEBUG = True
# Directory above the one holding this file. ``__file__`` may be a relative
# path depending on how the module is loaded, so it is made absolute first;
# otherwise BASE_DIR could collapse to an empty string.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
SECRET_KEY = 'cookies_are_delicious_delicacies'
ROOT_URLCONF = 'urls'
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_L10N = True
USE_TZ = True
MIDDLEWARE_CLASSES = ()
DEFAULT_INDEX_TABLESPACE = ''

# Make sure the copy of bungiesearch in the directory above this one is used.
sys.path.insert(0, BASE_DIR)

INSTALLED_APPS = (
    'bungiesearch',
    'core',
)

# In-memory SQLite database.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': ':memory:',
    }
}

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.contrib.auth.context_processors.auth',
                'django.template.context_processors.debug',
                'django.template.context_processors.i18n',
                'django.template.context_processors.media',
                'django.template.context_processors.static',
                'django.template.context_processors.tz',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

BUNGIESEARCH = {
    # The URL and credentials are read from the environment; the URL falls
    # back to 'localhost' and the credentials to None when unset.
    'URLS': [os.getenv('ELASTIC_SEARCH_URL', 'localhost')],
    'ES_SETTINGS': {
        'http_auth': os.getenv('ELASTIC_SEARCH_AUTH')
    },
    'ALIASES': {
        'bsearch': 'core.search_aliases'
    },
    'INDICES': {
        'bungiesearch_demo': 'core.search_indices',
        'bungiesearch_demo_bis': 'core.search_indices_bis'
    },
    'SIGNALS': {
        'BUFFER_SIZE': 1,
        'SIGNAL_CLASS': 'core.bungie_signal.BungieTestSignalProcessor'
    }
}
67 |
--------------------------------------------------------------------------------