├── .gitignore
├── .project
├── .pydevproject
├── .settings
    └── org.eclipse.core.resources.prefs
├── .travis.yml
├── LICENSE
├── README.rst
├── bungiesearch
    ├── __init__.py
    ├── aliases.py
    ├── fields.py
    ├── indices.py
    ├── logger.py
    ├── management
    │   ├── __init__.py
    │   └── commands
    │   │   ├── __init__.py
    │   │   ├── _utils.py
    │   │   ├── clear_index.py
    │   │   ├── rebuild_index.py
    │   │   └── search_index.py
    ├── managers.py
    ├── signals.py
    └── utils.py
├── requirements.txt
├── runtests.sh
├── setup.cfg
├── setup.py
├── setup.sh
└── tests
    ├── __init__.py
    ├── core
        ├── __init__.py
        ├── analysis.py
        ├── bungie_signal.py
        ├── models.py
        ├── search_aliases.py
        ├── search_indices.py
        ├── search_indices_bis.py
        ├── templates
        │   └── article.txt
        ├── test_bungiesearch.py
        └── test_settings.py
    ├── manage.py
    ├── pytest.ini
    └── settings.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *.egg
 3 | 
 4 | /venv
 5 | /build/
 6 | /dist/
 7 | /cache/
 8 | /.cache/
 9 | /bungiesearch.egg-info/
10 | 


--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>bungiesearch</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.python.pydev.PyDevBuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 	</buildSpec>
14 | 	<natures>
15 | 		<nature>org.python.pydev.pythonNature</nature>
16 | 	</natures>
17 | </projectDescription>
18 | 


--------------------------------------------------------------------------------
/.pydevproject:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 2 | <?eclipse-pydev version="1.0"?><pydev_project>
 3 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">bungiesearch</pydev_property>
 4 | <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
 5 | <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
 6 | <path>/${PROJECT_DIR_NAME}/bungiesearch</path>
 7 | <path>/${PROJECT_DIR_NAME}/tests</path>
 8 | </pydev_pathproperty>
 9 | </pydev_project>
10 | 


--------------------------------------------------------------------------------
/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding/setup.py=utf-8
3 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: false
 2 | language: python
 3 | env:
 4 |   global:
 5 |     - TRAVIS=true
 6 |     - ELASTIC_SEARCH_URL=localhost
 7 | matrix:
 8 |   include:
 9 |     - python: "2.7"
10 |       env: DJANGO_VERSION=">=1.8,<1.9"
11 |     - python: "2.7"
12 |       env: DJANGO_VERSION=">=1.9,<1.10"
13 |     - python: "2.7"
14 |       env: DJANGO_VERSION=">=1.10,<1.11"
15 |     - python: "3.4"
16 |       env: DJANGO_VERSION=">=1.8,<1.9"
17 |     - python: "3.4"
18 |       env: DJANGO_VERSION=">=1.9,<1.10"
19 |     - python: "3.5"
20 |       env: DJANGO_VERSION=">=1.8,<1.9"
21 |     - python: "3.5"
22 |       env: DJANGO_VERSION=">=1.9,<1.10"
23 |     - python: "3.5"
24 |       env: DJANGO_VERSION=">=1.10,<1.11" COVERAGE=true
25 | install:
26 |   - pip install Django$DJANGO_VERSION
27 |   - pip install -r requirements.txt
28 |   - wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-2.3.0.zip
29 |   - unzip -o elasticsearch-2.3.0.zip &> /dev/null
30 | script:
31 |   - ./runtests.sh --cluster
32 | after_success:
33 |   test -n "$COVERAGE" && coveralls
34 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014, Sparrho
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of Sparrho nor the names of its
15 |   contributors may be used to endorse or promote products derived from
16 |   this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | WARNING: UNMAINTAINED
  2 | =====================
  3 | This package is no longer maintained. You may want to check out the `elasticsearch-dsl-py <https://github.com/elasticsearch/elasticsearch-dsl-py>`__ or `django-haystack <https://github.com/django-haystack/django-haystack>`__.
  4 | 
  5 | Bungiesearch
  6 | ============
  7 | 
  8 | |Build Status| |Coverage Status|
  9 | 
 10 | .. contents:: Table of contents
 11 |    :depth: 2
 12 | 
 13 | Purpose
 14 | =======
 15 | 
 16 | Bungiesearch is a Django wrapper for
 17 | `elasticsearch-dsl-py <https://github.com/elasticsearch/elasticsearch-dsl-py>`__.
 18 | It inherits from elasticsearch-dsl-py's ``Search`` class, so all the
 19 | fabulous features developed by the elasticsearch-dsl-py team are also
 20 | available in Bungiesearch. In addition, just like ``Search``,
 21 | Bungiesearch is a lazy searching class (and iterable), meaning you can
 22 | call functions in a row, or do something like the following.
 23 | 
 24 | .. code:: python
 25 | 
 26 |     lazy = Article.objects.search.query('match', _all='Description')
 27 |     print len(lazy) # Prints the number of hits by only fetching the number of items.
 28 |     for item in lazy[5:10]:
 29 |         print item
 30 | 
 31 | Features
 32 | ========
 33 | 
 34 | -  Core Python friendly
 35 | 
 36 |    -  Iteration (``[x for x in lazy_search]``)
 37 |    -  Get items (``lazy_search[10]``)
 38 |    -  Number of hits via ``len`` (``len(lazy_search)``)
 39 | 
 40 | -  Index management
 41 | 
 42 |    -  Creating and deleting an index.
 43 |    -  Creating, updating and deleting doctypes and their mappings.
 44 |    -  Update index doctypes.
 45 | 
 46 | -  Django Model Mapping
 47 | 
 48 |    -  Very easy mapping (no lies).
 49 |    -  Automatic model mapping (and supports undefined models by
 50 |       returning a ``Result`` instance of ``elasticsearch-dsl-py``).
 51 |    -  Efficient database fetching:
 52 | 
 53 |       -  One fetch for all items of a given model.
 54 |       -  Fetches only desired fields.
 55 | 
 56 | -  Django Manager
 57 | 
 58 |    -  Easy model integration:
 59 |       ``MyModel.search.query("match", _all="something to search")``.
 60 |    -  Search aliases (search shortcuts with as many parameters as
 61 |       wanted): ``Tweet.objects.bungie_title_search("bungie")`` or
 62 |       ``Article.objects.bungie_title_search("bungie")``, where
 63 |       ``bungie_title_search`` is uniquely defined.
 64 | 
 65 | -  Django signals
 66 | 
 67 |    -  Connect to post save and pre delete signals for the elasticsearch
 68 |       index to correctly reflect the database (almost) at all times.
 69 | 
 70 | -  Requirements
 71 | 
 72 |    -  Django >= 1.8
 73 |    -  Python 2.7, 3.4, 3.5
 74 | 
 75 | Feature examples
 76 | ----------------
 77 | 
 78 | See section "Quick start example" at the bottom of page to see the code
 79 | needed to perform the following examples. **Query a word (or list
 80 | thereof) on a managed model:**
 81 | 
 82 | ``Article.objects.search.query('match', _all='Description')``
 83 | 
 84 | Use a search alias on a model's manager.
 85 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 86 | 
 87 | ``Article.objects.bsearch_title_search('title')``
 88 | 
 89 | Use a search alias on a bungiesearch instance.
 90 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 91 | 
 92 | ``Article.objects.search.bsearch_title_search('title').bsearch_titlefilter('filter this title')``
 93 | 
 94 | Iterate over search results
 95 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 96 | 
 97 | .. code:: python
 98 | 
 99 |     # Will print the Django model instance.
100 |     for result in Article.objects.search.query('match', _all='Description'):
101 |         print result
102 | 
103 | Fetch a single item
104 | ~~~~~~~~~~~~~~~~~~~
105 | 
106 | .. code:: python
107 | 
108 |     Article.objects.search.query('match', _all='Description')[0]
109 | 
110 | Get the number of returned items
111 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
112 | 
113 | .. code:: python
114 | 
115 |     print len(Article.objects.search.query('match', _all='Description'))
116 | 
117 | Deferred model instantiation
118 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
119 | 
120 | .. code:: python
121 | 
122 |     # Will print the Django model instance's primary key. Will only fetch the `pk` field from the database.
123 |     for result in Article.objects.search.query('match', _all='Description').only('pk'):
124 |         print result.pk
125 | 
126 | Elasticsearch limited field fetching
127 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
128 | 
129 | .. code:: python
130 | 
131 |     # Will print the Django model instance. However, elasticsearch's response only has the `_id` field.
132 |     for result in Article.objects.search.query('match', _all='Description').fields('_id'):
133 |         print result
134 | 
135 | Get a specific number of items with an offset.
136 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
137 | 
138 | This is actually elasticsearch-dsl-py functionality, but it's
139 | demonstrated here because we can iterate over the results via
140 | Bungiesearch.
141 | 
142 | .. code:: python
143 | 
144 |     for item in Article.objects.bsearch_title_search('title').only('pk').fields('_id')[5:7]:
145 |         print item
146 | 
147 | Lazy objects
148 | ~~~~~~~~~~~~
149 | 
150 | .. code:: python
151 | 
152 |     lazy = Article.objects.bsearch_title_search('title')
153 |     print len(lazy)
154 |     for item in lazy.filter('range', effective_date={'lte': '2014-09-22'}):
155 |         print item
156 | 
157 | Installation
158 | ============
159 | 
160 | Unless noted otherwise, each step is required.
161 | 
162 | Install the package
163 | -------------------
164 | 
165 | The easiest way is to install the package from PyPi:
166 | 
167 | ``pip install bungiesearch``
168 | 
169 | **Note:** Check your version of Django after installing bungiesearch. It
170 | was reported to me directly that installing bungiesearch may upgrade
171 | your version of Django, although I haven't been able to confirm that
172 | myself. Bungiesearch depends on Django 1.8 and above.
173 | 
174 | In Django
175 | ---------
176 | 
177 | Updating your Django models
178 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
179 | 
180 | **Note:** this part is only needed if you want to be able to use search
181 | aliases, which allow you to define shortcuts to complex queries,
182 | available directly from your Django models. I think it's extremely
183 | practical.
184 | 
185 | 1. Open your ``models.py`` file.
186 | 2. Add the bungiesearch manager import:
187 |    ``from bungiesearch.managers import BungiesearchManager``
188 | 3. Find the model, or models, you wish to index on Elasticsearch and set
189 |    them to be managed by Bungiesearch by adding the objects field to
190 |    them, as such: ``objects = BungiesearchManager()``. You should now
191 |    have a Django model `similar to
192 |    this <https://github.com/ChristopherRabotin/bungiesearch#django-model>`__.
193 | 
194 | Creating bungiesearch search indexes
195 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 | 
197 | The search indexes define how bungiesearch should serialize each of the
198 | model's objects. It effectively defines how your object is serialized
199 | and how the ES index should be structured. These are referred to as
200 | `ModelIndex <https://github.com/ChristopherRabotin/bungiesearch#modelindex-1>`__\ es.
201 | 
202 | A good practice here is to have all the bungiesearch stuff in its own
203 | package. For example, for the section of the Sparrho platform that uses
204 | Django, we have a package called ``search`` where we define the search
205 | indexes, and a subpackage called ``aliases`` which has the many aliases
206 | we use (more on that later).
207 | 
208 | 1. Create a subclass of ``ModelIndex``, which you can import with
209 |    ``from bungiesearch.indices import ModelIndex``, in a new module
210 |    preferably.
211 | 2. In this class, define a class called ``Meta``: it will hold meta
212 |    information of this search index for bungiesearch's internal working.
213 | 3. Import the Django model you want to index (from your models file)
214 |    and, in the Meta class, define a field called ``model``, which must
215 |    be set to the model you want indexed.
216 | 4. By default, bungiesearch will index every field of your model. This
217 |    may not always be desired, so you can define which fields must be
218 |    excluded in this ``Meta`` class, via the exclude field.
219 | 5. There are plenty of options, so definitely have a read through the
220 |    documentation for
221 |    `ModelIndex <https://github.com/ChristopherRabotin/bungiesearch#modelindex-1>`__.
222 | 
223 | Here's `an
224 | example <https://github.com/ChristopherRabotin/bungiesearch#modelindex>`__ of a
225 | search index. There can be many such definitions in a file.
226 | 
227 | Django settings
228 | ~~~~~~~~~~~~~~~
229 | 
230 | This is the final required step. Here's the `full
231 | documentation <https://github.com/ChristopherRabotin/bungiesearch#settings>`__ of
232 | this step.
233 | 
234 | 1. Open your settings file and add a ``BUNGIESEARCH`` variable, which
235 |    must be a dictionary.
236 | 2. Define ``URLS`` as a list of URLs (which can contain only one) of
237 |    your ES servers.
238 | 3. Define the ``INDICES`` key as a dictionary where the key is the name
239 |    of the index on ES that you want, and the value is the full Python
240 |    path to the module which has all the ModelIndex classes to be
241 |    indexed on that index name.
242 | 4. Set ``ALIASES`` to an empty dictionary (until you define any search
243 |    aliases).
244 | 5. You can keep other values as their defaults.
245 | 
246 | In your shell
247 | -------------
248 | 
249 | Create the ES indexes
250 | ~~~~~~~~~~~~~~~~~~~~~
251 | 
252 | From your shell, in the Django environment, run the following:
253 | 
254 | ``python manage.py search_index --create``
255 | 
256 | Start populating the index
257 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
258 | 
259 | Run the following which will take each of the objects in your model,
260 | serialize them, and add them to the elasticsearch index.
261 | 
262 | ``python manage.py search_index --update``
263 | 
264 | **Note:** With additional parameters, you can limit the number of
265 | documents to be indexed, as well as set conditions on whether they
266 | should be indexed based on updated time for example.
267 | 
268 | In Elasticsearch
269 | ----------------
270 | 
271 | You can now open your elasticsearch dashboard, such as Elastic HQ, and
272 | see that your index is created with the appropriate mapping and has
273 | items that are indexed.
274 | 
275 | Quick start example
276 | ===================
277 | 
278 | This example is from the ``tests`` folder. It may be partially out-dated,
279 | so please refer to the ``tests`` folder for the latest version.
280 | 
281 | Procedure
282 | ---------
283 | 
284 | 1. In your models.py file (or your managers.py), import bungiesearch and
285 |    use it as a model manager.
286 | 2. Define one or more ModelIndex subclasses which define the mapping
287 |    between your Django model and elasticsearch.
288 | 3. (Optional) Define SearchAlias subclasses which make it trivial to
289 |    call complex elasticsearch-dsl-py functions.
290 | 4. Add a BUNGIESEARCH variable in your Django settings, which must
291 |    contain the elasticsearch URL(s), the modules for the indices, the
292 |    modules for the search aliases and the signal definitions.
293 | 
294 | Example
295 | -------
296 | 
297 | Here's the code which is applicable to the previous examples.
298 | **Django model:**
299 | 
300 | .. code:: python
301 | 
302 |     from django.db import models
303 |     from bungiesearch.managers import BungiesearchManager
304 | 
305 |     class Article(models.Model):
306 |         title = models.TextField(db_index=True)
307 |         authors = models.TextField(blank=True)
308 |         description = models.TextField(blank=True)
309 |         link = models.URLField(max_length=510, unique=True, db_index=True)
310 |         published = models.DateTimeField(null=True)
311 |         created = models.DateTimeField(auto_now_add=True)
312 |         updated = models.DateTimeField(null=True)
313 |         tweet_count = models.IntegerField()
314 |         raw = models.BinaryField(null=True)
315 |         source_hash = models.BigIntegerField(null=True)
316 |         missing_data = models.CharField(blank=True, max_length=255)
317 |         positive_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
318 |         negative_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
319 |         popularity_index = models.IntegerField(default=0)
320 | 
321 |         objects = BungiesearchManager()
322 | 
323 |         class Meta:
324 |             app_label = 'core'
325 | 
326 | ModelIndex
327 | ~~~~~~~~~~
328 | 
329 | The following ModelIndex will generate a mapping containing all fields
330 | from ``Article``, minus those defined in ``ArticleIndex.Meta.exclude``.
331 | When the mapping is generated, each field will be given the most appropriate
332 | `elasticsearch core
333 | type <http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-core-types.html>`__,
334 | with default attributes (as defined in bungiesearch.fields).
335 | 
336 | These default attributes can be overwritten with
337 | ``ArticleIndex.Meta.hotfixes``: each dictionary key must be a field
338 | defined either in the model or in the ModelIndex subclass
339 | (``ArticleIndex`` in this case).
340 | 
341 | .. code:: python
342 | 
343 |     from core.models import Article
344 |     from bungiesearch.fields import DateField, StringField
345 |     from bungiesearch.indices import ModelIndex
346 | 
347 | 
348 |     class ArticleIndex(ModelIndex):
349 |         effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
350 |         meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
351 | 
352 |         class Meta:
353 |             model = Article
354 |             exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
355 |             hotfixes = {'updated': {'null_value': '2013-07-01'},
356 |                         'title': {'boost': 1.75},
357 |                         'description': {'boost': 1.35},
358 |                         'full_text': {'boost': 1.125}}
359 | 
360 | SearchAlias
361 | ~~~~~~~~~~~
362 | 
363 | Defines a search alias for one or more models (in this case only for
364 | ``core.models.Article``).
365 | 
366 | .. code:: python
367 | 
368 |     from core.models import Article
369 |     from bungiesearch.aliases import SearchAlias
370 | 
371 | 
372 |     class SearchTitle(SearchAlias):
373 |         def alias_for(self, title):
374 |             return self.search_instance.query('match', title=title)
375 | 
376 |         class Meta:
377 |             models = (Article,)
378 |             alias_name = 'title_search' # This is optional. If none is provided, the name will be the class name in lower case.
379 | 
380 |     class InvalidAlias(SearchAlias):
381 |         def alias_for_does_not_exist(self, title):
382 |             return title
383 | 
384 |         class Meta:
385 |             models = (Article,)
386 | 
387 | Django settings
388 | ~~~~~~~~~~~~~~~
389 | 
390 | .. code:: python
391 | 
392 |     BUNGIESEARCH = {
393 |                     'URLS': [os.getenv('ELASTIC_SEARCH_URL')],
394 |                     'INDICES': {'bungiesearch_demo': 'core.search_indices'},
395 |                     'ALIASES': {'bsearch': 'myproject.search_aliases'},
396 |                     'SIGNALS': {'BUFFER_SIZE': 1}  # uses BungieSignalProcessor
397 |                     }
398 | 
399 | Documentation
400 | =============
401 | 
402 | ModelIndex
403 | ----------
404 | 
405 | A ``ModelIndex`` defines mapping and object extraction for indexing of a
406 | given Django model.
407 | 
408 | Any Django model to be managed by bungiesearch must have a defined
409 | ModelIndex subclass. This subclass must contain a subclass called
410 | ``Meta`` which must have a ``model`` attribute (sets the model which it
411 | represents).
412 | 
413 | Class attributes
414 | ~~~~~~~~~~~~~~~~
415 | 
416 | As detailed below, the doc type mapping will contain fields from the
417 | model it relates to. However, one may often need to index fields which
418 | correspond to either a concatenation of fields of the model or some
419 | logical operation.
420 | 
421 | Bungiesearch makes this very easy: simply define a class attribute as
422 | whichever core type, and set the ``eval_as`` constructor parameter to
423 | a one line Python statement. The object is referenced as ``obj`` (not
424 | ``self`` nor ``object``, just ``obj``).
425 | 
426 | Example
427 | ^^^^^^^
428 | 
429 | This is a partial example as the Meta subclass is not defined, yet
430 | mandatory (cf. below).
431 | 
432 | .. code:: python
433 | 
434 |     from bungiesearch.fields import DateField, StringField
435 |     from bungiesearch.indices import ModelIndex
436 | 
437 |     class ArticleIndex(ModelIndex):
438 |         effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
439 |         meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
440 | 
441 | Here, both ``effective_date`` and ``meta_data`` will be part of the doc
442 | type mapping, but won't be reverse mapped since those fields do not
443 | exist in the model.
444 | 
445 | This can also be used to index foreign keys:
446 | 
447 | .. code:: python
448 | 
449 |     some_field_name = StringField(eval_as='",".join([item for item in obj.some_foreign_relation.values_list("some_field", flat=True)]) if obj.some_foreign_relation else ""')
450 | 
451 | Class methods
452 | ~~~~~~~~~~~~~
453 | 
454 | matches\_indexing\_condition
455 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
456 | 
457 | Override this function to specify whether an item should be indexed or
458 | not. This is useful when defining multiple indices (and ModelIndex
459 | classes) for a given model. This method's signature and super class code
460 | is as follows, and allows indexing of all items.
461 | 
462 | .. code:: python
463 | 
464 |     def matches_indexing_condition(self, item):
465 |         return True
466 | 
467 | For example, if a given elasticsearch index should contain only items
468 | whose title starts with ``"Awesome"``, then this method can be
469 | overridden as follows.
470 | 
471 | .. code:: python
472 | 
473 |     def matches_indexing_condition(self, item):
474 |         return item.title.startswith("Awesome")
475 | 
476 | Meta subclass attributes
477 | ~~~~~~~~~~~~~~~~~~~~~~~~
478 | 
479 | **Note**: in the following, any variable defined as being a ``list``
480 | could also be a ``tuple``. **model** -- *Required:* defines the Django
481 | model for which this ModelIndex is applicable.
482 | 
483 | fields
484 | ^^^^^^
485 | 
486 | *Optional:* list of fields (or columns) which must be fetched when
487 | serializing the object for elasticsearch, or when reverse mapping the
488 | object from elasticsearch back to a Django Model instance. By default,
489 | all fields will be fetched. Setting this *will* restrict which fields
490 | can be fetched and may lead to errors when serializing the object. It is
491 | recommended to use the ``exclude`` attribute instead (cf. below).
492 | 
493 | exclude
494 | ^^^^^^^
495 | 
496 | *Optional:* list of fields (or columns) which must not be fetched when
497 | serializing or deserializing the object.
498 | 
499 | hotfixes
500 | ^^^^^^^^
501 | 
502 | *Optional:* a dictionary whose keys are index fields and whose values
503 | are dictionaries which define `core type
504 | attributes <http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-core-types.html>`__.
505 | By default, there aren't any special settings, apart for String fields,
506 | where the
507 | `analyzer <http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/analysis-analyzers.html>`__
508 | is set to
509 | `snowball <http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/analysis-snowball-analyzer.html>`__
510 | (``{'analyzer': 'snowball'}``).
511 | 
512 | additional\_fields
513 | ^^^^^^^^^^^^^^^^^^
514 | 
515 | *Optional:* additional fields to fetch for mapping, may it be for
516 | ``eval_as`` fields or when returning the object from the database.
517 | 
518 | id\_field
519 | ^^^^^^^^^
520 | 
521 | *Optional:* the model field to use as a unique ID for elasticsearch's
522 | metadata ``_id``. Defaults to ``id`` (also called
523 | `pk <https://docs.djangoproject.com/en/dev/topics/db/models/#automatic-primary-key-fields>`__).
524 | 
525 | updated\_field
526 | ^^^^^^^^^^^^^^
527 | 
528 | *Optional:* set the model's field which can be filtered on dates in
529 | order to find when objects have been updated. Note, this is *mandatory*
530 | to use ``--start`` and/or ``--end`` when updating index (with
531 | ``search_index --update``).
532 | 
533 | optimize\_queries
534 | ^^^^^^^^^^^^^^^^^
535 | 
536 | *Optional:* set to True to make efficient queries when automatically
537 | mapping to database objects. This will *always* restrict fetching to the
538 | fields set in ``fields`` and in ``additional_fields``. *Note:* You can
539 | also perform an optimal database query with ``.only('__model')``, which
540 | will use the same fields as ``optimize_queries``, or
541 | ``.only('__fields')``, which will use the fields provided in the
542 | ``.fields()`` call.
543 | 
544 | indexing\_query
545 | ^^^^^^^^^^^^^^^
546 | 
547 | *Optional:* set to a QuerySet instance to specify the query used when
548 | the search\_index command is run to index. This **does not** affect how
549 | each piece of content is indexed.
550 | 
551 | default
552 | ^^^^^^^
553 | 
554 | Enables support for a given model to be indexed on several elasticsearch
555 | indices. Set to ``False`` on all but the default index. **Note**: if all
556 | managed models are set with ``default=False`` then Bungiesearch will
557 | fail to find and index that model.
558 | 
559 | Example
560 | ~~~~~~~
561 | 
562 | Indexes all objects of ``Article``, as long as their ``updated``
563 | datetime is less than `21 October 2015
564 | 04:29 <https://en.wikipedia.org/wiki/Back_to_the_Future_Part_II>`__.
565 | 
566 | .. code:: python
567 | 
568 |     from core.models import Article
569 |     from bungiesearch.indices import ModelIndex
570 |     from datetime import datetime
571 | 
572 |     class ArticleIndex(ModelIndex):
573 | 
574 |         def matches_indexing_condition(self, item):
575 |             return item.updated < datetime(2015, 10, 21, 4, 29)
576 | 
577 |         class Meta:
578 |             model = Article
579 |             id_field = 'id' # That's actually the default value, so it's not really needed.
580 |             exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
581 |             hotfixes = {'updated': {'null_value': '2013-07-01'},
582 |                         'title': {'boost': 1.75},
583 |                         'description': {'boost': 1.35},
584 |                         'full_text': {'boost': 1.125}}
585 |             optimize_queries = True
586 |             indexing_query = Article.objects.defer(*exclude).select_related().all().prefetch_related('tags')
587 | 
588 | SearchAlias
589 | -----------
590 | 
591 | A ``SearchAlias`` defines search shortcuts (somewhat similar to `Django
592 | managers <https://docs.djangoproject.com/en/dev/topics/db/managers/>`__).
593 | Often times, a given search will be used in multiple parts of the code.
594 | SearchAliases allow you to define those queries, filters, or any
595 | bungiesearch/elasticsearch-dsl-py calls as an alias.
596 | 
597 | A search alias is either applicable to a ``list`` (or ``tuple``) of
598 | managed models, or to any bungiesearch instance. It's very simple, so
599 | here's an example which is detailed right below.
600 | 
601 | Example
602 | ~~~~~~~
603 | 
604 | The most simple implementation of a SearchAlias is as follows. This
605 | search alias can be called via ``Article.objects.bungie_title`` (or
606 | ``Article.objects.search.bungie_title``), supposing that the namespace
607 | is set to ``None`` in the settings (cf. below).
608 | 
609 | Definition
610 | ^^^^^^^^^^
611 | 
612 | .. code:: python
613 | 
614 |     from bungiesearch.aliases import SearchAlias
615 | 
616 |     class Title(SearchAlias):
617 |         def alias_for(self, title):
618 |             return self.search_instance.query('match', title=title)
619 | 
620 | Usage
621 | ^^^^^
622 | 
623 | .. code:: python
624 | 
625 |     Article.objects.bungie_title('title')
626 | 
627 | Method overwrite
628 | ~~~~~~~~~~~~~~~~
629 | 
630 | Any implementation needs to inherit from
631 | ``bungiesearch.aliases.SearchAlias`` and overwrite ``alias_for``. You
632 | can set as many or as few parameters as you want for that function
633 | (since bungiesearch only returns the pointer to that function without
634 | actually calling it).
635 | 
636 | Since each managed model has its own doc type, ``self.search_instance``
637 | is a bungiesearch instance set to search the specific doctype.
638 | 
639 | Meta subclass attributes
640 | ~~~~~~~~~~~~~~~~~~~~~~~~
641 | 
642 | Although not mandatory, the ``Meta`` subclass enables custom naming and
643 | model restrictions for a search alias.
644 | 
645 | models
646 | ^^^^^^
647 | 
648 | *Optional:* ``list`` (or ``tuple``) of Django models which are allowed
649 | to use this search alias. If a model which is not allowed to use this
650 | SearchAlias tries it, a ``ValueError`` will be raised.
651 | 
652 | alias\_name
653 | ^^^^^^^^^^^
654 | 
655 | *Optional:* A string corresponding to the suffix name of this search alias.
656 | Defaults to the lower case class name.
657 | 
658 | **WARNING**: As explained in the "Settings" section below, all search
659 | aliases in a given module share the prefix (or namespace). This is to
660 | prevent aliases from accidentally overwriting Django manager functions
661 | (e.g. ``update`` or ``get``). In other words, if you define the
662 | ``alias_name`` to ``test``, then it must be called as
663 | ``model_obj.objects.$prefix$_test`` where ``$prefix$`` is the prefix
664 | defined in the settings. This prefix is also applicable to search
665 | aliases which are available via bungiesearch instances directly. Hence,
666 | one can define in one module search utilities (e.g. ``regex`` and
667 | ``range``) and define model specific aliases (e.g. ``title``) in another
668 | module, and use both in conjunction as such:
669 | ``Article.objects.search.bungie_title('search title').utils_range(field='created', gte='2014-05-20', as_query=True)``.
670 | These aliases can be concatenated ad vitam aeternam.
671 | 
672 | Sophisticated example
673 | ~~~~~~~~~~~~~~~~~~~~~
674 | 
675 | This example shows that we can have some fun with search aliases. In
676 | this case, we define a Range alias which is applicable to any field on
677 | any model.
678 | 
679 | .. code:: python
680 | 
681 |     class Range(SearchAlias):
682 |         def alias_for(self, field, gte=None, lte=None, boost=None, as_query=False):
683 |             body = {field: {}}
684 |             if gte:
685 |                 body[field]['gte'] = gte
686 |             if lte:
687 |                 body[field]['lte'] = lte
688 |             if boost:
689 |                 if not as_query:
690 |                     logging.warning('Boost is not applicable to search alias Range when not used as a query.')
691 |                 else:
692 |                     body[field]['boost'] = boost
693 |             if as_query:
694 |                 return self.search_instance.query({'range': body})
695 |             return self.search_instance.filter({'range': body})
696 | 
697 | We can use it as such
698 | ``Article.objects.bungie_range(field='created', gte='2014-05-20', as_query=True)``.
699 | 
700 | Settings
701 | --------
702 | Add 'bungiesearch' to INSTALLED_APPS.
703 | 
704 | You must define ``BUNGIESEARCH`` in your Django settings in order for
705 | bungiesearch to know elasticsearch URL(s) and which index name contains
706 | mappings for each ModelIndex.
707 | 
708 | .. code:: python
709 | 
710 |     BUNGIESEARCH = {
711 |                     'URLS': ['localhost'], # No leading http:// or the elasticsearch client will complain.
712 |                     'INDICES': {'main_index': 'myproject.myapp.myindices'}, # Must be a module path.
713 |                     'ALIASES': {'bsearch': 'myproject.search_aliases'},
714 |                     'SIGNALS': {'BUFFER_SIZE': 1},
715 |                     'TIMEOUT': 5
716 |                     }
717 | 
718 | URLS
719 | ~~~~
720 | 
721 | *Required:* must be a list of URLs which host elasticsearch instance(s).
722 | This is directly sent to elasticsearch-dsl-py, so any issue with
723 | multiple URLs should be referred to them.
724 | 
725 | INDICES
726 | ~~~~~~~
727 | 
728 | *Required:* must be a dictionary where each key is the name of an
729 | elasticsearch index and each value is a path to a Python module
730 | containing classes which inherit from
731 | ``bungiesearch.indices.ModelIndex`` (cf. below).
732 | 
733 | ALIASES
734 | ~~~~~~~
735 | 
736 | *Optional:* a dictionary whose key is the alias namespace and whose
737 | value is the Python module containing classes which inherit from
738 | ``bungiesearch.aliases.SearchAlias``. If the namespace is ``None``, then
739 | the namespace defaults to ``bungie``. If the namespace is an empty string,
740 | there will be no alias namespace. Otherwise, an underscore is appended
741 | to the provided namespace. In the example above, each search alias
742 | defined in ``myproject.search_aliases`` will be referenced as
743 | ``$ModelObj$.objects.bsearch_$alias$``, where ``$ModelObj$`` is a Django
744 | model and ``$alias$`` is the name of the search alias.
745 | 
746 | The purpose is to not accidentally overwrite Django's default manager
747 | functions with search aliases.
748 | 
749 | SIGNALS
750 | ~~~~~~~
751 | 
752 | *Optional:* if it exists, it must be a dictionary (even empty), and will
753 | connect to the ``post save`` and ``pre delete`` model functions of *all*
754 | models using ``bungiesearch.managers.BungiesearchManager`` as a manager.
755 | One may also define a signal processor class for more custom
756 | functionality by placing the string value of the module path under a key
757 | called ``SIGNAL_CLASS`` in the dictionary value of ``SIGNALS`` and
758 | defining ``setup`` and ``teardown`` methods, which take ``model`` as the
759 | only parameter. These methods connect and disconnect the signal
760 | processing class to django signals (signals are connected to each model
761 | which uses a BungiesearchManager).
762 | 
763 | If ``SIGNALS`` is not defined in the settings, *none* of the models
764 | managed by BungiesearchManager will automatically update the index when
765 | a new item is created or deleted.
766 | 
767 | BUFFER\_SIZE
768 | ^^^^^^^^^^^^
769 | 
770 | *Optional:* an integer representing the number of items to buffer before
771 | making a bulk index update, defaults to ``100``.
772 | 
773 | **WARNING**: if your application is shut down before the buffer is
774 | emptied, then any buffered instance *will not* be indexed on
775 | elasticsearch. Hence, a possibly better implementation is wrapping
776 | ``post_save_connector`` and ``pre_delete_connector`` from
777 | ``bungiesearch.signals`` in a celery task. It is not implemented as such
778 | here in order to not require ``celery``.
779 | 
780 | TIMEOUT
781 | ~~~~~~~
782 | 
783 | *Optional:* Elasticsearch connection timeout in seconds. Defaults to
784 | ``5``.
785 | 
786 | Testing
787 | =======
788 | 
789 | The easiest way to run the tests is to install all dev dependencies using
790 | ``./setup.sh`` then run ``./runtests.sh``.
791 | 
792 | All Bungiesearch tests are in ``tests/core/test_bungiesearch.py``. You
793 | can run the tests by creating a Python virtual environment, installing
794 | the requirements from ``requirements.txt``, installing the package
795 | (``pip install .``) and running ``python tests/manage.py test``. Make
796 | sure to update ``tests/settings.py`` to use your own elasticsearch URLs,
797 | or update the ELASTIC\_SEARCH\_URL environment variable.
798 | 
799 | .. |Build Status| image:: https://travis-ci.org/ChristopherRabotin/bungiesearch.svg?branch=master
800 |    :target: https://travis-ci.org/ChristopherRabotin/bungiesearch
801 | .. |Coverage Status| image:: https://coveralls.io/repos/ChristopherRabotin/bungiesearch/badge.svg?branch=master&service=github
802 |    :target: https://coveralls.io/github/ChristopherRabotin/bungiesearch?branch=master
803 | 


--------------------------------------------------------------------------------
/bungiesearch/__init__.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict
  2 | from importlib import import_module
  3 | 
  4 | from django.conf import settings
  5 | from elasticsearch.client import Elasticsearch
  6 | from elasticsearch_dsl.search import Search
  7 | from six import iteritems, itervalues, string_types
  8 | 
  9 | from .aliases import SearchAlias
 10 | from .indices import ModelIndex
 11 | from .logger import logger
 12 | 
 13 | 
 14 | class Bungiesearch(Search):
 15 |     '''
 16 |     This object is used to read Django settings and initialize the elasticsearch connection.
 17 |     '''
 18 |     DEFAULT_TIMEOUT = 5
 19 |     BUNGIE = settings.BUNGIESEARCH
 20 | 
 21 |     # The following code loads each model index_name module (as defined in the settings) and stores
 22 |     # index_name name to model index_name, and index_name name to model. Settings shouldn't change between
 23 |     # subsequent calls to Search(), which is why this is static code.
 24 | 
 25 |     _cached_es_instances = {}
 26 |     # Let's go through the settings in order to map each defined Model/ModelIndex to the elasticsearch index_name.
 27 |     _model_to_index, _model_name_to_index, _model_name_to_model_idx = defaultdict(list), defaultdict(list), defaultdict(list)
 28 |     _index_to_model, _idx_name_to_mdl_to_mdlidx = defaultdict(list), defaultdict(dict)
 29 |     _model_name_to_default_index, _alias_hooks = {}, {}
 30 |     _managed_models = []
 31 |     __loaded_indices__ = False
 32 | 
 33 |     @classmethod
 34 |     def __load_settings__(cls):
 35 |         if cls.__loaded_indices__:
 36 |             return
 37 |         cls.__loaded_indices__ = True
 38 | 
 39 |         # Loading indices.
 40 |         for index_name, module_str in iteritems(cls.BUNGIE['INDICES']):
 41 |             index_module = import_module(module_str)
 42 |             for index_obj in itervalues(index_module.__dict__):
 43 |                 try:
 44 |                     if issubclass(index_obj, ModelIndex) and index_obj != ModelIndex:
 45 |                         index_instance = index_obj()
 46 |                         assoc_model = index_instance.get_model()
 47 |                         cls._index_to_model[index_name].append(assoc_model)
 48 |                         cls._model_name_to_model_idx[assoc_model.__name__].append(index_instance)
 49 |                         cls._idx_name_to_mdl_to_mdlidx[index_name][assoc_model.__name__] = index_instance
 50 |                         if index_instance.is_default:
 51 |                             if assoc_model.__name__ in cls._model_name_to_default_index:
 52 |                                 raise AttributeError('ModelIndex {} on index {} is marked as default, but {} was already set as default.'.format(index_instance, index_name, cls._model_name_to_default_index[assoc_model.__name__]))
 53 |                             cls._model_name_to_default_index[assoc_model.__name__] = index_instance
 54 |                 except TypeError:
 55 |                     pass # Oops, just attempted to get subclasses of a non-class.
 56 | 
 57 |         # Create reverse maps in order to have O(1) access.
 58 |         for index_name, models in iteritems(cls._index_to_model):
 59 |             for model in models:
 60 |                 cls._model_to_index[model].append(index_name)
 61 |                 cls._model_name_to_index[model.__name__].append(index_name)
 62 | 
 63 |         # Loading aliases.
 64 |         for alias_prefix, module_str in iteritems(cls.BUNGIE.get('ALIASES', {})):
 65 |             if alias_prefix is None:
 66 |                 alias_prefix = 'bungie'
 67 |             if alias_prefix != '':
 68 |                 alias_prefix += '_'
 69 |             alias_module = import_module(module_str)
 70 |             for alias_obj in itervalues(alias_module.__dict__):
 71 |                 try:
 72 |                     if issubclass(alias_obj, SearchAlias) and alias_obj != SearchAlias:
 73 |                         alias_instance = alias_obj()
 74 |                         cls._alias_hooks[alias_prefix + alias_instance.alias_name] = alias_instance
 75 |                 except TypeError:
 76 |                     pass # Oops, just attempted to get subclasses of a non-class.
 77 | 
 78 |     @classmethod
 79 |     def _build_key(cls, urls, timeout, **settings):
 80 |         # Order the settings by key and then turn it into a string with
 81 |         # repr. There are a lot of edge cases here, but the worst that
 82 |         # happens is that the key is different and so you get a new
 83 |         # Elasticsearch. We'll probably have to tweak this.
 84 |         settings = sorted(settings.items(), key=lambda item: item[0])
 85 |         settings = repr([(k, v) for k, v in settings])
 86 |         # elasticsearch allows URLs to be a string, so we make sure to
 87 |         # account for that when converting whatever it is into a tuple.
 88 |         if isinstance(urls, string_types):
 89 |             urls = (urls,)
 90 |         else:
 91 |             urls = tuple(urls)
 92 |         # Generate a tuple of all the bits and return that as the key
 93 |         # because that's hashable.
 94 |         key = (urls, timeout, settings)
 95 |         return key
 96 | 
 97 |     @classmethod
 98 |     def get_index(cls, model, via_class=False):
 99 |         '''
100 |         Returns the index name (as a string) for the given model as a class or a string.
101 |         :param model: model name or model class if via_class set to True.
102 |         :param via_class: set to True if parameter model is a class.
103 |         :raise KeyError: If the provided model does not have any index associated.
104 |         '''
105 |         try:
106 |             return cls._model_to_index[model] if via_class else cls._model_name_to_index[model]
107 |         except KeyError:
108 |             raise KeyError('Could not find any index defined for model {}. Is the model in one of the model index modules of BUNGIESEARCH["INDICES"]?'.format(model))
109 | 
110 |     @classmethod
111 |     def get_model_index(cls, model, default=True):
112 |         '''
113 |         Returns the default model index for the given model, or the list of indices if default is False.
114 |         :param model: model name as a string.
115 |         :raise KeyError: If the provided model does not have any index associated.
116 |         '''
117 |         try:
118 |             if default:
119 |                 return cls._model_name_to_default_index[model]
120 |             return cls._model_name_to_model_idx[model]
121 |         except KeyError:
122 |             raise KeyError('Could not find any model index defined for model {}.'.format(model))
123 | 
124 |     @classmethod
125 |     def get_indices(cls):
126 |         '''
127 |         Returns the list of indices defined in the settings.
128 |         '''
129 |         return cls._idx_name_to_mdl_to_mdlidx.keys()
130 | 
131 |     @classmethod
132 |     def get_models(cls, index, as_class=False):
133 |         '''
134 |         Returns the list of models defined for this index.
135 |         :param index: index name.
136 |         :param as_class: set to True to return the model as a model object instead of as a string.
137 |         '''
138 |         try:
139 |             return cls._index_to_model[index] if as_class else cls._idx_name_to_mdl_to_mdlidx[index].keys()
140 |         except KeyError:
141 |             raise KeyError('Could not find any index named {}. Is this index defined in BUNGIESEARCH["INDICES"]?'.format(index))
142 | 
143 |     @classmethod
144 |     def get_model_indices(cls, index):
145 |         '''
146 |         Returns the list of model indices (i.e. ModelIndex objects) defined for this index.
147 |         :param index: index name.
148 |         '''
149 |         try:
150 |             return cls._idx_name_to_mdl_to_mdlidx[index].values()
151 |         except KeyError:
152 |             raise KeyError('Could not find any index named {}. Is this index defined in BUNGIESEARCH["INDICES"]?'.format(index))
153 | 
154 |     @classmethod
155 |     def map_raw_results(cls, raw_results, instance=None):
156 |         '''
157 |         Maps raw results to database model objects.
158 |         :param raw_results: list raw results as returned from elasticsearch-dsl-py.
159 |         :param instance: Bungiesearch instance if you want to make use of `.only()` or `optmize_queries` as defined in the ModelIndex.
160 |         :return: list of mapped results in the *same* order as returned by elasticsearch.
161 |         '''
162 |         # Let's iterate over the results and determine the appropriate mapping.
163 |         model_results = defaultdict(list)
164 |         # Initializing the list to the number of returned results. This allows us to restore each item in its position.
165 |         if hasattr(raw_results, 'hits'):
166 |             results = [None] * len(raw_results.hits)
167 |         else:
168 |             results = [None] * len(raw_results)
169 |         found_results = {}
170 |         for pos, result in enumerate(raw_results):
171 |             model_name = result.meta.doc_type
172 |             if model_name not in Bungiesearch._model_name_to_index or result.meta.index not in Bungiesearch._model_name_to_index[model_name]:
173 |                 logger.warning('Returned object of type {} ({}) is not defined in the settings, or is not associated to the same index as in the settings.'.format(model_name, result))
174 |                 results[pos] = result
175 |             else:
176 |                 meta = Bungiesearch.get_model_index(model_name).Meta
177 |                 model_results['{}.{}'.format(result.meta.index, model_name)].append(result.meta.id)
178 |                 found_results['{1.meta.index}.{0}.{1.meta.id}'.format(model_name, result)] = (pos, result.meta)
179 | 
180 |         # Now that we have model ids per model name, let's fetch everything at once.
181 |         for ref_name, ids in iteritems(model_results):
182 |             index_name, model_name = ref_name.split('.')
183 |             model_idx = Bungiesearch._idx_name_to_mdl_to_mdlidx[index_name][model_name]
184 |             model_obj = model_idx.get_model()
185 |             items = model_obj.objects.filter(pk__in=ids)
186 |             if instance:
187 |                 if instance._only == '__model' or model_idx.optimize_queries:
188 |                     desired_fields = model_idx.fields_to_fetch
189 |                 elif instance._only == '__fields':
190 |                     desired_fields = instance._fields
191 |                 else:
192 |                     desired_fields = instance._only
193 | 
194 |                 if desired_fields: # Prevents setting the database fetch to __fields but not having specified any field to elasticsearch.
195 |                     items = items.only(
196 |                         *[field.name
197 |                           for field in model_obj._meta.get_fields()
198 |                           # For complete backwards compatibility, you may want to exclude
199 |                           # GenericForeignKey from the results.
200 |                           if field.name in desired_fields and \
201 |                              not (field.many_to_one and field.related_model is None)
202 |                          ]
203 |                     )
204 |             # Let's reposition each item in the results and set the _searchmeta meta information.
205 |             for item in items:
206 |                 pos, meta = found_results['{}.{}.{}'.format(index_name, model_name, item.pk)]
207 |                 item._searchmeta = meta
208 |                 results[pos] = item
209 | 
210 |         return results
211 | 
212 |     def __init__(self, urls=None, timeout=None, force_new=False, raw_results=False, **kwargs):
213 |         '''
214 |         Creates a new ElasticSearch DSL object. Grabs the ElasticSearch connection from the pool
215 |         if it has already been initialized. Otherwise, creates a new one.
216 | 
217 |         If no parameters are passed, everything is determined from the Django settings.
218 | 
219 |         :param urls: A list of URLs, or a single string of URL (without leading `http://`), or None to read from settings.
220 |         :param idx: A list of indices or a single string representing an index_name name. Is optional. Will be merged with `idx_alias`.
221 |         :param idx_alias: A list of index_name aliases or a single string representing an index_name alias, as defined in the settings. Will be merged with `index_name`.
222 |         :param timeout: Timeout used in the connection.
223 |         :param force_new: Set to `True` to force a new elasticsearch connection. Otherwise will aggressively use any connection with the exact same settings.
224 |         :param **kwargs: Additional settings to pass to the low level elasticsearch client and to elasticsearch-sal-py.search.Search.
225 |         '''
226 | 
227 |         Bungiesearch.__load_settings__()
228 | 
229 |         urls = urls or Bungiesearch.BUNGIE['URLS']
230 |         if not timeout:
231 |             timeout = Bungiesearch.BUNGIE.get('TIMEOUT', Bungiesearch.DEFAULT_TIMEOUT)
232 | 
233 |         search_keys = ['using', 'index', 'doc_type', 'extra']
234 |         search_settings, es_settings = {}, {}
235 |         for k, v in iteritems(kwargs):
236 |             if k in search_keys:
237 |                 search_settings[k] = v
238 |             else:
239 |                 es_settings[k] = v
240 | 
241 |         if not es_settings:
242 |             # If there aren't any provided elasticsearch settings, let's see if it's defined in the settings.
243 |             es_settings = Bungiesearch.BUNGIE.get('ES_SETTINGS', {})
244 | 
245 |         # Building a caching key to cache the es_instance for later use (and retrieved a previously cached es_instance).
246 |         cache_key = Bungiesearch._build_key(urls, timeout, **es_settings)
247 |         es_instance = None
248 |         if not force_new:
249 |             if cache_key in Bungiesearch._cached_es_instances:
250 |                 es_instance = Bungiesearch._cached_es_instances[cache_key]
251 | 
252 |         if not es_instance:
253 |             es_instance = Elasticsearch(urls, timeout=timeout, **es_settings)
254 |             Bungiesearch._cached_es_instances[cache_key] = es_instance
255 | 
256 |         if 'using' not in search_settings:
257 |             search_settings['using'] = es_instance
258 | 
259 |         super(Bungiesearch, self).__init__(**search_settings)
260 | 
261 |         # Creating instance attributes.
262 |         self._only = [] # Stores the exact fields to fetch from the database when mapping.
263 |         self.results = [] # Store the mapped and unmapped results.
264 |         self._raw_results_only = raw_results
265 | 
266 |     def _clone(self):
267 |         '''
268 |         Must clone additional fields to those cloned by elasticsearch-dsl-py.
269 |         '''
270 |         instance = super(Bungiesearch, self)._clone()
271 |         instance._raw_results_only = self._raw_results_only
272 |         return instance
273 | 
274 |     def get_es_instance(self):
275 |         '''
276 |         Returns the low level elasticsearch instance to perform low level operations.
277 |         '''
278 |         return self._using
279 | 
280 |     def execute_raw(self):
281 |         self.raw_results = super(Bungiesearch, self).execute()
282 | 
283 |     def execute(self, return_results=True):
284 |         '''
285 |         Executes the query and attempts to create model objects from results.
286 |         '''
287 |         if self.results:
288 |             return self.results if return_results else None
289 | 
290 |         self.execute_raw()
291 | 
292 |         if self._raw_results_only:
293 |             self.results = self.raw_results
294 |         else:
295 |             self.map_results()
296 | 
297 |         if return_results:
298 |             return self.results
299 | 
300 |     def map_results(self):
301 |         '''
302 |         Maps raw results and store them.
303 |         '''
304 |         self.results = Bungiesearch.map_raw_results(self.raw_results, self)
305 | 
306 |     def only(self, *fields):
307 |         '''
308 |         Restricts the fields to be fetched when mapping. Set to `__model` to fetch all fields define in the ModelIndex.
309 |         '''
310 |         s = self._clone()
311 |         if len(fields) == 1 and fields[0] == '__model':
312 |             s._only = '__model'
313 |         else:
314 |             s._only = fields
315 |         return s
316 | 
317 |     def __iter__(self):
318 |         '''
319 |         Allows iterating on the response.
320 |         '''
321 |         self.execute()
322 |         return iter(self.results)
323 | 
324 |     def __len__(self):
325 |         '''
326 |         Return elasticsearch-dsl-py count.
327 |         '''
328 |         return self.count()
329 | 
330 |     def __getitem__(self, key):
331 |         '''
332 |         Overwriting the step in slice. It is used to set the results either as elasticsearch-dsl-py response object, or
333 |         attempt to fetch the Django model instance.
334 |         :warning: Getting an item will execute this search. Any search operation or field setting *must* be done prior to getting an item.
335 |         '''
336 |         if isinstance(key, slice):
337 |             if key.step is not None:
338 |                 self._raw_results_only = key.step
339 |                 if key.start is not None and key.stop is not None:
340 |                     single_item = key.start - key.stop == -1
341 |                 elif key.start is None and key.stop == 1:
342 |                     single_item = True
343 |                 else:
344 |                     single_item = False
345 |                 key = slice(key.start, key.stop)
346 |             else:
347 |                 single_item = False
348 |         else:
349 |             single_item = True
350 |         results = super(Bungiesearch, self).__getitem__(key).execute()
351 |         if single_item:
352 |             try:
353 |                 return results[0]
354 |             except IndexError:
355 |                 return []
356 |         return results
357 | 
358 |     def hook_alias(self, alias, model_obj=None):
359 |         '''
360 |         Returns the alias function, if it exists and if it can be applied to this model.
361 |         '''
362 |         try:
363 |             search_alias = self._alias_hooks[alias]
364 |         except KeyError:
365 |             raise AttributeError('Could not find search alias named {}. Is this alias defined in BUNGIESEARCH["ALIASES"]?'.format(alias))
366 |         else:
367 |             if search_alias._applicable_models and \
368 |                 ((model_obj and model_obj not in search_alias._applicable_models) or \
369 |                  not any([app_model_obj.__name__ in self._doc_type for app_model_obj in search_alias._applicable_models])):
370 |                     raise ValueError('Search alias {} is not applicable to model/doc_types {}.'.format(alias, model_obj if model_obj else self._doc_type))
371 |             return search_alias.prepare(self, model_obj).alias_for
372 | 
373 |     def __getattr__(self, alias):
374 |         '''
375 |         Shortcut for search aliases. As explained in the docs (https://docs.python.org/2/reference/datamodel.html#object.__getattr__),
376 |         this is only called as a last resort in case the attribute is not found.
377 |         '''
378 |         return self.hook_alias(alias)
379 | 


--------------------------------------------------------------------------------
/bungiesearch/aliases.py:
--------------------------------------------------------------------------------
 1 | class SearchAlias(object):
 2 |     '''
 3 |     Defines search aliases for specific models. Essentially works like Django Managers but for Bungiesearch.
 4 |     These work for both managers and bungiesearch instances. See the docs (and if they aren't clear, open an issue).
 5 |     '''
 6 |     def __init__(self):
 7 |         # Introspect the model, adding/removing fields as needed.
 8 |         # Adds/Excludes should happen only if the fields are not already
 9 |         # defined in `self.fields`.
10 |         self._classname = type(self).__name__
11 |         try:
12 |             _meta = getattr(self, 'Meta')
13 |         except AttributeError:
14 |             self._applicable_models = []
15 |             self.alias_name = self._classname.lower()
16 |         else:
17 |             self._applicable_models = getattr(_meta, 'models', None)
18 |             self.alias_name = getattr(_meta, 'alias_name', self._classname.lower())
19 |         self.search_instance = None
20 |         self.model = None
21 | 
22 |     def _clone(self):
23 |         s = self.__class__()
24 |         s._classname = self._classname
25 |         s._applicable_models = self._applicable_models
26 |         s.alias_name = self.alias_name
27 |         return s
28 | 
29 |     def prepare(self, search_instance, model_obj):
30 |         s = self._clone()
31 |         s.search_instance = search_instance
32 |         s.model = model_obj
33 |         return s
34 | 
35 |     def alias_for(self, **kwargs):
36 |         raise NotImplementedError('{} does not provide an implementation for alias_for.'.format(self._classname))
37 | 
38 |     def get_model(self):
39 |         if self.model:
40 |             return self.model
41 |         if self.search_instance._doc_type and len(self.search_instance._doc_type) == 1:
42 |             idxes = self.search_instance._model_name_to_model_idx[self.search_instance._doc_type[0]]
43 |             first_mdl = idxes[0].get_model()
44 |             if all(mdlidx.get_model() == first_mdl for mdlidx in idxes[1:]):
45 |                 return first_mdl
46 |             raise ValueError('SearchAlias {} is associated to more than one index, and the model is differs between indices!')
47 |         raise ValueError('Instance associated to zero doc types or more than one.')
48 | 


--------------------------------------------------------------------------------
/bungiesearch/fields.py:
--------------------------------------------------------------------------------
  1 | from django.template import Context, loader
  2 | from django.template.defaultfilters import striptags
  3 | from six import iteritems
  4 | 
  5 | from elasticsearch_dsl.analysis import Analyzer
  6 | 
  7 | 
  8 | class AbstractField(object):
  9 |     '''
 10 |     Represents an elasticsearch index field and values from given objects.
 11 |     Currently does not support binary fields, but those can be created by manually providing a dictionary.
 12 | 
 13 |     Values are extracted using the `model_attr` or `eval_as` attribute.
 14 |     '''
 15 |     meta_fields = ['_index', '_uid', '_type', '_id']
 16 |     common_fields = ['index_name', 'store', 'index', 'boost', 'null_value', 'copy_to', 'type', 'fields']
 17 |     @property
 18 |     def fields(self):
 19 |         try:
 20 |             return self.fields
 21 |         except:
 22 |             raise NotImplementedError('Allowed fields are not defined.')
 23 | 
 24 |     @property
 25 |     def coretype(self):
 26 |         try:
 27 |             return self.coretype
 28 |         except:
 29 |             raise NotImplementedError('Core type is not defined!')
 30 | 
 31 |     @property
 32 |     def defaults(self):
 33 |         '''
 34 |         Stores default values.
 35 |         '''
 36 |         try:
 37 |             return self.defaults
 38 |         except:
 39 |             return {}
 40 | 
 41 |     def __init__(self, **args):
 42 |         '''
 43 |         Performs several checks to ensure that the provided attributes are valid. Will not check their values.
 44 |         '''
 45 |         if isinstance(self.coretype, list):
 46 |             if 'coretype' not in args:
 47 |                 raise KeyError('{} can be represented as one of the following types: {}. Specify which to select as the `coretype` parameter.'.format(unicode(self), ', '.join(self.coretype)))
 48 |             if args['coretype'] not in self.coretype:
 49 |                 raise KeyError('Core type {} is not supported by {}.'.format(args['coretype'], unicode(self)))
 50 |             self.type = args.pop('coretype')
 51 |         else:
 52 |             self.type = self.coretype
 53 | 
 54 |         self.model_attr = args.pop('model_attr', None)
 55 |         self.eval_func = args.pop('eval_as', None)
 56 |         self.template_name = args.pop('template', None)
 57 | 
 58 |         for attr, value in iteritems(args):
 59 |             if attr not in self.fields and attr not in AbstractField.common_fields:
 60 |                 raise KeyError('Attribute `{}` is not allowed for core type {}.'.format(attr, self.coretype))
 61 |             setattr(self, attr, value)
 62 | 
 63 |         for attr, value in iteritems(self.defaults):
 64 |             if not hasattr(self, attr):
 65 |                 setattr(self, attr, value)
 66 | 
    def value(self, obj):
        '''
        Computes the value of this field to update the index.

        The sources are tried in this order: the template, then the `eval_as` expression, then the model attribute.

        :param obj: object instance, as a dictionary or as a model instance.
        :return: the rendered template, the result of the evaluated expression, or the attribute value
            (the result of calling it if the attribute is callable).
        :raise KeyError: if none of `template`, `eval_as` or `model_attr` was provided.
        '''
        if self.template_name:
            # The object is exposed to the template as `object`.
            t = loader.select_template([self.template_name])
            return t.render(Context({'object': obj}))

        if self.eval_func:
            try:
                # NOTE(review): `eval` executes arbitrary Python; the expression is evaluated in this
                # method's scope, so it can refer to `obj` and `self`. It must only ever come from trusted code.
                return eval(self.eval_func)
            except Exception as e:
                # Re-raise the same exception type with a message naming the field and the expression.
                raise type(e)('Could not compute value of {} field (eval_as=`{}`): {}.'.format(unicode(self), self.eval_func, unicode(e)))

        elif self.model_attr:
            # Dictionaries are read by key, anything else by attribute.
            if isinstance(obj, dict):
                return obj[self.model_attr]
            current_obj = getattr(obj, self.model_attr)

            # Callable attributes are called without arguments to get the value.
            if callable(current_obj):
                return current_obj()
            else:
                return current_obj

        else:
            raise KeyError('{0} gets its value via a model attribute, an eval function, a template, or is prepared in a method '
                           'call but none of `model_attr`, `eval_as,` `template,` `prepare_{0}` is provided.'.format(unicode(self)))
 95 | 
 96 |     def json(self):
 97 |         json = {}
 98 |         for attr, val in iteritems(self.__dict__):
 99 |             if attr in ('eval_func', 'model_attr', 'template_name'):
100 |                 continue
101 |             elif attr in ('analyzer', 'index_analyzer', 'search_analyzer') and isinstance(val, Analyzer):
102 |                 json[attr] = val.to_dict()
103 |             else:
104 |                 json[attr] = val
105 | 
106 |         return json
107 | 
108 | # All the following definitions could probably be done with better polymorphism.
class StringField(AbstractField):
    '''
    Field of core type `string`, analyzed with `snowball` unless another analyzer is provided.
    Non-null values are passed through `striptags` before being indexed.
    '''
    coretype = 'string'
    fields = [
        'doc_values', 'term_vector', 'norms', 'index_options',
        'analyzer', 'index_analyzer', 'search_analyzer',
        'include_in_all', 'ignore_above', 'position_offset_gap', 'fielddata', 'similarity',
    ]
    defaults = {'analyzer': 'snowball'}

    def value(self, obj):
        '''
        Computes the value as the parent class does, then strips its tags. `None` is returned untouched.
        '''
        raw = super(StringField, self).value(obj)
        return None if raw is None else striptags(raw)

    def __unicode__(self):
        return 'StringField'
122 | 
class NumberField(AbstractField):
    '''
    Numeric field. Since several core types are possible, the constructor requires a `coretype` parameter.
    '''
    coretype = [
        'float', 'double',
        'byte', 'short', 'integer', 'long',
    ]
    fields = [
        'doc_values', 'precision_step', 'include_in_all',
        'ignore_malformed', 'coerce',
    ]

    def __unicode__(self):
        return 'NumberField'
129 | 
class DateField(AbstractField):
    '''
    Field of core type `date`.
    '''
    coretype = 'date'
    fields = [
        'format', 'doc_values', 'precision_step',
        'include_in_all', 'ignore_malformed',
    ]

    def __unicode__(self):
        return 'DateField'
136 | 
class BooleanField(AbstractField):
    '''
    Field of core type `boolean`. Only the attributes common to all fields are accepted.
    '''
    coretype = 'boolean'
    # No type-specific attributes.
    fields = []

    def __unicode__(self):
        return 'BooleanField'
143 | 
144 | # Correspondence between a Django field and an elasticsearch field.
def django_field_to_index(field, **attr):
    '''
    Returns the index field type that would likely be associated with each Django type.

    :param field: Django model field; only its `get_internal_type()` is used.
    :param attr: attributes forwarded to the constructor of the index field.
    :return: a DateField, BooleanField or NumberField according to the internal type of the Django field,
        and a StringField for every other type.
    '''

    dj_type = field.get_internal_type()

    if dj_type in ('DateField', 'DateTimeField'):
        return DateField(**attr)
    elif dj_type in ('BooleanField', 'NullBooleanField'):
        return BooleanField(**attr)
    elif dj_type in ('DecimalField', 'FloatField'):
        return NumberField(coretype='float', **attr)
    elif dj_type in ('PositiveSmallIntegerField', 'SmallIntegerField'):
        return NumberField(coretype='short', **attr)
    elif dj_type in ('IntegerField', 'PositiveIntegerField', 'AutoField'):
        return NumberField(coretype='integer', **attr)
    # The trailing comma matters: without it this is a plain string and `in` becomes a substring
    # test, which would also map internal types such as 'Field' to a long.
    elif dj_type in ('BigIntegerField',):
        return NumberField(coretype='long', **attr)

    return StringField(**attr)
166 | 


--------------------------------------------------------------------------------
/bungiesearch/indices.py:
--------------------------------------------------------------------------------
  1 | from six import iteritems, text_type
  2 | 
  3 | from elasticsearch_dsl.analysis import Analyzer
  4 | 
  5 | from .fields import AbstractField, django_field_to_index
  6 | from .logger import logger
  7 | 
  8 | 
  9 | class ModelIndex(object):
 10 |     '''
 11 |     Introspects a model to generate an indexable mapping and methods to extract objects.
 12 |     Supports custom fields, including Python code, and all elasticsearch field types (apart from binary type).
 13 | 
 14 |     ModelIndex does efficient querying by only fetching from the database fields which are to be indexed.
 15 | 
 16 |     How to create an index?
 17 | 
 18 |     1. Create a class which inherits from ModelIndex.
 19 |     2. Define custom indexed fields as class attributes. Values must be instances AbstractField. Important info in 3b.
 20 |     3. Define a `Meta` subclass, which must contain at least `model` as a class attribute.
 21 |         a. Optional class attributes: `fields`, `excludes` and `additional_fields`.
 22 |         b. If custom indexed field requires model attributes which are not in the difference between `fields` and `excludes`, these must be defined in `additional_fields`.
 23 |     '''
 24 |     def __init__(self):
 25 |         # Introspect the model, adding/removing fields as needed.
 26 |         # Adds/Excludes should happen only if the fields are not already
 27 |         # defined in `self.fields`.
 28 |         try:
 29 |             _meta = getattr(self, 'Meta')
 30 |         except AttributeError:
 31 |             raise AttributeError('ModelIndex {} does not contain a Meta class.'.format(self.__class__.__name__))
 32 | 
 33 |         self.model = getattr(_meta, 'model', None)
 34 |         self.fields = {}
 35 |         fields = getattr(_meta, 'fields', [])
 36 |         excludes = getattr(_meta, 'exclude', [])
 37 |         hotfixes = getattr(_meta, 'hotfixes', {})
 38 |         additional_fields = getattr(_meta, 'additional_fields', [])
 39 |         id_field = getattr(_meta, 'id_field', 'id')
 40 |         self.updated_field = getattr(_meta, 'updated_field', None)
 41 |         self.optimize_queries = getattr(_meta, 'optimize_queries', False)
 42 |         self.is_default = getattr(_meta, 'default', True)
 43 |         self.indexing_query = getattr(_meta, 'indexing_query', None)
 44 | 
 45 |         # Add in fields from the model.
 46 |         self.fields.update(self._get_fields(fields, excludes, hotfixes))
 47 |         # Elasticsearch uses '_id' to identify items uniquely, so let's duplicate that field.
 48 |         # We're duplicating it in order for devs to still perform searches on `.id` as expected.
 49 |         self.fields_to_fetch = list(set(self.fields.keys()).union(additional_fields))
 50 | 
 51 |         # Adding or updating the fields which are defined at class level.
 52 |         for cls_attr, obj in iteritems(self.__class__.__dict__):
 53 |             if not isinstance(obj, AbstractField):
 54 |                 continue
 55 | 
 56 |             if cls_attr in self.fields:
 57 |                 logger.info('Overwriting implicitly defined model field {} ({}) its explicit definition: {}.'.format(cls_attr, text_type(self.fields[cls_attr]), text_type(obj)))
 58 |             self.fields[cls_attr] = obj
 59 | 
 60 |         self.fields['_id'] = self.fields[id_field]
 61 | 
 62 |     def matches_indexing_condition(self, item):
 63 |         '''
 64 |         Returns True by default to index all documents.
 65 |         '''
 66 |         return True
 67 | 
 68 |     def get_model(self):
 69 |         return self.model
 70 | 
 71 |     def get_mapping(self, meta_fields=True):
 72 |         '''
 73 |         Returns the mapping for the index as a dictionary.
 74 | 
 75 |         :param meta_fields: Also include elasticsearch meta fields in the dictionary.
 76 |         :return: a dictionary which can be used to generate the elasticsearch index mapping for this doctype.
 77 |         '''
 78 |         return {'properties': dict((name, field.json()) for name, field in iteritems(self.fields) if meta_fields or name not in AbstractField.meta_fields)}
 79 | 
 80 |     def collect_analysis(self):
 81 |         '''
 82 |         :return: a dictionary which is used to get the serialized analyzer definition from the analyzer class.
 83 |         '''
 84 |         analysis = {}
 85 |         for field in self.fields.values():
 86 |             for analyzer_name in ('analyzer', 'index_analyzer', 'search_analyzer'):
 87 |                 if not hasattr(field, analyzer_name):
 88 |                     continue
 89 | 
 90 |                 analyzer = getattr(field, analyzer_name)
 91 | 
 92 |                 if not isinstance(analyzer, Analyzer):
 93 |                     continue
 94 | 
 95 |                 definition = analyzer.get_analysis_definition()
 96 |                 if definition is None:
 97 |                     continue
 98 | 
 99 |                 for key in definition:
100 |                     analysis.setdefault(key, {}).update(definition[key])
101 | 
102 |         return analysis
103 | 
104 |     def serialize_object(self, obj, obj_pk=None):
105 |         '''
106 |         Serializes an object for it to be added to the index.
107 | 
108 |         :param obj: Object to be serialized. Optional if obj_pk is passed.
109 |         :param obj_pk: Object primary key. Superseded by `obj` if available.
110 |         :return: A dictionary representing the object as defined in the mapping.
111 |         '''
112 |         if not obj:
113 |             try:
114 |                 # We're using `filter` followed by `values` in order to only fetch the required fields.
115 |                 obj = self.model.objects.filter(pk=obj_pk).values(*self.fields_to_fetch)[0]
116 |             except Exception as e:
117 |                 raise ValueError('Could not find object of primary key = {} in model {} (model index class {}). (Original exception: {}.)'.format(obj_pk, self.model, self.__class__.__name__, e))
118 | 
119 |         serialized_object = {}
120 | 
121 |         for name, field in iteritems(self.fields):
122 |             if hasattr(self, "prepare_%s" % name):
123 |                 value = getattr(self, "prepare_%s" % name)(obj)
124 |             else:
125 |                 value = field.value(obj)
126 | 
127 |             serialized_object[name] = value
128 | 
129 |         return serialized_object
130 | 
131 |     def _get_fields(self, fields, excludes, hotfixes):
132 |         '''
133 |         Given any explicit fields to include and fields to exclude, add
134 |         additional fields based on the associated model. If the field needs a hotfix, apply it.
135 |         '''
136 |         final_fields = {}
137 |         fields = fields or []
138 |         excludes = excludes or []
139 | 
140 |         for f in self.model._meta.fields:
141 |             # If the field name is already present, skip
142 |             if f.name in self.fields:
143 |                 continue
144 | 
145 |             # If field is not present in explicit field listing, skip
146 |             if fields and f.name not in fields:
147 |                 continue
148 | 
149 |             # If field is in exclude list, skip
150 |             if excludes and f.name in excludes:
151 |                 continue
152 | 
153 |             # If field is a relation, skip.
154 |             if getattr(f, 'rel'):
155 |                 continue
156 | 
157 |             attr = {'model_attr': f.name}
158 |             if f.has_default():
159 |                 attr['null_value'] = f.default
160 | 
161 |             if f.name in hotfixes:
162 |                 attr.update(hotfixes[f.name])
163 | 
164 |             final_fields[f.name] = django_field_to_index(f, **attr)
165 | 
166 |         return final_fields
167 | 
168 |     def __str__(self):
169 |         return '<{0.__class__.__name__}:{0.model.__name__}>'.format(self)
170 | 


--------------------------------------------------------------------------------
/bungiesearch/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | 
3 | logger = logging.getLogger('bungiesearch')
4 | 


--------------------------------------------------------------------------------
/bungiesearch/management/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChristopherRabotin/bungiesearch/13768342bc2698b214eb0003c2d113b6e273c30d/bungiesearch/management/__init__.py


--------------------------------------------------------------------------------
/bungiesearch/management/commands/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | Commands allow you to manage the index.
3 | '''


--------------------------------------------------------------------------------
/bungiesearch/management/commands/_utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | def add_arguments(obj, parser):
 4 |     parser.add_argument(
 5 |         '--noinput',
 6 |         action='store_false',
 7 |         dest='interactive',
 8 |         default=True,
 9 |         help='If provided, no prompts will be issued to the user and the data will be wiped out'
10 |     )
11 |     parser.add_argument(
12 |         '--guilty-as-charged',
13 |         action='store_true',
14 |         dest='confirmed',
15 |         default=False,
16 |         help='Flag needed to confirm the clear index.'
17 |     )
18 |     parser.add_argument(
19 |         '--timeout',
20 |         action='store',
21 |         dest='timeout',
22 |         default=None,
23 |         type=int,
24 |         help='Specify the timeout in seconds for each operation.'
25 |     )
26 | 


--------------------------------------------------------------------------------
/bungiesearch/management/commands/clear_index.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from django.core.management import call_command
 4 | from django.core.management.base import BaseCommand
 5 | from django.utils import six
 6 | 
 7 | from ._utils import add_arguments
 8 | 
 9 | 
class Command(BaseCommand):
    '''
    Clears the search index by deleting it and creating it again through the `search_index` command.
    '''
    help = 'Clears the search index of its contents.'
    add_arguments = add_arguments

    def handle(self, **options):
        '''
        Asks for confirmation (unless `--noinput` was given), then deletes and recreates the index.

        In interactive mode the process exits without touching the index unless the user answers "y"
        and the `--guilty-as-charged` flag was provided.

        :param options: parsed command line options; they are all forwarded to `search_index`.
        '''
        if options.get('interactive', True):
            print('WARNING: This will irreparably remove EVERYTHING from your search index.')
            print('Your choices after this are to restore from backups or rebuild via the `rebuild_index` command.')

            yes_or_no = six.moves.input('Are you sure you wish to continue? [y/N] ')
            # Blank line after the prompt. A bare `print` only does this as a Python 2 statement,
            # and `print()` would output "()" there, hence the explicit empty string.
            print('')

            if yes_or_no not in ['y', 'N']:
                print('No action taken: please type either "y" or "N".')
                sys.exit()

            if yes_or_no == 'N':
                print('No action taken.')
                sys.exit()

            if not options['confirmed']:
                print('No action taken: you must provide the --guilty-as-charged flag.')
                sys.exit()

        call_command('search_index', action='delete', **options)
        call_command('search_index', action='create', **options)
36 | 


--------------------------------------------------------------------------------
/bungiesearch/management/commands/rebuild_index.py:
--------------------------------------------------------------------------------
 1 | from django.core.management import call_command
 2 | from django.core.management.base import BaseCommand
 3 | 
 4 | from ._utils import add_arguments
 5 | 
 6 | 
 7 | class Command(BaseCommand):
 8 |     help = "Rebuilds the search index by clearing the search index and then performing an update."
 9 |     add_arguments = add_arguments
10 | 
11 |     def handle(self, **options):
12 |         call_command('clear_index', **options)
13 |         call_command('search_index', action='update', **options)
14 | 


--------------------------------------------------------------------------------
/bungiesearch/management/commands/search_index.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict
  2 | 
  3 | from django.core.management.base import BaseCommand
  4 | from six import iteritems
  5 | 
  6 | from ... import Bungiesearch
  7 | from ...logger import logger
  8 | from ...utils import update_index
  9 | 
 10 | 
 11 | class Command(BaseCommand):
 12 |     args = ''
 13 |     help = 'Manage search index.'
 14 | 
 15 |     def add_arguments(self, parser):
 16 |         parser.add_argument(
 17 |             '--create',
 18 |             action='store_const',
 19 |             dest='action',
 20 |             const='create',
 21 |             help='Create the index specified in the settings with the mapping generating from the search indices.'
 22 |         )
 23 |         parser.add_argument(
 24 |             '--update',
 25 |             action='store_const',
 26 |             dest='action',
 27 |             const='update',
 28 |             help='Update the index specified in the settings with the mapping generating from the search indices.')
 29 |         parser.add_argument(
 30 |             '--update-mapping',
 31 |             action='store_const',
 32 |             dest='action',
 33 |             const='update-mapping',
 34 |             help='Update the mapping of specified models (or all models) on the index specified in the settings.')
 35 |         parser.add_argument(
 36 |             '--delete',
 37 |             action='store_const',
 38 |             dest='action',
 39 |             const='delete',
 40 |             help='Delete the index specified in the settings. Requires the "--guilty-as-charged" flag.')
 41 |         parser.add_argument(
 42 |             '--delete-mapping',
 43 |             action='store_const',
 44 |             dest='action',
 45 |             const='delete-mapping',
 46 |             help='Delete the mapping of specified models (or all models) on the index specified in the settings. Requires the "--guilty-as-charged" flag.')
 47 |         parser.add_argument(
 48 |             '--guilty-as-charged',
 49 |             action='store_true',
 50 |             dest='confirmed',
 51 |             default=False,
 52 |             help='Flag needed to delete an index.')
 53 |         parser.add_argument(
 54 |             '--models',
 55 |             action='store',
 56 |             dest='models',
 57 |             default=None,
 58 |             help='Models to be updated, separated by commas. If none are specified, then all models defined in the index will be updated.')
 59 |         parser.add_argument(
 60 |             '--index',
 61 |             action='store',
 62 |             dest='index',
 63 |             default=None,
 64 |             help='Specify the index for which to apply the action, as defined in BUNGIESEARCH.INDEXES of settings. Defaults to using all indices.')
 65 |         parser.add_argument(
 66 |             '--bulk-size',
 67 |             action='store',
 68 |             dest='bulk_size',
 69 |             default=100,
 70 |             type=int,
 71 |             help='Specify the number of items to be updated together.')
 72 |         parser.add_argument(
 73 |             '--num-docs',
 74 |             action='store',
 75 |             dest='num_docs',
 76 |             default=-1,
 77 |             type=int,
 78 |             help='Specify the maximum number of items to be indexed. By default will index the whole model.')
 79 |         parser.add_argument(
 80 |             '--start',
 81 |             action='store',
 82 |             dest='start_date',
 83 |             default=None,
 84 |             type=str,
 85 |             help='Specify the start date and time of documents to be indexed.')
 86 |         parser.add_argument(
 87 |             '--end',
 88 |             action='store',
 89 |             dest='end_date',
 90 |             default=None,
 91 |             type=str,
 92 |             help='Specify the end date and time of documents to be indexed.')
 93 |         parser.add_argument(
 94 |             '--timeout',
 95 |             action='store',
 96 |             dest='timeout',
 97 |             default=None,
 98 |             type=int,
 99 |             help='Specify the timeout in seconds for each operation.')
100 | 
101 |     def handle(self, *args, **options):
102 |         src = Bungiesearch(timeout=options.get('timeout'))
103 |         es = src.get_es_instance()
104 | 
105 |         if not options['action']:
106 |             raise ValueError('No action specified. Must be one of "create", "update" or "delete".')
107 | 
108 |         if options['action'].startswith('delete'):
109 |             if not options['confirmed']:
110 |                 raise ValueError('If you know what a delete operation does (on index or mapping), add the --guilty-as-charged flag.')
111 |             if options['action'] == 'delete':
112 |                 if options['index']:
113 |                     indices = [options['index']]
114 |                 else:
115 |                     indices = src.get_indices()
116 | 
117 |                 for index in indices:
118 |                     logger.warning('Deleting elastic search index {}.'.format(index))
119 |                     es.indices.delete(index=index, ignore=404)
120 | 
121 |             else:
122 |                 index_to_doctypes = defaultdict(list)
123 |                 if options['models']:
124 |                     logger.info('Deleting mapping for models {} on index {}.'.format(options['models'], index))
125 |                     for model_name in options['models'].split():
126 |                         for index in src.get_index(model_name):
127 |                             index_to_doctypes[index].append(model_name)
128 |                 elif options['index']:
129 |                     index = options['index']
130 |                     logger.info('Deleting mapping for all models on index {}.'.format(index))
131 |                     index_to_doctypes[index] = src.get_models(index)
132 |                 else:
133 |                     for index in src.get_indices():
134 |                         index_to_doctypes[index] = src.get_models(index)
135 |                     logger.info('Deleting mapping for all models ({}) on all indices ({}).'.format(index_to_doctypes.values(), index_to_doctypes.keys()))
136 | 
137 |                 for index, doctype_list in iteritems(index_to_doctypes):
138 |                     es.indices.delete_mapping(index, ','.join(doctype_list), params=None)
139 | 
140 |         elif options['action'] == 'create':
141 |             if options['index']:
142 |                 indices = [options['index']]
143 |             else:
144 |                 indices = src.get_indices()
145 |             for index in indices:
146 |                 mapping = {}
147 |                 analysis = {'analyzer': {}, 'tokenizer': {}, 'filter': {}}
148 | 
149 |                 for mdl_idx in src.get_model_indices(index):
150 |                     mapping[mdl_idx.get_model().__name__] = mdl_idx.get_mapping(meta_fields=False)
151 | 
152 |                     mdl_analysis = mdl_idx.collect_analysis()
153 |                     for key in analysis.keys():
154 |                         value = mdl_analysis.get(key)
155 |                         if value is not None:
156 |                             analysis[key].update(value)
157 | 
158 |                 logger.info('Creating index {} with {} doctypes.'.format(index, len(mapping)))
159 |                 es.indices.create(index=index, body={'mappings': mapping, 'settings': {'analysis': analysis}})
160 | 
161 |             es.cluster.health(index=','.join(indices), wait_for_status='green', timeout='30s')
162 | 
163 |         elif options['action'] == 'update-mapping':
164 |             if options['index']:
165 |                 indices = [options['index']]
166 |             else:
167 |                 indices = src.get_indices()
168 | 
169 |             if options['models']:
170 |                 models = options['models'].split(',')
171 |             else:
172 |                 models = []
173 | 
174 |             for index in indices:
175 |                 for model_name in src._idx_name_to_mdl_to_mdlidx[index]:
176 |                     if models and model_name not in models:
177 |                         continue
178 |                     logger.info('Updating mapping of model/doctype {} on index {}.'.format(model_name, index))
179 |                     try:
180 |                         es.indices.put_mapping(model_name, src._idx_name_to_mdl_to_mdlidx[index][model_name].get_mapping(), index=index)
181 |                     except Exception as e:
182 |                         print(e)
183 |                         if raw_input('Something terrible happened! Type "abort" to stop updating the mappings: ') == 'abort':
184 |                             raise e
185 |                         print('Continuing.')
186 | 
187 |         else:
188 |             if options['index']:
189 |                 indices = options['index']
190 |             else:
191 |                 indices = src.get_indices()
192 |             if options['models']:
193 |                 model_names = options['models'].split(',')
194 |             else:
195 |                 model_names = [model for index in indices for model in src.get_models(index)]
196 | 
197 |             logger.info('Updating models {} on indices {}.'.format(model_names, indices))
198 | 
199 |             # Update index.
200 |             for model_name in model_names:
201 |                 if src.get_model_index(model_name).indexing_query is not None:
202 |                     update_index(src.get_model_index(model_name).indexing_query, model_name, bulk_size=options['bulk_size'], num_docs=options['num_docs'], start_date=options['start_date'], end_date=options['end_date'])
203 |                 else:
204 |                     update_index(src.get_model_index(model_name).get_model().objects.all(), model_name, bulk_size=options['bulk_size'], num_docs=options['num_docs'], start_date=options['start_date'], end_date=options['end_date'])
205 | 


--------------------------------------------------------------------------------
/bungiesearch/managers.py:
--------------------------------------------------------------------------------
 1 | from django.conf import settings as dj_settings
 2 | from django.db.models import Manager
 3 | 
 4 | from .logger import logger
 5 | 
 6 | 
 7 | class BungiesearchManager(Manager):
 8 |     model = None
 9 | 
10 |     '''
11 |     A Django manager for integrated search into models.
12 |     '''
13 |     @property
14 |     def search(self):
15 |         from bungiesearch import Bungiesearch
16 |         return Bungiesearch().index(*Bungiesearch.get_index(self.model, via_class=True)).doc_type(self.model.__name__)
17 | 
18 |     def search_index(self, index):
19 |         from bungiesearch import Bungiesearch
20 |         if index not in Bungiesearch.get_index(self.model, via_class=True):
21 |             logger.warning('Model/doctype {} is not present on index {}: search may return no results.'.format(self.model.__name__, index))
22 |         return Bungiesearch().index(index).doc_type(self.model.__name__)
23 | 
24 |     def custom_search(self, index, doc_type):
25 |         '''
26 |         Performs a search on a custom elasticsearch index and mapping. Will not attempt to map result objects.
27 |         '''
28 |         from bungiesearch import Bungiesearch
29 |         return Bungiesearch(raw_results=True).index(index).doc_type(doc_type)
30 | 
31 |     def contribute_to_class(self, cls, name):
32 |         '''
33 |         Sets up the signal processor. Since self.model is not available
34 |         in the constructor, we perform this operation here.
35 |         '''
36 |         super(BungiesearchManager, self).contribute_to_class(cls, name)
37 | 
38 |         from . import Bungiesearch
39 |         from .signals import get_signal_processor
40 |         settings = Bungiesearch.BUNGIE
41 |         if 'SIGNALS' in settings:
42 |             self.signal_processor = get_signal_processor()
43 |             self.signal_processor.setup(self.model)
44 | 
45 |     def __getattr__(self, alias):
46 |         '''
47 |         Shortcut for search aliases. As explained in the docs (https://docs.python.org/2/reference/datamodel.html#object.__getattr__),
48 |         this is only called as a last resort in case the attribute is not found.
49 |         This function will check whether the given model is allowed to use the proposed alias and will raise an attribute error if not.
50 |         '''
51 |         # Don't treat "private" attrs as possible aliases. This prevents an infinite recursion bug.
52 |         # Similarly, if Bungiesearch is installed but not enabled, raise the expected error
53 |         if alias[0] == '_' or not dj_settings.BUNGIESEARCH:
54 |             raise AttributeError("'{}' object has no attribute '{}'".format(type(self), alias))
55 | 
56 |         return self.search.hook_alias(alias, self.model)
57 | 


--------------------------------------------------------------------------------
/bungiesearch/signals.py:
--------------------------------------------------------------------------------
 1 | from collections import defaultdict
 2 | from importlib import import_module
 3 | from threading import Lock
 4 | 
 5 | from django.db.models import signals
 6 | 
 7 | from . import Bungiesearch
 8 | from .utils import delete_index_item, update_index
 9 | 
10 | 
def get_signal_processor():
    '''
    Instantiates the signal processor: the class whose dotted path is set as `SIGNAL_CLASS` in the
    `SIGNALS` settings of Bungiesearch if there is one, `BungieSignalProcessor` otherwise.
    '''
    signal_settings = Bungiesearch.BUNGIE['SIGNALS']
    if 'SIGNAL_CLASS' not in signal_settings:
        return BungieSignalProcessor()

    # Everything before the last dot is the module, what follows is the name of the class.
    module_path, _, class_name = signal_settings['SIGNAL_CLASS'].rpartition('.')
    processor_class = getattr(import_module(module_path), class_name)
    return processor_class()
20 | 
21 | 
class BungieSignalProcessor(object):
    '''
    Keeps the index up to date from the `post_save` and `pre_delete` signals of the models.

    Saved instances are buffered per model and only sent to the index once the buffer holds
    `BUFFER_SIZE` items (100 if that setting is missing). Deletions are applied right away.
    '''

    # Both are class attributes, hence shared by all the instances of this class.
    __index_lock = Lock()
    __items_to_be_indexed = defaultdict(list)

    def post_save_connector(self, sender, instance, **kwargs):
        '''
        Buffers the saved instance and indexes the whole buffer of its model once it is full.
        '''
        try:
            Bungiesearch.get_index(sender, via_class=True)
        except KeyError:
            # This model is not managed by Bungiesearch.
            return

        try:
            buffer_size = Bungiesearch.BUNGIE['SIGNALS']['BUFFER_SIZE']
        except KeyError:
            buffer_size = 100

        to_index = None
        with self.__index_lock:
            pending = self.__items_to_be_indexed[sender]
            pending.append(instance)
            if len(pending) >= buffer_size:
                # Take the full buffer and leave an empty one in its place.
                to_index = pending
                self.__items_to_be_indexed[sender] = []

        # The index is updated once the lock has been released.
        if to_index:
            update_index(to_index, sender.__name__, bulk_size=buffer_size)

    def pre_delete_connector(self, sender, instance, **kwargs):
        '''
        Removes the instance which is about to be deleted from the index.
        '''
        try:
            Bungiesearch.get_index(sender, via_class=True)
        except KeyError:
            # This model is not managed by Bungiesearch.
            return

        delete_index_item(instance, sender.__name__)

    def setup(self, model):
        '''
        Connects both signal handlers for the given model.
        '''
        signals.post_save.connect(self.post_save_connector, sender=model)
        signals.pre_delete.connect(self.pre_delete_connector, sender=model)

    def teardown(self, model):
        '''
        Disconnects both signal handlers for the given model.
        '''
        signals.pre_delete.disconnect(self.pre_delete_connector, sender=model)
        signals.post_save.disconnect(self.post_save_connector, sender=model)
64 | 


--------------------------------------------------------------------------------
/bungiesearch/utils.py:
--------------------------------------------------------------------------------
  1 | from dateutil.parser import parse as parsedt
  2 | from django.utils import timezone
  3 | 
  4 | from elasticsearch.exceptions import NotFoundError
  5 | 
  6 | from . import Bungiesearch
  7 | from .logger import logger
  8 | 
  9 | try:
 10 |     from elasticsearch.helpers import bulk_index
 11 | except ImportError:
 12 |     from elasticsearch.helpers import bulk as bulk_index
 13 | 
 14 | 
 15 | def update_index(model_items, model_name, action='index', bulk_size=100, num_docs=-1, start_date=None, end_date=None, refresh=True):
 16 |     '''
 17 |     Updates the index for the provided model_items.
 18 |     :param model_items: a list of model_items (django Model instances, or proxy instances) which are to be indexed/updated or deleted.
 19 |     If action is 'index', the model_items must be serializable objects. If action is 'delete', the model_items must be primary keys
 20 |     corresponding to obects in the index.
 21 |     :param model_name: doctype, which must also be the model name.
 22 |     :param action: the action that you'd like to perform on this group of data. Must be in ('index', 'delete') and defaults to 'index.'
 23 |     :param bulk_size: bulk size for indexing. Defaults to 100.
 24 |     :param num_docs: maximum number of model_items from the provided list to be indexed.
 25 |     :param start_date: start date for indexing. Must be as YYYY-MM-DD.
 26 |     :param end_date: end date for indexing. Must be as YYYY-MM-DD.
 27 |     :param refresh: a boolean that determines whether to refresh the index, making all operations performed since the last refresh
 28 |     immediately available for search, instead of needing to wait for the scheduled Elasticsearch execution. Defaults to True.
 29 |     :note: If model_items contain multiple models, then num_docs is applied to *each* model. For example, if bulk_size is set to 5,
 30 |     and item contains models Article and Article2, then 5 model_items of Article *and* 5 model_items of Article2 will be indexed.
 31 |     '''
 32 |     src = Bungiesearch()
 33 | 
 34 |     if action == 'delete' and not hasattr(model_items, '__iter__'):
 35 |         raise ValueError("If action is 'delete', model_items must be an iterable of primary keys.")
 36 | 
 37 |     logger.info('Getting index for model {}.'.format(model_name))
 38 |     for index_name in src.get_index(model_name):
 39 |         index_instance = src.get_model_index(model_name)
 40 |         model = index_instance.get_model()
 41 | 
 42 |         if num_docs == -1:
 43 |             if isinstance(model_items, (list, tuple)):
 44 |                 num_docs = len(model_items)
 45 |             else:
 46 |                 model_items = filter_model_items(index_instance, model_items, model_name, start_date, end_date)
 47 |                 num_docs = model_items.count()
 48 | 
 49 |                 if not model_items.ordered:
 50 |                     model_items = model_items.order_by('pk')
 51 |         else:
 52 |             logger.warning('Limiting the number of model_items to {} to {}.'.format(action, num_docs))
 53 | 
 54 |         logger.info('{} {} documents on index {}'.format(action, num_docs, index_name))
 55 |         prev_step = 0
 56 |         max_docs = num_docs + bulk_size if num_docs > bulk_size else bulk_size + 1
 57 |         for next_step in range(bulk_size, max_docs, bulk_size):
 58 |             logger.info('{}: documents {} to {} of {} total on index {}.'.format(action.capitalize(), prev_step, next_step, num_docs, index_name))
 59 |             data = create_indexed_document(index_instance, model_items[prev_step:next_step], action)
 60 |             bulk_index(src.get_es_instance(), data, index=index_name, doc_type=model.__name__, raise_on_error=True)
 61 |             prev_step = next_step
 62 | 
 63 |         if refresh:
 64 |             src.get_es_instance().indices.refresh(index=index_name)
 65 | 
 66 | 
 67 | def delete_index_item(item, model_name, refresh=True):
 68 |     '''
 69 |     Deletes an item from the index.
 70 |     :param item: must be a serializable object.
 71 |     :param model_name: doctype, which must also be the model name.
 72 |     :param refresh: a boolean that determines whether to refresh the index, making all operations performed since the last refresh
 73 |     immediately available for search, instead of needing to wait for the scheduled Elasticsearch execution. Defaults to True.
 74 |     '''
 75 |     src = Bungiesearch()
 76 | 
 77 |     logger.info('Getting index for model {}.'.format(model_name))
 78 |     for index_name in src.get_index(model_name):
 79 |         index_instance = src.get_model_index(model_name)
 80 |         item_es_id = index_instance.fields['_id'].value(item)
 81 |         try:
 82 |             src.get_es_instance().delete(index_name, model_name, item_es_id)
 83 |         except NotFoundError as e:
 84 |             logger.warning('NotFoundError: could not delete {}.{} from index {}: {}.'.format(model_name, item_es_id, index_name, str(e)))
 85 | 
 86 |         if refresh:
 87 |             src.get_es_instance().indices.refresh(index=index_name)
 88 | 
 89 | 
 90 | def create_indexed_document(index_instance, model_items, action):
 91 |     '''
 92 |     Creates the document that will be passed into the bulk index function.
 93 |     Either a list of serialized objects to index, or a a dictionary specifying the primary keys of items to be delete.
 94 |     '''
 95 |     data = []
 96 |     if action == 'delete':
 97 |         for pk in model_items:
 98 |             data.append({'_id': pk, '_op_type': action})
 99 |     else:
100 |         for doc in model_items:
101 |             if index_instance.matches_indexing_condition(doc):
102 |                 data.append(index_instance.serialize_object(doc))
103 |     return data
104 | 
105 | 
def filter_model_items(index_instance, model_items, model_name, start_date, end_date):
    '''
    Restricts model_items to those whose updated field lies between start_date and end_date.
    A bound which is not provided is not applied; without an updated field on the index, nothing is filtered.
    '''
    updated_field = index_instance.updated_field
    if updated_field is None:
        logger.warning("No updated date field found for {} - not restricting with start and end date".format(model_name))
        return model_items

    for lookup, bound in (('{}__gte', start_date), ('{}__lte', end_date)):
        if bound:
            model_items = model_items.filter(**{lookup.format(updated_field): __str_to_tzdate__(bound)})

    return model_items
117 | 
118 | 
def __str_to_tzdate__(date_str):
    ''' Parses date_str with dateutil and makes the result aware, using Django's current timezone. '''
    parsed = parsedt(date_str)
    current_tz = timezone.get_current_timezone()
    return timezone.make_aware(parsed, current_tz)
121 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | elasticsearch-dsl>=2.0.0,<3.0.0
2 | elasticsearch>=2.0.0,<3.0.0
3 | python-dateutil
4 | six
5 | 
6 | bungiesearch
7 | coveralls
8 | pytz
9 | 


--------------------------------------------------------------------------------
/runtests.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # pass in --cluster as an argument to start a cluster instead of a single node
 3 | set -e
 4 | trap 'jobs -p | xargs kill -9' EXIT
 5 | 
 6 | CLUSTER_URL=http://127.0.0.1:9200
 7 | ES_PATH=elasticsearch
 8 | 
 9 | if [ ${TRAVIS} ]; then
10 |   ES_PATH=./elasticsearch-2.3.0/bin/elasticsearch
11 | fi
12 | 
13 | function has_command() {
14 |   type $1 &> /dev/null
15 | }
16 | 
17 | function is_responding() {
18 |   curl --output /dev/null --fail --silent $1
19 | }
20 | 
21 | function wait_for_cluster() {
22 |   echo 'Waiting on elasticsearch to be ready on port 9200'
23 |   until is_responding "$CLUSTER_URL/_cluster/health?wait_for_nodes=$1&wait_for_status=green"; do
24 |     printf '.'
25 |     sleep 1
26 |   done
27 |   echo
28 | }
29 | 
30 | if ! is_responding $CLUSTER_URL; then
31 |   if ! has_command elasticsearch; then
32 |     echo 'No elasticsearch command found and no server running'
33 |     echo 'Elasticsearch cluster must be running on port 9200'
34 |     exit 1
35 |   else
36 |     if [ "$1" != "--cluster" ]; then
37 |       echo 'Starting single elasticsearch node'
38 |       $ES_PATH -D es.index.number_of_replicas=0 &> /dev/null &
39 |       wait_for_cluster 1
40 |     else
41 |       echo 'Starting elasticsearch cluster with 2 nodes'
42 |       $ES_PATH \
43 |         -D es.cluster.name="mycluster" \
44 |         -D es.node.name="mycluster-node2" \
45 |         -D es.node.master=true \
46 |         -D es.node.data=false \
47 |         -D es.index.number_of_replicas=0 \
48 |         -D es.network.host=127.0.0.1 \
49 |         -D es.foreground=yes \
50 |         -D es.discovery.zen.ping.multicast.enabled=false \
51 |         -D es.discovery.zen.ping.unicast.hosts=127.0.0.1:9300,127.0.0.1:9301,127.0.0.1:9302 &> /dev/null &
52 | 
53 |       $ES_PATH \
54 |         -D es.cluster.name="mycluster" \
55 |         -D es.node.name="mycluster-node2" \
56 |         -D es.node.master=false \
57 |         -D es.node.data=true \
58 |         -D es.index.number_of_replicas=0 \
59 |         -D es.network.host=127.0.0.1 \
60 |         -D es.foreground=yes \
61 |         -D es.discovery.zen.ping.multicast.enabled=false \
62 |         -D es.discovery.zen.ping.unicast.hosts=127.0.0.1:9300,127.0.0.1:9301,127.0.0.1:9302 &> /dev/null &
63 | 
64 |       wait_for_cluster 2
65 |     fi
66 |   fi
67 | fi
68 | 
69 | python -B tests/manage.py test
70 | 
71 | # only collect coverage in travis ci
72 | if [ ${COVERAGE} ]; then
73 |   echo 'Starting to collect coverage...'
74 |   coverage run --source=tests tests/manage.py test
75 | fi
76 | 
77 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import sys
 3 | from os.path import dirname, join
 4 | 
 5 | from setuptools import find_packages, setup
 6 | 
 7 | VERSION = (1, 3, 0)
 8 | __version__ = VERSION
 9 | __versionstr__ = '.'.join(map(str, VERSION))
10 | 
11 | long_description = 'Should have been loaded from README.md.'
12 | with open(join(dirname(__file__), 'README.rst')) as f:
13 |     long_description = f.read().strip()
14 | 
15 | 
16 | install_requires = [
17 |     'django>=1.8',
18 |     'elasticsearch-dsl>=2.0.0,<3.0.0',
19 |     'elasticsearch>=2.0.0,<3.0.0',
20 |     'python-dateutil',
21 |     'six',
22 | ]
23 | 
24 | tests_require = []
25 | 
26 | # use external unittest for 2.6
27 | if sys.version_info[:2] == (2, 6):
28 |     tests_require.append('unittest2')
29 | 
30 | setup(
31 |     name="bungiesearch",
32 |     description="A Django elasticsearch wrapper and helper using elasticsearch-dsl-py high level library.",
33 |     license="BSD-3",
34 |     url="https://github.com/ChristopherRabotin/bungiesearch",
35 |     long_description=long_description,
36 |     version=__versionstr__,
37 |     author="Christopher Rabotin",
38 |     author_email="christopher.rabotin@gmail.com",
39 |     packages=find_packages(
40 |         where='.',
41 |         exclude=('bungiesearch/tests',)
42 |     ),
43 |     classifiers=[
44 |         "Development Status :: 5 - Production/Stable",
45 |         "Intended Audience :: Developers",
46 |         "License :: OSI Approved :: BSD License",
47 |         "Operating System :: OS Independent",
48 |         "Programming Language :: Python :: 2",
49 |         "Programming Language :: Python :: 3",
50 |         "Programming Language :: Python :: 3.4",
51 |         "Programming Language :: Python :: 3.5",
52 |         "Framework :: Django"
53 |     ],
54 |     keywords="elasticsearch haystack django bungiesearch",
55 |     install_requires=install_requires,
56 |     dependency_links=['https://github.com/elasticsearch/elasticsearch-dsl-py#egg=elasticsearch-dsl-py'],
57 | )
58 | 


--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -e
 3 | 
 4 | if [ $(whoami) == "root" ] && [ "$1" != "--force" ]; then
 5 |   echo "It's not recommended to run setup with root"
 6 |   echo 'run with --force to ignore'
 7 |   exit 1
 8 | fi
 9 | 
10 | if [ -z "$VIRTUAL_ENV" ] && [ "$1" != "--force" ]; then
11 |   echo "$0 should be run inside a python virtualenv"
12 |   echo 'run with --force to ignore'
13 |   exit 1
14 | fi
15 | 
16 | echo 'Installing Python dependencies'
17 | pip install pip setuptools --upgrade
18 | pip install -r requirements.txt
19 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChristopherRabotin/bungiesearch/13768342bc2698b214eb0003c2d113b6e273c30d/tests/__init__.py


--------------------------------------------------------------------------------
/tests/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ChristopherRabotin/bungiesearch/13768342bc2698b214eb0003c2d113b6e273c30d/tests/core/__init__.py


--------------------------------------------------------------------------------
/tests/core/analysis.py:
--------------------------------------------------------------------------------
 1 | from elasticsearch_dsl.analysis import analyzer, token_filter
 2 | 
 3 | edge_ngram_analyzer = analyzer(
 4 |     'edge_ngram_analyzer',
 5 |     type='custom',
 6 |     tokenizer='standard',
 7 |     filter=[
 8 |         'lowercase',
 9 |         token_filter(
10 |             'edge_ngram_filter',
11 |             type='edgeNGram',
12 |             min_gram=2,
13 |             max_gram=20
14 |         )
15 |     ]
16 | )
17 | 


--------------------------------------------------------------------------------
/tests/core/bungie_signal.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | This test signal acts as a proxy to BungieSignalProcessor. It allows us
 3 | to test the functionality of the default signal processor while using a
 4 | custom processor instead, hence testing that we can plug in and use a custom
 5 | signal processor.
 6 | '''
 7 | from django.db.models import signals
 8 | 
 9 | from bungiesearch.signals import BungieSignalProcessor
10 | 
11 | 
class BungieTestSignalProcessor(BungieSignalProcessor):
    # Custom processor which only forwards to the default connectors, and records
    # on the instance that setup and teardown were called.

    def handle_save(self, sender, instance, **kwargs):
        # Forward to the default post_save behaviour.
        self.post_save_connector(sender, instance, **kwargs)

    def handle_delete(self, sender, instance, **kwargs):
        # Forward to the default pre_delete behaviour.
        self.pre_delete_connector(sender, instance, **kwargs)

    def setup(self, model):
        receivers = ((signals.post_save, self.handle_save),
                     (signals.pre_delete, self.handle_delete))
        for signal, receiver in receivers:
            signal.connect(receiver, sender=model)
        self.setup_ran = True

    def teardown(self, model):
        receivers = ((signals.pre_delete, self.handle_delete),
                     (signals.post_save, self.handle_save))
        for signal, receiver in receivers:
            signal.disconnect(receiver, sender=model)
        self.teardown_ran = True
29 | 


--------------------------------------------------------------------------------
/tests/core/models.py:
--------------------------------------------------------------------------------
 1 | from django.db import models
 2 | 
 3 | from bungiesearch.managers import BungiesearchManager
 4 | 
 5 | 
# Test model covering several field types; the ArticleIndex classes of the test suite are built on it.
class Article(models.Model):
    title = models.TextField(db_index=True)
    authors = models.TextField(blank=True)
    description = models.TextField(blank=True)
    text_field = models.TextField(null=True)
    # The link is the only unique column of this model.
    link = models.URLField(max_length=510, unique=True, db_index=True)
    published = models.DateTimeField(null=True)
    # Set automatically when the row is first created.
    created = models.DateTimeField(auto_now_add=True)
    # Not maintained automatically: callers provide the value.
    updated = models.DateTimeField(null=True)
    # No default and not nullable: must be provided on creation.
    tweet_count = models.IntegerField()
    raw = models.BinaryField(null=True)
    source_hash = models.BigIntegerField(null=True)
    missing_data = models.CharField(blank=True, max_length=255)
    positive_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
    negative_feedback = models.PositiveIntegerField(null=True, blank=True, default=0)
    popularity_index = models.IntegerField(default=0)

    # Replaces the default manager with the Bungiesearch one.
    objects = BungiesearchManager()

    class Meta:
        app_label = 'core'
27 | 
28 | 
# Test model whose primary key is a text column (user_id) instead of the automatic integer id.
class User(models.Model):
    name = models.TextField(db_index=True)
    user_id = models.TextField(blank=True, primary_key=True)
    about = models.TextField(blank=True)
    # Set automatically when the row is first created.
    created = models.DateTimeField(auto_now_add=True)
    # Not maintained automatically: callers provide the value.
    updated = models.DateTimeField(null=True)

    # Replaces the default manager with the Bungiesearch one.
    objects = BungiesearchManager()

    class Meta:
        app_label = 'core'
40 | 
41 | 
# Test model without any date column, hence without a field usable as the index's updated field.
class NoUpdatedField(models.Model):
    field_title = models.TextField(db_index=True)
    field_description = models.TextField(blank=True)

    # Replaces the default manager with the Bungiesearch one.
    objects = BungiesearchManager()

    class Meta:
        app_label = 'core'
50 | 
51 | 
# Test model used by EmptyIndex, whose indexing condition rejects every item.
# NOTE(review): the class name is spelled "Mananged"; other modules import it under that name.
class ManangedButEmpty(models.Model):
    field_title = models.TextField(db_index=True)
    field_description = models.TextField(blank=True)

    # Replaces the default manager with the Bungiesearch one.
    objects = BungiesearchManager()

    class Meta:
        app_label = 'core'
60 | 
61 | 
# Test model which keeps Django's default manager: no BungiesearchManager is attached to it.
class Unmanaged(models.Model):
    field_title = models.TextField(db_index=True)
    field_description = models.TextField(blank=True)

    class Meta:
        app_label = 'core'
68 | 


--------------------------------------------------------------------------------
/tests/core/search_aliases.py:
--------------------------------------------------------------------------------
 1 | from bungiesearch.aliases import SearchAlias
 2 | from core.models import Article, NoUpdatedField
 3 | 
 4 | 
 5 | class SearchTitle(SearchAlias):
 6 |     def alias_for(self, title):
 7 |         return self.search_instance.query('match', title=title)
 8 | 
 9 |     class Meta:
10 |         models = (Article,)
11 |         alias_name = 'title_search'
12 | 
class Title(SearchAlias):
    # Alias without Meta: neither an alias name nor a list of models is declared.
    def alias_for(self, title):
        # Match query on the title field.
        search = self.search_instance
        return search.query('match', title=title)
16 | 
# Alias declared for Article which does not define alias_for.
# NOTE(review): presumably exists to exercise the error path of an incomplete alias - confirm against the tests.
class InvalidAlias(SearchAlias):
    class Meta:
        models = (Article,)
20 | 
class TitleFilter(SearchAlias):
    # Alias without Meta: neither an alias name nor a list of models is declared.
    def alias_for(self, title):
        # Term filter on the title field.
        search = self.search_instance
        return search.filter('term', title=title)
24 | 
class NoUpdatedMdlOnly(SearchAlias):
    # Alias declared for the NoUpdatedField model only; no explicit alias name.
    class Meta:
        models = (NoUpdatedField,)

    def alias_for(self, title):
        # Term filter on the title field.
        search = self.search_instance
        return search.filter('term', title=title)
31 | 
# Alias named 'get_alias_for_test' which is not restricted to specific models.
class ReturningSelfAlias(SearchAlias):
    def alias_for(self):
        # Returns the alias object itself rather than a search instance.
        return self

    class Meta:
        alias_name = 'get_alias_for_test'
38 | 
# Alias named 'bisindex', declared for the Article model only.
class BisIndex(SearchAlias):
    def alias_for(self):
        # Points the search instance at the 'bungiesearch_demo_bis' index by overwriting its private
        # _index attribute, then returns that search instance.
        self.search_instance._index = 'bungiesearch_demo_bis'
        return self.search_instance

    class Meta:
        models = (Article,)
        alias_name = 'bisindex'
47 | 


--------------------------------------------------------------------------------
/tests/core/search_indices.py:
--------------------------------------------------------------------------------
 1 | from bungiesearch.fields import DateField, NumberField, StringField
 2 | from bungiesearch.indices import ModelIndex
 3 | from core.models import Article, NoUpdatedField, User
 4 | 
 5 | from .analysis import edge_ngram_analyzer
 6 | 
 7 | 
# Index definition for the Article model, flagged as default.
class ArticleIndex(ModelIndex):
    # Fields declared on top of the model's own ones; eval_as expressions refer to the indexed instance as ``obj``.
    effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
    meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
    # Built from the article.txt template and analyzed with the edge n-gram analyzer.
    text = StringField(template='article.txt', analyzer=edge_ngram_analyzer)

    class Meta:
        model = Article
        updated_field = 'updated'
        exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
        # Per-field overrides.
        # NOTE(review): 'full_text' does not match any field visible in this class - confirm it is intended.
        hotfixes = {'updated': {'null_value': '2013-07-01'},
                    'title': {'boost': 1.75},
                    'description': {'boost': 1.35},
                    'full_text': {'boost': 1.125}}
        default = True
22 | 
23 | 
class UserIndex(ModelIndex):
    # Index definition for the User model, flagged as default; documents are identified by user_id.
    effective_date = DateField(eval_as='obj.created if obj.created and obj.updated > obj.created else obj.updated')
    about = StringField(model_attr='about', analyzer=edge_ngram_analyzer)
    int_about = NumberField(coretype='integer')

    def prepare_int_about(self, obj):
        # The "about" text as an integer, or 1 when it is not a valid integer literal.
        try:
            return int(obj.about)
        except ValueError:
            return 1

    class Meta:
        model = User
        id_field = 'user_id'
        updated_field = 'updated'
        hotfixes = {'updated': {'null_value': '2013-07-01'},
                    'about': {'boost': 1.35}}
        default = True
44 | 
45 | 
# Index definition for the NoUpdatedField model: no updated_field is declared.
class NoUpdatedFieldIndex(ModelIndex):
    class Meta:
        model = NoUpdatedField
        exclude = ('field_description',)
        optimize_queries = True
        # Reuses the exclude tuple defined just above, so the excluded columns are deferred in the query.
        indexing_query = NoUpdatedField.objects.defer(*exclude).select_related().all()
52 | 


--------------------------------------------------------------------------------
/tests/core/search_indices_bis.py:
--------------------------------------------------------------------------------
 1 | from bungiesearch.fields import DateField, StringField
 2 | from bungiesearch.indices import ModelIndex
 3 | from core.models import Article, ManangedButEmpty, User
 4 | 
 5 | 
# Second index definition for the Article model, not flagged as default.
class ArticleIndex(ModelIndex):
    # eval_as expressions refer to the indexed instance as ``obj``.
    effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
    meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
    # Constant value, identical for every document.
    more_fields = StringField(eval_as='"some value"')

    class Meta:
        model = Article
        updated_field = 'updated'
        exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
        hotfixes = {'updated': {'null_value': '2013-07-01'},
                    'title': {'boost': 1.75},
                    'description': {'boost': 1.35},
                    'full_text': {'boost': 1.125}}
        default = False
20 | 
21 | 
# Second index definition for the User model, not flagged as default; documents are identified by user_id.
class UserIndex(ModelIndex):
    # NOTE(review): these expressions read obj.published, obj.link, obj.tweet_count and obj.raw, none of which
    # is declared on the User model - looks copied from the Article index; confirm this is intended.
    effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published')
    meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])')
    # Constant value, identical for every document.
    more_fields = StringField(eval_as='"some value"')

    class Meta:
        model = User
        id_field = 'user_id'
        updated_field = 'updated'
        # NOTE(review): the excluded names and the 'title' / 'full_text' hotfixes are not User fields either.
        exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash')
        hotfixes = {'updated': {'null_value': '2013-07-01'},
                    'title': {'boost': 1.75},
                    'about': {'boost': 1.35},
                    'full_text': {'boost': 1.125}}
        default = False
37 | 
38 | 
class EmptyIndex(ModelIndex):
    # Index definition for ManangedButEmpty which never accepts any item.
    class Meta:
        model = ManangedButEmpty
        exclude = ('field_description',)
        optimize_queries = True

    def matches_indexing_condition(self, item):
        # Every item is rejected, whatever it is.
        return False
47 | 


--------------------------------------------------------------------------------
/tests/core/templates/article.txt:
--------------------------------------------------------------------------------
1 | {{ object.title }}
2 | {{ object.authors }}
3 | {{ object.description }}
4 | {{ object.text_field }}


--------------------------------------------------------------------------------
/tests/core/test_bungiesearch.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | 
  3 | from django.core.management import call_command
  4 | from django.test import TestCase, override_settings
  5 | from six import iteritems
  6 | 
  7 | import pytz
  8 | from bungiesearch import Bungiesearch
  9 | from bungiesearch.utils import update_index
 10 | from core.bungie_signal import BungieTestSignalProcessor
 11 | from core.models import (Article, ManangedButEmpty, NoUpdatedField, Unmanaged,
 12 |                          User)
 13 | from core.search_indices import ArticleIndex, UserIndex
 14 | 
 15 | 
 16 | class CoreTestCase(TestCase):
 17 |     @classmethod
 18 |     def setUpClass(cls):
 19 |         # Let's start by creating the index and mapping.
 20 |         # If we create an object before the index, the index
 21 |         # will be created automatically, and we want to test the command.
 22 |         call_command('search_index', action='create')
 23 | 
 24 |         art_1 = {'title': 'Title one',
 25 |                  'description': 'Description of article 1.',
 26 |                  'text_field': '',
 27 |                  'link': 'http://example.com/article_1',
 28 |                  'published': pytz.UTC.localize(datetime(year=2020, month=9, day=15)),
 29 |                  'updated': pytz.UTC.localize(datetime(year=2014, month=9, day=10)),
 30 |                  'tweet_count': 20,
 31 |                  'source_hash': 159159159159,
 32 |                  'missing_data': '',
 33 |                  'positive_feedback': 50,
 34 |                  'negative_feedback': 5,
 35 |                  }
 36 | 
 37 |         user_1 = {'user_id': 'bungie1',
 38 |                   'about': 'Description of user 1',
 39 |                   'created': pytz.UTC.localize(datetime(year=2015, month=1, day=1)),
 40 |                   'updated': pytz.UTC.localize(datetime(year=2015, month=6, day=1)),
 41 |                  }
 42 | 
 43 |         Article.objects.create(**art_1)
 44 |         User.objects.create(**user_1)
 45 | 
 46 |         art_2 = dict((k, v) for k, v in iteritems(art_1))
 47 |         art_2['link'] += '/page2'
 48 |         art_2['title'] = 'Title two'
 49 |         art_2['description'] = 'This is a second article.'
 50 |         art_2['text_field'] = None
 51 |         art_2['published'] = pytz.UTC.localize(datetime(year=2010, month=9, day=15))
 52 | 
 53 |         user_2 = dict((k, v) for k, v in iteritems(user_1))
 54 |         user_2['user_id'] = 'bungie2'
 55 |         user_2['about'] = 'This is the second user'
 56 |         user_2['created'] = pytz.UTC.localize(datetime(year=2010, month=9, day=15))
 57 | 
 58 |         Article.objects.create(**art_2)
 59 |         User.objects.create(**user_2)
 60 |         NoUpdatedField.objects.create(field_title='My title', field_description='This is a short description.')
 61 | 
 62 |         call_command('rebuild_index', interactive=False, confirmed='guilty-as-charged')
 63 | 
 64 |     def test_count_after_clear(self):
 65 |         # can flake because elasticsearch create API is asynchronous
 66 |         self.assertEqual(Article.objects.search_index('bungiesearch_demo').count(), 2)
 67 |         call_command('rebuild_index', interactive=False, confirmed='guilty-as-charged')
 68 |         self.assertEqual(Article.objects.search_index('bungiesearch_demo').count(), 2)
 69 | 
 70 |     @classmethod
 71 |     def tearDownClass(cls):
 72 |         call_command('search_index', action='delete', confirmed='guilty-as-charged')
 73 | 
 74 |     def test_model_index_generation(self):
 75 |         '''
 76 |         Check that the mapping is the expected one.
 77 |         '''
 78 |         expected_article = {'properties': {'updated': {'type': 'date', 'null_value': '2013-07-01'},
 79 |                                            'description': {'type': 'string', 'boost': 1.35, 'analyzer': 'snowball'},
 80 |                                            'text': {'type': 'string', 'analyzer': 'edge_ngram_analyzer'},
 81 |                                            'text_field': {'type': 'string', 'analyzer': 'snowball'},
 82 |                                            'created': {'type': 'date'},
 83 |                                            'title': {'type': 'string', 'boost': 1.75, 'analyzer': 'snowball'},
 84 |                                            'authors': {'type': 'string', 'analyzer': 'snowball'},
 85 |                                            'meta_data': {'type': 'string', 'analyzer': 'snowball'},
 86 |                                            'link': {'type': 'string', 'analyzer': 'snowball'},
 87 |                                            'effective_date': {'type': 'date'},
 88 |                                            'tweet_count': {'type': 'integer'},
 89 |                                            'id': {'type': 'integer'},
 90 |                                            '_id': {'type': 'integer'}, # This is the elastic search index.
 91 |                                            'published': {'type': 'date'}}
 92 |                            }
 93 |         expected_user = {'properties': {'updated': {'type': 'date', 'null_value': '2013-07-01'},
 94 |                                         'about': {'type': 'string', 'analyzer': 'edge_ngram_analyzer'},
 95 |                                         'int_about': {'type': 'integer'},
 96 |                                         'user_id': {'analyzer': 'snowball', 'type': 'string'},
 97 |                                         'effective_date': {'type': 'date'},
 98 |                                         'created': {'type': 'date'},
 99 |                                         'name': {'analyzer': 'snowball', 'type': 'string'},
100 |                                         '_id': {'analyzer': 'snowball', 'type': 'string'}}
101 |                         }
102 | 
103 |         self.assertEqual(ArticleIndex().get_mapping(), expected_article)
104 |         self.assertEqual(UserIndex().get_mapping(), expected_user)
105 | 
106 |     def test_fetch_item(self):
107 |         '''
108 |         Test searching and mapping.
109 |         '''
110 |         self.assertEqual(Article.objects.search.query('match', _all='Description')[0], Article.objects.get(title='Title one'), 'Searching for "Description" did not return just the first Article.')
111 |         self.assertEqual(Article.objects.search.query('match', _all='second article')[0], Article.objects.get(title='Title two'), 'Searching for "second article" did not return the second Article.')
112 | 
113 |         self.assertEqual(User.objects.search.query('match', _all='Description')[0], User.objects.get(user_id='bungie1'), 'Searching for "About" did not return the User.')
114 |         self.assertEqual(User.objects.search.query('match', _all='second user')[0], User.objects.get(user_id='bungie2'), 'Searching for "second user" did not return the User.')
115 | 
116 |     def test_raw_fetch(self):
117 |         '''
118 |         Test that a raw fetch (slice step set to True) yields an object exposing a `meta` attribute.
119 |         '''
120 |         item = Article.objects.search.query('match', _all='Description')[:1:True]
121 |         self.assertTrue(hasattr(item, 'meta'), 'Fetching first raw results did not return an object with a meta attribute.')
122 | 
123 |         item = User.objects.search.query('match', _all='Description')[:1:True]
124 |         self.assertTrue(hasattr(item, 'meta'), 'Fetching first raw results did not return an object with a meta attribute.')
125 | 
126 |     def test_iteration(self):
127 |         '''
128 |         Tests iteration on Bungiesearch items.
129 |         '''
130 |         lazy_search_article = Article.objects.search.query('match', title='title')
131 |         db_items = list(Article.objects.all())
132 |         self.assertTrue(all([result in db_items for result in lazy_search_article]), 'Searching for title "title" did not return all articles.')
133 |         self.assertTrue(all([result in db_items for result in lazy_search_article[:]]), 'Searching for title "title" did not return all articles when using empty slice.')
134 |         self.assertEqual(len(lazy_search_article[:1]), 1, 'Get item with start=None and stop=1 did not return one item.')
135 |         self.assertEqual(len(lazy_search_article[:2]), 2, 'Get item with start=None and stop=2 did not return two item.')
136 | 
137 |         lazy_search_user = User.objects.search.query('match', about='user')
138 |         db_items = list(User.objects.all())
139 |         self.assertTrue(all([result in db_items for result in lazy_search_user]), 'Searching for description "user" did not return all articles.')
140 |         self.assertTrue(all([result in db_items for result in lazy_search_user[:]]), 'Searching for description "user" did not return all articles when using empty slice.')
141 |         self.assertEqual(len(lazy_search_user[:1]), 1, 'Get item with start=None and stop=1 did not return one item.')
142 |         self.assertEqual(len(lazy_search_user[:2]), 2, 'Get item with start=None and stop=2 did not return two item.')
143 | 
144 |     def test_no_results(self):
145 |         '''
146 |         Test empty results.
147 |         '''
148 |         self.assertEqual(list(Article.objects.search.query('match', _all='nothing')), [], 'Searching for "nothing" did not return an empty list on iterator call.')
149 |         self.assertEqual(Article.objects.search.query('match', _all='nothing')[:10], [], 'Searching for "nothing" did not return an empty list on get item call.')
150 | 
151 |         self.assertEqual(list(User.objects.search.query('match', _all='nothing')), [], 'Searching for "nothing" did not return an empty list on iterator call.')
152 |         self.assertEqual(list(User.objects.search.query('match', _all='nothing')), [], 'Searching for "nothing" did not return an empty list on iterator call.')
153 | 
154 |     def test_custom_search(self):
155 |         '''
156 |         Test searching on custom index and doc_type.
157 |         '''
158 |         search = Article.objects.custom_search(index='bungiesearch_demo', doc_type='Article')
159 |         es_art1 = search.query('match', _all='Description')[0]
160 |         db_art1 = Article.objects.get(title='Title one')
161 |         es_art2 = search.query('match', _all='second article')[0]
162 |         db_art2 = Article.objects.get(title='Title two')
163 |         self.assertTrue(all([es_art1.id == db_art1.id, es_art1.title == db_art1.title, es_art1.description == db_art1.description]), 'Searching for "Description" did not return the first Article.')
164 |         self.assertTrue(all([es_art2.id == db_art2.id, es_art2.title == db_art2.title, es_art2.description == db_art2.description]), 'Searching for "second article" did not return the second Article.')
165 | 
166 |         search = User.objects.custom_search(index='bungiesearch_demo', doc_type='User')
167 |         es_user1 = search.query('match', _all='Description')[0]
168 |         db_user1 = User.objects.get(user_id='bungie1')
169 |         self.assertRaises(AttributeError, getattr, es_user1, 'id')
170 |         self.assertTrue(all([es_user1.user_id == db_user1.user_id, es_user1.about == db_user1.about]), 'Searching for "About" did not return the first User.')
171 | 
172 |     def test_get_model(self):
173 |         '''
174 |         Test model mapping.
175 |         '''
176 |         self.assertEqual(ArticleIndex().get_model(), Article, 'Model was not Article.')
177 |         self.assertEqual(UserIndex().get_model(), User, 'Model was not User')
178 | 
179 |     def test_cloning(self):
180 |         '''
181 |         Tests that Bungiesearch remains lazy with specific function which should return clones.
182 |         '''
183 |         inst = Article.objects.search.query('match', _all='Description')
184 |         self.assertIsInstance(inst.only('_id'), inst.__class__, 'Calling `only` does not return a clone of itself.')
185 | 
186 |         inst = User.objects.search.query('match', _all='Description')
187 |         self.assertIsInstance(inst.only('_id'), inst.__class__, 'Calling `only` does not return a clone of itself.')
188 | 
189 |     def test_search_alias_exceptions(self):
190 |         '''
191 |         Tests that invalid aliases raise exceptions.
192 |         '''
193 |         self.assertRaises(AttributeError, getattr, Article.objects, 'bsearch_no_such_alias')
194 |         self.assertRaises(NotImplementedError, Article.objects.bsearch_invalidalias)
195 |         self.assertRaises(ValueError, getattr, Article.objects.search.bsearch_title('title query').bsearch_titlefilter('title filter'), 'bsearch_noupdatedmdlonly')
196 | 
197 |     @override_settings(BUNGIESEARCH={})
198 |     def test_search_alias_not_setup(self):
199 |         '''
200 |         Tests that Bungiesearch is not instantiated when not set up
201 |         This is its own test due to the override_settings decorator
202 |         '''
203 |         self.assertRaises(AttributeError, getattr, Article.objects, 'bsearch_no_such_alias')
204 |         self.assertRaises(AttributeError, getattr, Article.objects, 'bsearch_title_search')
205 | 
206 |     def test_search_aliases(self):
207 |         '''
208 |         Tests search alias errors and functionality.
209 |         '''
210 |         title_alias = Article.objects.bsearch_title_search('title')
211 |         db_items = list(Article.objects.all())
212 |         self.assertEqual(title_alias.to_dict(), {'query': {'match': {'title': 'title'}}}, 'Title alias search did not return the expected JSON query.')
213 |         self.assertTrue(all([result in db_items for result in title_alias]), 'Alias searching for title "title" did not return all articles.')
214 |         self.assertTrue(all([result in db_items for result in title_alias[:]]), 'Alias searching for title "title" did not return all articles when using empty slice.')
215 |         self.assertEqual(len(title_alias[:1]), 1, 'Get item on an alias search with start=None and stop=1 did not return one item.')
216 |         self.assertEqual(len(title_alias[:2]), 2, 'Get item on an alias search with start=None and stop=2 did not return two item.')
217 |         self.assertEqual(title_alias.to_dict(), Article.objects.bsearch_title('title').to_dict(), 'Alias applicable to all models does not return the same JSON request body as the model specific one.')
218 |         self.assertEqual(NoUpdatedField.objects.search.filter('term', title='My title').to_dict(), NoUpdatedField.objects.bsearch_noupdatedmdlonly('My title').to_dict(), 'Alias applicable only to NoUpdatedField does not generate the correct filter.')
219 | 
220 |     def test_bungie_instance_search_aliases(self):
221 |         alias_dictd = Article.objects.search.bsearch_title('title query').bsearch_titlefilter('title filter').to_dict()
222 |         expected = {'query': {'bool': {'filter': [{'term': {'title': 'title filter'}}], 'must': [{'match': {'title': 'title query'}}]}}}
223 |         self.assertEqual(alias_dictd, expected, 'Alias on Bungiesearch instance did not return the expected dictionary.')
224 | 
225 |     def test_search_alias_model(self):
226 |         self.assertEqual(Article.objects.bsearch_get_alias_for_test().get_model(), Article, 'Unexpected get_model information on search alias.')
227 |         self.assertEqual(Article.objects.search.bsearch_title('title query').bsearch_get_alias_for_test().get_model(), Article, 'Unexpected get_model information on search alias.')
228 |         self.assertRaises(ValueError, Bungiesearch().bsearch_get_alias_for_test().get_model)
229 | 
230 |     def test_post_save(self):
231 |         art = {'title': 'Title three',
232 |                'description': 'Postsave',
233 |                'link': 'http://example.com/sparrho',
234 |                'published': pytz.UTC.localize(datetime(year=2020, month=9, day=15)),
235 |                'updated': pytz.UTC.localize(datetime(year=2014, month=9, day=10)),
236 |                'tweet_count': 20,
237 |                'source_hash': 159159159159,
238 |                'missing_data': '',
239 |                'positive_feedback': 50,
240 |                'negative_feedback': 5}
241 |         obj = Article.objects.create(**art)
242 |         find_three = Article.objects.search.query('match', title='three')
243 |         self.assertEqual(len(find_three), 2, 'Searching for "three" in title did not return exactly two items (got {}).'.format(find_three))
244 |         # Let's check that both returned items are from different indices.
245 |         self.assertNotEqual(find_three[0:1:True].meta.index, find_three[1:2:True].meta.index, 'Searching for "three" did not return items from different indices.')
246 |         # Let's now delete this object to test the post delete signal.
247 |         obj.delete()
248 | 
249 |     def test_bulk_delete(self):
250 |         '''
251 |         This tests that using the update_index function with 'delete' as the action performs a bulk delete operation on the data.
252 |         '''
253 |         bulk_art1 = {'title': 'Title four',
254 |                      'description': 'Bulk delete first',
255 |                      'link': 'http://example.com/bd1',
256 |                      'published': pytz.UTC.localize(datetime(year=2015, month=7, day=13)),
257 |                      'updated': pytz.UTC.localize(datetime(year=2015, month=7, day=20)),
258 |                      'tweet_count': 20,
259 |                      'source_hash': 159159159159,
260 |                      'missing_data': '',
261 |                      'positive_feedback': 50,
262 |                      'negative_feedback': 5}
263 |         bulk_art2 = {'title': 'Title five',
264 |                      'description': 'Bulk delete second',
265 |                      'link': 'http://example.com/bd2',
266 |                      'published': pytz.UTC.localize(datetime(year=2015, month=7, day=13)),
267 |                      'updated': pytz.UTC.localize(datetime(year=2015, month=7, day=20)),
268 |                      'tweet_count': 20,
269 |                      'source_hash': 159159159159,
270 |                      'missing_data': '',
271 |                      'positive_feedback': 50,
272 |                      'negative_feedback': 5}
273 | 
274 |         bulk_obj1 = Article.objects.create(**bulk_art1)
275 |         bulk_obj2 = Article.objects.create(**bulk_art2)
276 | 
277 |         find_five = Article.objects.search.query('match', title='five')
278 |         self.assertEqual(len(find_five), 2, 'Searching for "five" in title did not return exactly two results (got {})'.format(find_five))
279 | 
280 |         model_items = [bulk_obj1.pk, bulk_obj2.pk]
281 |         model_name = Article.__name__
282 |         update_index(model_items, model_name, action='delete', bulk_size=2, num_docs=-1, start_date=None, end_date=None, refresh=True)
283 | 
284 |         find_four = Article.objects.search.query('match', title='four')
285 |         self.assertEqual(len(find_four), 0, 'Searching for "four" in title did not return exactly zero results (got {})'.format(find_four))
286 |         find_five = Article.objects.search.query('match', title='five')
287 |         self.assertEqual(len(find_five), 0, 'Searching for "five" in title did not return exactly zero results (got {})'.format(find_five))
288 | 
289 |     def test_manager_interference(self):
290 |         '''
291 |         This tests that saving an object which is not managed by Bungiesearch won't try to update the index for that model.
292 |         '''
293 |         Unmanaged.objects.create(field_title='test', field_description='blah')
294 | 
295 |     def test_time_indexing(self):
296 |         update_index(Article.objects.all(), 'Article', start_date=datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M'))
297 |         update_index(NoUpdatedField.objects.all(), 'NoUpdatedField', end_date=datetime.strftime(datetime.now(), '%Y-%m-%d'))
298 | 
299 |     def test_optimal_queries(self):
300 |         db_item = NoUpdatedField.objects.get(pk=1)
301 |         src_item = NoUpdatedField.objects.search.query('match', field_title='My title')[0]
302 |         self.assertEqual(src_item.id, db_item.id, 'Searching for the object did not return the expected object id.')
303 |         self.assertEqual(src_item.get_deferred_fields(), {'field_description'}, 'Was expecting description in the set of deferred fields.')
304 | 
305 |     def test_concat_queries(self):
306 |         items = Article.objects.bsearch_title_search('title')[::False] + NoUpdatedField.objects.search.query('match', field_title='My title')[::False]
307 |         for item in items:
308 |             model = item._meta.proxy_for_model if item._meta.proxy_for_model else type(item)
309 |             self.assertIn(model, [Article, NoUpdatedField], 'Got an unmapped item ({}), or an item with an unexpected mapping.'.format(type(item)))
310 | 
311 |     def test_data_templates(self):
312 |         # One article has a title that contains 'one'
313 |         match_one = Article.objects.search.query('match', text='one')
314 |         self.assertEqual(len(match_one), 2, 'Searching for "one" in text did not return exactly one item (got {}).'.format(match_one))
315 |         self.assertEqual(match_one[0].title, 'Title one', 'Searching for "one" in text did not yield the first article (got {})'.format(match_one[0].title))
316 | 
317 |         # Two articles have a description that contain 'article'
318 |         match_two = Article.objects.search.query('match', text='article')
319 |         self.assertEqual(len(match_two), 4, 'Searching for "article" in text did not return exactly two items (got {})'.format(match_two))
320 | 
321 |         # Two articles have a link with 'example,' but since link isn't in the template, there should be zero results
322 |         match_zero = Article.objects.search.query('match', text='example')
323 |         self.assertEqual(len(match_zero), 0, 'Searching for "article" in text did not return exactly zero items (got {})'.format(match_zero))
324 | 
325 |     def test_fields(self):
326 |         '''
327 |         Checking that providing a specific field will correctly fetch these items from elasticsearch.
328 |         '''
329 |         for mdl, id_field in [(Article, 'id'), (User, 'user_id')]:
330 |             raw_items = mdl.objects.search.fields('_id')[:5:True]
331 |             self.assertTrue(all([dir(raw) == ['meta'] for raw in raw_items]), 'Requesting only _id returned more than just meta info from ES for model {}.'.format(mdl))
332 |             items = mdl.objects.search.fields('_id')[:5]
333 |             self.assertTrue(all([dbi in items for dbi in mdl.objects.all()]), 'Mapping after fields _id only search did not return all results for model {}.'.format(mdl))
334 |             items = mdl.objects.search.fields([id_field, '_id', '_source'])[:5]
335 |             self.assertTrue(all([dbi in items for dbi in mdl.objects.all()]), 'Mapping after fields _id, id and _source search did not return all results for model {}.'.format(mdl))
336 | 
337 |     def test_prepare_field(self):
338 |         '''
339 |         Check that providing a method to calculate the value of a field will yield correct results in the search index.
340 |         '''
341 |         user_int_description = {'user_id': 'bungie3',
342 |                                 'about': '123',
343 |                                 'created': pytz.UTC.localize(datetime(year=2015, month=1, day=1)),
344 |                                 'updated': pytz.UTC.localize(datetime(year=2015, month=6, day=1)),
345 |                                 }
346 |         User.objects.create(**user_int_description)
347 | 
348 |         find_one = User.objects.search.filter('term', int_about=1)
349 |         self.assertEqual(len(find_one), 4, 'Searching for users with default int description did not return exactly 4 items (got {})'.format(find_one))
350 | 
351 |         find_123 = User.objects.search.filter('term', int_about=123)
352 |         self.assertEqual(len(find_one), 4, 'Searching for users with int description 123 did not return exactly 2 items (got {})'.format(find_123))
353 | 
354 |         find_zero = User.objects.search.filter('term', int_about=0)
355 |         self.assertEqual(len(find_zero), 0, 'Searching for users with int description zero did not return exactly 0 items (got {})'.format(find_zero))
356 | 
357 |     def test_fun(self):
358 |         '''
359 |         Test fun queries.
360 |         '''
361 |         lazy = Article.objects.bsearch_title_search('title').only('pk').fields('_id')
362 |         print(len(lazy)) # Returns the total hits computed by elasticsearch.
363 |         assert all([type(item) == Article for item in lazy.filter('range', effective_date={'lte': '2014-09-22'})[5:7]])
364 | 
365 |     def test_meta(self):
366 |         '''
367 |         Test search meta is set.
368 |         '''
369 |         lazy = Article.objects.bsearch_title_search('title').only('pk').fields('_id')
370 |         assert all([hasattr(item._searchmeta) for item in lazy.filter('range', effective_date={'lte': '2014-09-22'})[5:7]])
371 | 
372 |     def test_manangedbutempty(self):
373 |         '''
374 |         Tests that the indexing condition controls indexing properly.
375 |         '''
376 |         mbeo = ManangedButEmpty.objects.create(field_title='Some time', field_description='This should never be indexed.')
377 |         idxi = len(ManangedButEmpty.objects.search)
378 |         self.assertEquals(idxi, 0, 'ManagedButEmpty has {} indexed items instead of zero.'.format(idxi))
379 |         mbeo.delete()
380 | 
381 |     def test_specify_index(self):
382 |         self.assertEqual(Article.objects.count(), Article.objects.search_index('bungiesearch_demo').count(), 'Indexed items on bungiesearch_demo for Article does not match number in database.')
383 |         self.assertEqual(Article.objects.count(), Article.objects.search_index('bungiesearch_demo_bis').count(), 'Indexed items on bungiesearch_demo_bis for Article does not match number in database.')
384 |         self.assertEqual(Article.objects.count(), Article.objects.bsearch_bisindex().count(), 'Indexed items on bungiesearch_demo_bis for Article does not match number in database, using alias.')
385 |         self.assertEqual(NoUpdatedField.objects.count(), NoUpdatedField.objects.search_index('bungiesearch_demo').count(), 'Indexed items on bungiesearch_demo for NoUpdatedField does not match number in database.')
386 |         self.assertEqual(NoUpdatedField.objects.search_index('bungiesearch_demo_bis').count(), 0, 'Indexed items on bungiesearch_demo_bis for NoUpdatedField is zero.')
387 | 
388 |     def test_None_as_missing(self):
389 |         missing = Article.objects.search_index('bungiesearch_demo').filter('missing', field='text_field')
390 |         self.assertEqual(len(missing), 1, 'Filtering by missing text_field does not return exactly one item.')
391 |         self.assertEqual(missing[0].text_field, None, 'The item with missing text_field does not have text_field=None.')
392 | 
393 |     def test_signal_setup_teardown(self):
394 |         '''
395 |         Tests that setup and tear down can be ran.
396 |         '''
397 |         btsp = BungieTestSignalProcessor()
398 |         btsp.setup(Article)
399 |         self.assertTrue(btsp.setup_ran, 'Calling setup on the signal processor did not set it up.')
400 |         btsp.teardown(Article)
401 |         self.assertTrue(btsp.teardown_ran, 'Calling teardown on the signal processor did not tear it down.')
402 | 


--------------------------------------------------------------------------------
/tests/core/test_settings.py:
--------------------------------------------------------------------------------
 1 | from django.conf import settings
 2 | from django.test import TestCase
 3 | 
 4 | from bungiesearch import Bungiesearch
 5 | 
 6 | 
 7 | class SettingsTestCase(TestCase):
 8 | 
 9 |     def test_timeout_used(self):
10 |         settings.BUNGIESEARCH['TIMEOUT'] = 29
11 |         search = Bungiesearch()
12 | 
13 |         self.assertEqual(search.BUNGIE['TIMEOUT'], 29)
14 |         self.assertEqual(search._using.transport.kwargs['timeout'], 29)
15 | 


--------------------------------------------------------------------------------
/tests/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 | 
5 | if __name__ == "__main__":
6 |     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")
7 |     from django.core.management import execute_from_command_line
8 |     execute_from_command_line(sys.argv)
9 | 


--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | DJANGO_SETTINGS_MODULE=tests.settings
3 | 


--------------------------------------------------------------------------------
/tests/settings.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
 4 | DEBUG = True
 5 | BASE_DIR = os.path.dirname(os.path.dirname(__file__))
 6 | SECRET_KEY = 'cookies_are_delicious_delicacies'
 7 | ROOT_URLCONF = 'urls'
 8 | LANGUAGE_CODE = 'en-us'
 9 | TIME_ZONE = 'UTC'
10 | USE_I18N = True
11 | USE_L10N = True
12 | USE_TZ = True
13 | MIDDLEWARE_CLASSES = ()
14 | DEFAULT_INDEX_TABLESPACE = ''
15 | 
16 | # Make sure the copy of bungiesearch in the directory above this one is used.
17 | sys.path.insert(0, BASE_DIR)
18 | 
19 | INSTALLED_APPS = (
20 |     'bungiesearch',
21 |     'core',
22 | )
23 | 
24 | DATABASES = {
25 |     'default': {
26 |         'ENGINE': 'django.db.backends.sqlite3',
27 |         'NAME': ':memory:',
28 |     }
29 | }
30 | 
31 | TEMPLATES = [
32 |     {
33 |         'BACKEND': 'django.template.backends.django.DjangoTemplates',
34 |         'DIRS': [],
35 |         'APP_DIRS': True,
36 |         'OPTIONS': {
37 |             'context_processors': [
38 |                 'django.contrib.auth.context_processors.auth',
39 |                 'django.template.context_processors.debug',
40 |                 'django.template.context_processors.i18n',
41 |                 'django.template.context_processors.media',
42 |                 'django.template.context_processors.static',
43 |                 'django.template.context_processors.tz',
44 |                 'django.contrib.messages.context_processors.messages',
45 |             ],
46 |         },
47 |     },
48 | ]
49 | 
50 | BUNGIESEARCH = {
51 |     'URLS': [os.getenv('ELASTIC_SEARCH_URL', 'localhost')],
52 |     'ES_SETTINGS': {
53 |         'http_auth': os.getenv('ELASTIC_SEARCH_AUTH')
54 |     },
55 |     'ALIASES': {
56 |         'bsearch': 'core.search_aliases'
57 |     },
58 |     'INDICES': {
59 |         'bungiesearch_demo': 'core.search_indices',
60 |         'bungiesearch_demo_bis': 'core.search_indices_bis'
61 |     },
62 |     'SIGNALS': {
63 |         'BUFFER_SIZE': 1,
64 |         'SIGNAL_CLASS': 'core.bungie_signal.BungieTestSignalProcessor'
65 |     }
66 | }
67 | 


--------------------------------------------------------------------------------