├── .gitignore ├── .project ├── .pydevproject ├── .settings └── org.eclipse.core.resources.prefs ├── .travis.yml ├── LICENSE ├── README.rst ├── bungiesearch ├── __init__.py ├── aliases.py ├── fields.py ├── indices.py ├── logger.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── _utils.py │ │ ├── clear_index.py │ │ ├── rebuild_index.py │ │ └── search_index.py ├── managers.py ├── signals.py └── utils.py ├── requirements.txt ├── runtests.sh ├── setup.cfg ├── setup.py ├── setup.sh └── tests ├── __init__.py ├── core ├── __init__.py ├── analysis.py ├── bungie_signal.py ├── models.py ├── search_aliases.py ├── search_indices.py ├── search_indices_bis.py ├── templates │ └── article.txt ├── test_bungiesearch.py └── test_settings.py ├── manage.py ├── pytest.ini └── settings.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg 3 | 4 | /venv 5 | /build/ 6 | /dist/ 7 | /cache/ 8 | /.cache/ 9 | /bungiesearch.egg-info/ 10 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | bungiesearch 4 | 5 | 6 | 7 | 8 | 9 | org.python.pydev.PyDevBuilder 10 | 11 | 12 | 13 | 14 | 15 | org.python.pydev.pythonNature 16 | 17 | 18 | -------------------------------------------------------------------------------- /.pydevproject: -------------------------------------------------------------------------------- 1 | 2 | 3 | bungiesearch 4 | python 2.7 5 | 6 | /${PROJECT_DIR_NAME}/bungiesearch 7 | /${PROJECT_DIR_NAME}/tests 8 | 9 | 10 | -------------------------------------------------------------------------------- /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding/setup.py=utf-8 3 | -------------------------------------------------------------------------------- 
/.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | env: 4 | global: 5 | - TRAVIS=true 6 | - ELASTIC_SEARCH_URL=localhost 7 | matrix: 8 | include: 9 | - python: "2.7" 10 | env: DJANGO_VERSION=">=1.8,<1.9" 11 | - python: "2.7" 12 | env: DJANGO_VERSION=">=1.9,<1.10" 13 | - python: "2.7" 14 | env: DJANGO_VERSION=">=1.10,<1.11" 15 | - python: "3.4" 16 | env: DJANGO_VERSION=">=1.8,<1.9" 17 | - python: "3.4" 18 | env: DJANGO_VERSION=">=1.9,<1.10" 19 | - python: "3.5" 20 | env: DJANGO_VERSION=">=1.8,<1.9" 21 | - python: "3.5" 22 | env: DJANGO_VERSION=">=1.9,<1.10" 23 | - python: "3.5" 24 | env: DJANGO_VERSION=">=1.10,<1.11" COVERAGE=true 25 | install: 26 | - pip install Django$DJANGO_VERSION 27 | - pip install -r requirements.txt 28 | - wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-2.3.0.zip 29 | - unzip -o elasticsearch-2.3.0.zip &> /dev/null 30 | script: 31 | - ./runtests.sh --cluster 32 | after_success: 33 | test -n "$COVERAGE" && coveralls 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Sparrho 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 
13 | 14 | * Neither the name of Sparrho nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | WARNING: UNMAINTAINED 2 | ============ 3 | This package is no longer maintained. You may want to check out the `elasticsearch-dsl-py `__ or `django-haystack `__. 4 | 5 | Bungiesearch 6 | ============ 7 | 8 | |Build Status| |Coverage Status| 9 | 10 | .. contents:: Table of contents 11 | :depth: 2 12 | 13 | Purpose 14 | ======= 15 | 16 | Bungiesearch is a Django wrapper for 17 | `elasticsearch-dsl-py `__. 18 | It inherits from elasticsearch-dsl-py's ``Search`` class, so all the 19 | fabulous features developed by the elasticsearch-dsl-py team are also 20 | available in Bungiesearch. In addition, just like ``Search``, 21 | Bungiesearch is a lazy searching class (and iterable), meaning you can 22 | call functions in a row, or do something like the following. 23 | 24 | .. 
code:: python 25 | 26 | lazy = Article.objects.search.query('match', _all='Description') 27 | print len(lazy) # Prints the number of hits by only fetching the number of items. 28 | for item in lazy[5:10]: 29 | print item 30 | 31 | Features 32 | ======== 33 | 34 | - Core Python friendly 35 | 36 | - Iteration (``[x for x in lazy_search]``) 37 | - Get items (``lazy_search[10]``) 38 | - Number of hits via ``len`` (``len(lazy_search)``) 39 | 40 | - Index management 41 | 42 | - Creating and deleting an index. 43 | - Creating, updating and deleting doctypes and their mappings. 44 | - Update index doctypes. 45 | 46 | - Django Model Mapping 47 | 48 | - Very easy mapping (no lies). 49 | - Automatic model mapping (and supports undefined models by 50 | returning a ``Result`` instance of ``elasticsearch-dsl-py``). 51 | - Efficient database fetching: 52 | 53 | - One fetch for all items of a given model. 54 | - Fetches only desired fields. 55 | 56 | - Django Manager 57 | 58 | - Easy model integration: 59 | ``MyModel.search.query("match", _all="something to search")``. 60 | - Search aliases (search shortcuts with as many parameters as 61 | wanted): ``Tweet.objects.bungie_title_search("bungie")`` or 62 | ``Article.objects.bungie_title_search("bungie")``, where 63 | ``bungie_title_search`` is uniquely defined. 64 | 65 | - Django signals 66 | 67 | - Connect to post save and pre delete signals for the elasticsearch 68 | index to correctly reflect the database (almost) at all times. 69 | 70 | - Requirements 71 | 72 | - Django >= 1.8 73 | - Python 2.7, 3.4, 3.5 74 | 75 | Feature examples 76 | ---------------- 77 | 78 | See section "Full example" at the bottom of page to see the code needed 79 | to perform the following examples. ### Query a word (or list thereof) 80 | on a managed model. 81 | 82 | ``Article.objects.search.query('match', _all='Description')`` 83 | 84 | Use a search alias on a model's manager. 
85 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 86 | 87 | ``Article.objects.bsearch_title_search('title')`` 88 | 89 | Use a search alias on a bungiesearch instance. 90 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 91 | 92 | ``Article.objects.search.bsearch_title_search('title').bsearch_titlefilter('filter this title')`` 93 | 94 | Iterate over search results 95 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 96 | 97 | .. code:: python 98 | 99 | # Will print the Django model instance. 100 | for result in Article.objects.search.query('match', _all='Description'): 101 | print result 102 | 103 | Fetch a single item 104 | ~~~~~~~~~~~~~~~~~~~ 105 | 106 | .. code:: python 107 | 108 | Article.objects.search.query('match', _all='Description')[0] 109 | 110 | Get the number of returned items 111 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 112 | 113 | .. code:: python 114 | 115 | print len(Article.objects.search.query('match', _all='Description')) 116 | 117 | Deferred model instantiation 118 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 119 | 120 | .. code:: python 121 | 122 | # Will print the Django model instance's primary key. Will only fetch the `pk` field from the database. 123 | for result in Article.objects.search.query('match', _all='Description').only('pk'): 124 | print result.pk 125 | 126 | Elasticsearch limited field fetching 127 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 128 | 129 | .. code:: python 130 | 131 | # Will print the Django model instance. However, elasticsearch's response only has the `_id` field. 132 | for result in Article.objects.search.query('match', _all='Description').fields('_id'): 133 | print result 134 | 135 | Get a specific number of items with an offset. 136 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 137 | 138 | This is actually elasticsearch-dsl-py functionality, but it's 139 | demonstrated here because we can iterate over the results via 140 | Bungiesearch. 141 | 142 | .. 
code:: python 143 | 144 | for item in Article.objects.bsearch_title_search('title').only('pk').fields('_id')[5:7]: 145 | print item 146 | 147 | Lazy objects 148 | ~~~~~~~~~~~~ 149 | 150 | .. code:: python 151 | 152 | lazy = Article.objects.bsearch_title_search('title') 153 | print len(lazy) 154 | for item in lazy.filter('range', effective_date={'lte': '2014-09-22'}): 155 | print item 156 | 157 | Installation 158 | ============ 159 | 160 | Unless noted otherwise, each step is required. 161 | 162 | Install the package 163 | ------------------- 164 | 165 | The easiest way is to install the package from PyPI: 166 | 167 | ``pip install bungiesearch`` 168 | 169 | **Note:** Check your version of Django after installing bungiesearch. It 170 | was reported to me directly that installing bungiesearch may upgrade 171 | your version of Django, although I haven't been able to confirm that 172 | myself. Bungiesearch depends on Django 1.8 and above. 173 | 174 | In Django 175 | --------- 176 | 177 | Updating your Django models 178 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 179 | 180 | **Note:** this part is only needed if you want to be able to use search 181 | aliases, which allow you to define shortcuts to complex queries, 182 | available directly from your Django models. I think it's extremely 183 | practical. 184 | 185 | 1. Open your ``models.py`` file. 186 | 2. Add the bungiesearch manager import: 187 | ``from bungiesearch.managers import BungiesearchManager`` 188 | 3. Find the model, or models, you wish to index on Elasticsearch and set 189 | them to be managed by Bungiesearch by adding the objects field to 190 | them, as such: ``objects = BungiesearchManager()``. You should now 191 | have a Django model `similar to 192 | this `__. 193 | 194 | Creating bungiesearch search indexes 195 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 196 | 197 | The search indexes define how bungiesearch should serialize each of the 198 | model's objects. 
It effectively defines how your object is serialized 199 | and how the ES index should be structured. These are referred to as 200 | `ModelIndex `__\ es. 201 | 202 | A good practice here is to have all the bungiesearch stuff in its own 203 | package. For example, for the section of the Sparrho platform that uses 204 | Django, we have a package called ``search`` where we define the search 205 | indexes, and a subpackage called ``aliases`` which has the many aliases 206 | we use (more on that later). 207 | 208 | 1. Create a subclass of ``ModelIndex``, which you can import with 209 | ``from bungiesearch.indices import ModelIndex``, in a new module 210 | preferably. 211 | 2. In this class, define a class called ``Meta``: it will hold meta 212 | information of this search index for bungiesearch's internal working. 213 | 3. Import the Django model you want to index (from your models file) 214 | and, in the Meta class, define a field called ``model``, which must 215 | be set to the model you want indexed. 216 | 4. By default, bungiesearch will index every field of your model. This 217 | may not always be desired, so you can define which fields must be 218 | excluded in this ``Meta`` class, via the exclude field. 219 | 5. There are plenty of options, so definitely have a read through the 220 | documentation for 221 | `ModelIndex `__. 222 | 223 | Here's `an 224 | example `__ of a 225 | search index. There can be many such definitions in a file. 226 | 227 | Django settings 228 | ~~~~~~~~~~~~~~~ 229 | 230 | This is the final required step. Here's the `full 231 | documentation `__ of 232 | this step. 233 | 234 | 1. Open your settings file and add a ``BUNGIESEARCH`` variable, which 235 | must be a dictionary. 236 | 2. Define ``URLS`` as a list of URLs (which can contain only one) of 237 | your ES servers. 238 | 3. 
Define the ``INDICES`` key as a dictionary where the key is the name 239 | of the index on ES that you want, and the value is the full Python 240 | path to the module which has all the ModelIndex classes to be 241 | indexed on that index name. 242 | 4. Set ``ALIASES`` to an empty dictionary (until you define any search 243 | aliases). 244 | 5. You can keep other values as their defaults. 245 | 246 | In your shell 247 | ------------- 248 | 249 | Create the ES indexes 250 | ~~~~~~~~~~~~~~~~~~~~~ 251 | 252 | From your shell, in the Django environment, run the following: 253 | 254 | ``python manage.py search_index --create`` 255 | 256 | Start populating the index 257 | -------------------------- 258 | 259 | Run the following which will take each of the objects in your model, 260 | serialize them, and add them to the elasticsearch index. 261 | 262 | ``python manage.py search_index --update`` 263 | 264 | **Note:** With additional parameters, you can limit the number of 265 | documents to be indexed, as well as set conditions on whether they 266 | should be indexed based on updated time for example. 267 | 268 | In Elasticsearch 269 | ---------------- 270 | 271 | You can now open your elasticsearch dashboard, such as Elastic HQ, and 272 | see that your index is created with the appropriate mapping and has 273 | items that are indexed. 274 | 275 | Quick start example 276 | =================== 277 | 278 | This example is from the ``tests`` folder. It may be partially out-dated, 279 | so please refer to the ``tests`` folder for the latest version. 280 | 281 | Procedure 282 | --------- 283 | 284 | 1. In your models.py file (or your managers.py), import bungiesearch and 285 | use it as a model manager. 286 | 2. Define one or more ModelIndex subclasses which define the mapping 287 | between your Django model and elasticsearch. 288 | 3. (Optional) Define SearchAlias subclasses which make it trivial to 289 | call complex elasticsearch-dsl-py functions. 290 | 4. 
Add a BUNGIESEARCH variable in your Django settings, which must 291 | contain the elasticsearch URL(s), the modules for the indices, the 292 | modules for the search aliases and the signal definitions. 293 | 294 | Example 295 | ------- 296 | 297 | Here's the code which is applicable to the previous examples. ### Django 298 | Model 299 | 300 | .. code:: python 301 | 302 | from django.db import models 303 | from bungiesearch.managers import BungiesearchManager 304 | 305 | class Article(models.Model): 306 | title = models.TextField(db_index=True) 307 | authors = models.TextField(blank=True) 308 | description = models.TextField(blank=True) 309 | link = models.URLField(max_length=510, unique=True, db_index=True) 310 | published = models.DateTimeField(null=True) 311 | created = models.DateTimeField(auto_now_add=True) 312 | updated = models.DateTimeField(null=True) 313 | tweet_count = models.IntegerField() 314 | raw = models.BinaryField(null=True) 315 | source_hash = models.BigIntegerField(null=True) 316 | missing_data = models.CharField(blank=True, max_length=255) 317 | positive_feedback = models.PositiveIntegerField(null=True, blank=True, default=0) 318 | negative_feedback = models.PositiveIntegerField(null=True, blank=True, default=0) 319 | popularity_index = models.IntegerField(default=0) 320 | 321 | objects = BungiesearchManager() 322 | 323 | class Meta: 324 | app_label = 'core' 325 | 326 | ModelIndex 327 | ~~~~~~~~~~ 328 | 329 | The following ModelIndex will generate a mapping containing all fields 330 | from ``Article``, minus those defined in ``ArticleIndex.Meta.exclude``. 331 | When the mapping is generated, each field will be given the most appropriate 332 | `elasticsearch core 333 | type `__, 334 | with default attributes (as defined in bungiesearch.fields). 
335 | 336 | These default attributes can be overwritten with 337 | ``ArticleIndex.Meta.hotfixes``: each dictionary key must be a field 338 | defined either in the model or in the ModelIndex subclass 339 | (``ArticleIndex`` in this case). 340 | 341 | .. code:: python 342 | 343 | from core.models import Article 344 | from bungiesearch.fields import DateField, StringField 345 | from bungiesearch.indices import ModelIndex 346 | 347 | 348 | class ArticleIndex(ModelIndex): 349 | effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published') 350 | meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])') 351 | 352 | class Meta: 353 | model = Article 354 | exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash') 355 | hotfixes = {'updated': {'null_value': '2013-07-01'}, 356 | 'title': {'boost': 1.75}, 357 | 'description': {'boost': 1.35}, 358 | 'full_text': {'boost': 1.125}} 359 | 360 | SearchAlias 361 | ~~~~~~~~~~~ 362 | 363 | Defines a search alias for one or more models (in this case only for 364 | ``core.models.Article``). 365 | 366 | .. code:: python 367 | 368 | from core.models import Article 369 | from bungiesearch.aliases import SearchAlias 370 | 371 | 372 | class SearchTitle(SearchAlias): 373 | def alias_for(self, title): 374 | return self.search_instance.query('match', title=title) 375 | 376 | class Meta: 377 | models = (Article,) 378 | alias_name = 'title_search' # This is optional. If none is provided, the name will be the class name in lower case. 379 | 380 | class InvalidAlias(SearchAlias): 381 | def alias_for_does_not_exist(self, title): 382 | return title 383 | 384 | class Meta: 385 | models = (Article,) 386 | 387 | Django settings 388 | ~~~~~~~~~~~~~~~ 389 | 390 | .. 
code:: python 391 | 392 | BUNGIESEARCH = { 393 | 'URLS': [os.getenv('ELASTIC_SEARCH_URL')], 394 | 'INDICES': {'bungiesearch_demo': 'core.search_indices'}, 395 | 'ALIASES': {'bsearch': 'myproject.search_aliases'}, 396 | 'SIGNALS': {'BUFFER_SIZE': 1} # uses BungieSignalProcessor 397 | } 398 | 399 | Documentation 400 | ============= 401 | 402 | ModelIndex 403 | ---------- 404 | 405 | A ``ModelIndex`` defines mapping and object extraction for indexing of a 406 | given Django model. 407 | 408 | Any Django model to be managed by bungiesearch must have a defined 409 | ModelIndex subclass. This subclass must contain a subclass called 410 | ``Meta`` which must have a ``model`` attribute (sets the model which it 411 | represents). 412 | 413 | Class attributes 414 | ~~~~~~~~~~~~~~~~ 415 | 416 | As detailed below, the doc type mapping will contain fields from the 417 | model it relates to. However, one may often need to index fields which 418 | correspond to either a concatenation of fields of the model or some 419 | logical operation. 420 | 421 | Bungiesearch makes this very easy: simply define a class attribute as 422 | whichever core type, and set the ``eval_as`` constructor parameter to 423 | a one-line Python expression. The object is referenced as ``obj`` (not 424 | ``self`` nor ``object``, just ``obj``). 425 | 426 | Example 427 | ^^^^^^^ 428 | 429 | This is a partial example as the Meta subclass is not defined, yet 430 | mandatory (cf. below). 431 | 432 | .. 
code:: python 433 | 434 | from bungiesearch.fields import DateField, StringField 435 | from bungiesearch.indices import ModelIndex 436 | 437 | class ArticleIndex(ModelIndex): 438 | effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published') 439 | meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])') 440 | 441 | Here, both ``effective_date`` and ``meta_data`` will be part of the doc 442 | type mapping, but won't be reverse mapped since those fields do not 443 | exist in the model. 444 | 445 | This can also be used to index foreign keys: 446 | 447 | .. code:: python 448 | 449 | some_field_name = StringField(eval_as='",".join([item for item in obj.some_foreign_relation.values_list("some_field", flat=True)]) if obj.some_foreign_relation else ""') 450 | 451 | Class methods 452 | ~~~~~~~~~~~~~ 453 | 454 | matches\_indexing\_condition 455 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 456 | 457 | Override this function to specify whether an item should be indexed or 458 | not. This is useful when defining multiple indices (and ModelIndex 459 | classes) for a given model. This method's signature and super class code 460 | is as follows, and allows indexing of all items. 461 | 462 | .. code:: python 463 | 464 | def matches_indexing_condition(self, item): 465 | return True 466 | 467 | For example, if a given elasticsearch index should contain only items 468 | whose title starts with ``"Awesome"``, then this method can be 469 | overridden as follows. 470 | 471 | .. code:: python 472 | 473 | def matches_indexing_condition(self, item): 474 | return item.title.startswith("Awesome") 475 | 476 | Meta subclass attributes 477 | ~~~~~~~~~~~~~~~~~~~~~~~~ 478 | 479 | **Note**: in the following, any variable defined as being a ``list`` 480 | could also be a ``tuple``. ##### model *Required:* defines the Django 481 | model for which this ModelIndex is applicable. 
482 | 483 | fields 484 | ^^^^^^ 485 | 486 | *Optional:* list of fields (or columns) which must be fetched when 487 | serializing the object for elasticsearch, or when reverse mapping the 488 | object from elasticsearch back to a Django Model instance. By default, 489 | all fields will be fetched. Setting this *will* restrict which fields 490 | can be fetched and may lead to errors when serializing the object. It is 491 | recommended to use the ``exclude`` attribute instead (cf. below). 492 | 493 | exclude 494 | ^^^^^^^ 495 | 496 | *Optional:* list of fields (or columns) which must not be fetched when 497 | serializing or deserializing the object. 498 | 499 | hotfixes 500 | ^^^^^^^^ 501 | 502 | *Optional:* a dictionary whose keys are index fields and whose values 503 | are dictionaries which define `core type 504 | attributes `__. 505 | By default, there aren't any special settings, apart from String fields, 506 | where the 507 | `analyzer `__ 508 | is set to 509 | ```snowball`` `__ 510 | (``{'analyzer': 'snowball'}``). 511 | 512 | additional\_fields 513 | ^^^^^^^^^^^^^^^^^^ 514 | 515 | *Optional:* additional fields to fetch for mapping, be it for 516 | ``eval_as`` fields or when returning the object from the database. 517 | 518 | id\_field 519 | ^^^^^^^^^ 520 | 521 | *Optional:* the model field to use as a unique ID for elasticsearch's 522 | metadata ``_id``. Defaults to ``id`` (also called 523 | ```pk`` `__). 524 | 525 | updated\_field 526 | ^^^^^^^^^^^^^^ 527 | 528 | *Optional:* set the model's field which can be filtered on dates in 529 | order to find when objects have been updated. Note, this is *mandatory* 530 | to use ``--start`` and/or ``--end`` when updating the index (with 531 | ``search_index --update``). 532 | 533 | optimize\_queries 534 | ^^^^^^^^^^^^^^^^^ 535 | 536 | *Optional:* set to True to make efficient queries when automatically 537 | mapping to database objects. 
This will *always* restrict fetching to the 538 | fields set in ``fields`` and in ``additional_fields``. *Note:* You can 539 | also perform an optimal database query with ``.only('__model')``, which 540 | will use the same fields as ``optimize_queries``, or 541 | ``.only('__fields')``, which will use the fields provided in the 542 | ``.fields()`` call. 543 | 544 | indexing\_query 545 | ^^^^^^^^^^^^^^^ 546 | 547 | *Optional:* set to a QuerySet instance to specify the query used when 548 | the search\_index command is run to index. This **does not** affect how 549 | each piece of content is indexed. 550 | 551 | default 552 | ^^^^^^^ 553 | 554 | Enables support for a given model to be indexed on several elasticsearch 555 | indices. Set to ``False`` on all but the default index. **Note**: if all 556 | managed models are set with ``default=False`` then Bungiesearch will 557 | fail to find and index that model. 558 | 559 | Example 560 | ~~~~~~~ 561 | 562 | Indexes all objects of ``Article``, as long as their ``updated`` 563 | datetime is less than `21 October 2015 564 | 04:29 `__. 565 | 566 | .. code:: python 567 | 568 | from core.models import Article 569 | from bungiesearch.indices import ModelIndex 570 | from datetime import datetime 571 | 572 | class ArticleIndex(ModelIndex): 573 | 574 | def matches_indexing_condition(self, item): 575 | return item.updated < datetime(2015, 10, 21, 4, 29) 576 | 577 | class Meta: 578 | model = Article 579 | id_field = 'id' # That's actually the default value, so it's not really needed. 
580 | exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash') 581 | hotfixes = {'updated': {'null_value': '2013-07-01'}, 582 | 'title': {'boost': 1.75}, 583 | 'description': {'boost': 1.35}, 584 | 'full_text': {'boost': 1.125}} 585 | optimize_queries = True 586 | indexing_query = Article.objects.defer(*exclude).select_related().all().prefetch_related('tags') 587 | 588 | SearchAlias 589 | ----------- 590 | 591 | A ``SearchAlias`` defines search shortcuts (somewhat similar to `Django 592 | managers `__). 593 | Often times, a given search will be used in multiple parts of the code. 594 | SearchAliases allow you to define those queries, filters, or any 595 | bungiesearch/elasticsearch-dsl-py calls as an alias. 596 | 597 | A search alias is either applicable to a ``list`` (or ``tuple``) of 598 | managed models, or to any bungiesearch instance. It's very simple, so 599 | here's an example which is detailed right below. 600 | 601 | Example 602 | ~~~~~~~ 603 | 604 | The simplest implementation of a SearchAlias is as follows. This 605 | search alias can be called via ``Article.objects.bungie_title`` (or 606 | ``Article.objects.search.bungie_title``), supposing that the namespace 607 | is set to ``None`` in the settings (cf. below). 608 | 609 | Definition 610 | ^^^^^^^^^^ 611 | 612 | .. code:: python 613 | 614 | from bungiesearch.aliases import SearchAlias 615 | 616 | class Title(SearchAlias): 617 | def alias_for(self, title): 618 | return self.search_instance.query('match', title=title) 619 | 620 | Usage 621 | ^^^^^ 622 | 623 | .. code:: python 624 | 625 | Article.objects.bungie_title('title') 626 | 627 | Method overwrite 628 | ~~~~~~~~~~~~~~~~ 629 | 630 | Any implementation needs to inherit from 631 | ``bungiesearch.aliases.SearchAlias`` and overwrite ``alias_for``. 
You 632 | can set as many or as few parameters as you want for that function 633 | (since bungiesearch only returns the pointer to that function without 634 | actually calling it). 635 | 636 | Since each managed model has its own doc type, ``self.search_instance`` 637 | is a bungiesearch instance set to search the specific doctype. 638 | 639 | Meta subclass attributes 640 | ~~~~~~~~~~~~~~~~~~~~~~~~ 641 | 642 | Although not mandatory, the ``Meta`` subclass enables custom naming and 643 | model restrictions for a search alias. 644 | 645 | models 646 | ^^^^^^ 647 | 648 | *Optional:* ``list`` (or ``tuple``) of Django models which are allowed 649 | to use this search alias. If a model which is not allowed to use this 650 | SearchAlias tries it, a ``ValueError`` will be raised. 651 | 652 | alias\_name 653 | ^^^^^^^^^^^ 654 | 655 | *Optional:* A string corresponding to the suffix name of this search alias. 656 | Defaults to the lower case class name. 657 | 658 | **WARNING**: As explained in the "Settings" section below, all search 659 | aliases in a given module share the prefix (or namespace). This is to 660 | prevent aliases from accidentally overwriting Django manager functions 661 | (e.g. ``update`` or ``get``). In other words, if you set the 662 | ``alias_name`` to ``test``, then it must be called as 663 | ``model_obj.objects.$prefix$_test`` where ``$prefix$`` is the prefix 664 | defined in the settings. This prefix is also applicable to search 665 | aliases which are available via bungiesearch instances directly. Hence, 666 | one can define in one module search utilities (e.g. ``regex`` and 667 | ``range``) and define model specific aliases (e.g. ``title``) in another 668 | module, and use both in conjunction as such: 669 | ``Article.objects.search.bungie_title('search title').utils_range(field='created', gte='2014-05-20', as_query=True)``. 670 | These aliases can be concatenated ad vitam aeternam. 
671 | 672 | Sophisticated example 673 | ~~~~~~~~~~~~~~~~~~~~~ 674 | 675 | This example shows that we can have some fun with search aliases. In 676 | this case, we define a Range alias which is applicable to any field on 677 | any model. 678 | 679 | .. code:: python 680 | 681 | class Range(SearchAlias): 682 | def alias_for(self, field, gte=None, lte=None, boost=None, as_query=False): 683 | body = {field: {}} 684 | if gte: 685 | body[field]['gte'] = gte 686 | if lte: 687 | body[field]['lte'] = lte 688 | if boost: 689 | if not as_query: 690 | logging.warning('Boost is not applicable to search alias Range when not used as a query.') 691 | else: 692 | body[field]['boost'] = boost 693 | if as_query: 694 | return self.search_instance.query({'range': body}) 695 | return self.search_instance.filter({'range': body}) 696 | 697 | We can use it as such: 698 | ``Article.objects.bungie_range(field='created', gte='2014-05-20', as_query=True)``. 699 | 700 | Settings 701 | -------- 702 | Add 'bungiesearch' to INSTALLED_APPS. 703 | 704 | You must define ``BUNGIESEARCH`` in your Django settings in order for 705 | bungiesearch to know the elasticsearch URL(s) and which index name contains 706 | mappings for each ModelIndex. 707 | 708 | .. code:: python 709 | 710 | BUNGIESEARCH = { 711 | 'URLS': ['localhost'], # No leading http:// or the elasticsearch client will complain. 712 | 'INDICES': {'main_index': 'myproject.myapp.myindices'}, # Must be a module path. 713 | 'ALIASES': {'bsearch': 'myproject.search_aliases'}, 714 | 'SIGNALS': {'BUFFER_SIZE': 1}, 715 | 'TIMEOUT': 5 716 | } 717 | 718 | URLS 719 | ~~~~ 720 | 721 | *Required:* must be a list of URLs which host elasticsearch instance(s). 722 | This is directly sent to elasticsearch-dsl-py, so any issue with 723 | multiple URLs should be referred to them. 
724 | 725 | INDICES 726 | ~~~~~~~ 727 | 728 | *Required:* must be a dictionary where each key is the name of an 729 | elasticsearch index and each value is a path to a Python module 730 | containing classes which inherit from 731 | ``bungiesearch.indices.ModelIndex`` (cf. above). 732 | 733 | ALIASES 734 | ~~~~~~~ 735 | 736 | *Optional:* a dictionary whose key is the alias namespace and whose 737 | value is the Python module containing classes which inherit from 738 | ``bungiesearch.aliases.SearchAlias``. If the namespace is ``None``, then 739 | the namespace will default to ``bungie``. If the namespace is an empty string, 740 | there will be no alias namespace. The provided namespace will be 741 | followed by an underscore. In the example above, each search alias 742 | defined in ``myproject.search_aliases`` will be referenced as 743 | ``$ModelObj$.objects.bsearch_$alias$``, where ``$ModelObj$`` is a Django 744 | model and ``$alias$`` is the name of the search alias. 745 | 746 | The purpose is to not accidentally overwrite Django's default manager 747 | functions with search aliases. 748 | 749 | SIGNALS 750 | ~~~~~~~ 751 | 752 | *Optional:* if it exists, it must be a dictionary (even empty), and will 753 | connect to the ``post save`` and ``pre delete`` model functions of *all* 754 | models using ``bungiesearch.managers.BungiesearchManager`` as a manager. 755 | One may also define a signal processor class for more custom 756 | functionality by placing the string value of the module path under a key 757 | called ``SIGNAL_CLASS`` in the dictionary value of ``SIGNALS`` and 758 | defining ``setup`` and ``teardown`` methods, which take ``model`` as the 759 | only parameter. These methods connect and disconnect the signal 760 | processing class to django signals (signals are connected to each model 761 | which uses a BungiesearchManager). 
762 | 763 | If ``SIGNALS`` is not defined in the settings, *none* of the models 764 | managed by BungiesearchManager will automatically update the index when 765 | a new item is created or deleted. 766 | 767 | BUFFER\_SIZE 768 | ^^^^^^^^^^^^ 769 | 770 | *Optional:* an integer representing the number of items to buffer before 771 | making a bulk index update, defaults to ``100``. 772 | 773 | **WARNING**: if your application is shut down before the buffer is 774 | emptied, then any buffered instance *will not* be indexed on 775 | elasticsearch. Hence, a possibly better implementation is wrapping 776 | ``post_save_connector`` and ``pre_delete_connector`` from 777 | ``bungiesearch.signals`` in a celery task. It is not implemented as such 778 | here in order to not require ``celery``. 779 | 780 | TIMEOUT 781 | ~~~~~~~ 782 | 783 | *Optional:* Elasticsearch connection timeout in seconds. Defaults to 784 | ``5``. 785 | 786 | Testing 787 | ======= 788 | 789 | The easiest way to run the tests is to install all dev dependencies using 790 | ``./setup.sh``, then run ``./runtests.sh``. 791 | 792 | All Bungiesearch tests are in ``tests/core/test_bungiesearch.py``. You 793 | can run the tests by creating a Python virtual environment, installing 794 | the requirements from ``requirements.txt``, installing the package 795 | (``pip install .``) and running ``python tests/manage.py test``. Make 796 | sure to update ``tests/settings.py`` to use your own elasticsearch URLs, 797 | or update the ELASTIC\_SEARCH\_URL environment variable. 798 | 799 | .. |Build Status| image:: https://travis-ci.org/ChristopherRabotin/bungiesearch.svg?branch=master 800 | :target: https://travis-ci.org/ChristopherRabotin/bungiesearch 801 | ..
|Coverage Status| image:: https://coveralls.io/repos/ChristopherRabotin/bungiesearch/badge.svg?branch=master&service=github 802 | :target: https://coveralls.io/github/ChristopherRabotin/bungiesearch?branch=master 803 | -------------------------------------------------------------------------------- /bungiesearch/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from importlib import import_module 3 | 4 | from django.conf import settings 5 | from elasticsearch.client import Elasticsearch 6 | from elasticsearch_dsl.search import Search 7 | from six import iteritems, itervalues, string_types 8 | 9 | from .aliases import SearchAlias 10 | from .indices import ModelIndex 11 | from .logger import logger 12 | 13 | 14 | class Bungiesearch(Search): 15 | ''' 16 | This object is used to read Django settings and initialize the elasticsearch connection. 17 | ''' 18 | DEFAULT_TIMEOUT = 5 19 | BUNGIE = settings.BUNGIESEARCH 20 | 21 | # The following code loads each model index_name module (as defined in the settings) and stores 22 | # index_name name to model index_name, and index_name name to model. Settings shouldn't change between 23 | # subsequent calls to Search(), which is why this is static code. 24 | 25 | _cached_es_instances = {} 26 | # Let's go through the settings in order to map each defined Model/ModelIndex to the elasticsearch index_name. 27 | _model_to_index, _model_name_to_index, _model_name_to_model_idx = defaultdict(list), defaultdict(list), defaultdict(list) 28 | _index_to_model, _idx_name_to_mdl_to_mdlidx = defaultdict(list), defaultdict(dict) 29 | _model_name_to_default_index, _alias_hooks = {}, {} 30 | _managed_models = [] 31 | __loaded_indices__ = False 32 | 33 | @classmethod 34 | def __load_settings__(cls): 35 | if cls.__loaded_indices__: 36 | return 37 | cls.__loaded_indices__ = True 38 | 39 | # Loading indices. 
40 | for index_name, module_str in iteritems(cls.BUNGIE['INDICES']): 41 | index_module = import_module(module_str) 42 | for index_obj in itervalues(index_module.__dict__): 43 | try: 44 | if issubclass(index_obj, ModelIndex) and index_obj != ModelIndex: 45 | index_instance = index_obj() 46 | assoc_model = index_instance.get_model() 47 | cls._index_to_model[index_name].append(assoc_model) 48 | cls._model_name_to_model_idx[assoc_model.__name__].append(index_instance) 49 | cls._idx_name_to_mdl_to_mdlidx[index_name][assoc_model.__name__] = index_instance 50 | if index_instance.is_default: 51 | if assoc_model.__name__ in cls._model_name_to_default_index: 52 | raise AttributeError('ModelIndex {} on index {} is marked as default, but {} was already set as default.'.format(index_instance, index_name, cls._model_name_to_default_index[assoc_model.__name__])) 53 | cls._model_name_to_default_index[assoc_model.__name__] = index_instance 54 | except TypeError: 55 | pass # Oops, just attempted to get subclasses of a non-class. 56 | 57 | # Create reverse maps in order to have O(1) access. 58 | for index_name, models in iteritems(cls._index_to_model): 59 | for model in models: 60 | cls._model_to_index[model].append(index_name) 61 | cls._model_name_to_index[model.__name__].append(index_name) 62 | 63 | # Loading aliases. 64 | for alias_prefix, module_str in iteritems(cls.BUNGIE.get('ALIASES', {})): 65 | if alias_prefix is None: 66 | alias_prefix = 'bungie' 67 | if alias_prefix != '': 68 | alias_prefix += '_' 69 | alias_module = import_module(module_str) 70 | for alias_obj in itervalues(alias_module.__dict__): 71 | try: 72 | if issubclass(alias_obj, SearchAlias) and alias_obj != SearchAlias: 73 | alias_instance = alias_obj() 74 | cls._alias_hooks[alias_prefix + alias_instance.alias_name] = alias_instance 75 | except TypeError: 76 | pass # Oops, just attempted to get subclasses of a non-class. 
77 | 78 | @classmethod 79 | def _build_key(cls, urls, timeout, **settings): 80 | # Order the settings by key and then turn it into a string with 81 | # repr. There are a lot of edge cases here, but the worst that 82 | # happens is that the key is different and so you get a new 83 | # Elasticsearch. We'll probably have to tweak this. 84 | settings = sorted(settings.items(), key=lambda item: item[0]) 85 | settings = repr([(k, v) for k, v in settings]) 86 | # elasticsearch allows URLs to be a string, so we make sure to 87 | # account for that when converting whatever it is into a tuple. 88 | if isinstance(urls, string_types): 89 | urls = (urls,) 90 | else: 91 | urls = tuple(urls) 92 | # Generate a tuple of all the bits and return that as the key 93 | # because that's hashable. 94 | key = (urls, timeout, settings) 95 | return key 96 | 97 | @classmethod 98 | def get_index(cls, model, via_class=False): 99 | ''' 100 | Returns the index name (as a string) for the given model as a class or a string. 101 | :param model: model name or model class if via_class set to True. 102 | :param via_class: set to True if parameter model is a class. 103 | :raise KeyError: If the provided model does not have any index associated. 104 | ''' 105 | try: 106 | return cls._model_to_index[model] if via_class else cls._model_name_to_index[model] 107 | except KeyError: 108 | raise KeyError('Could not find any index defined for model {}. Is the model in one of the model index modules of BUNGIESEARCH["INDICES"]?'.format(model)) 109 | 110 | @classmethod 111 | def get_model_index(cls, model, default=True): 112 | ''' 113 | Returns the default model index for the given model, or the list of indices if default is False. 114 | :param model: model name as a string. 115 | :raise KeyError: If the provided model does not have any index associated. 
116 | ''' 117 | try: 118 | if default: 119 | return cls._model_name_to_default_index[model] 120 | return cls._model_name_to_model_idx[model] 121 | except KeyError: 122 | raise KeyError('Could not find any model index defined for model {}.'.format(model)) 123 | 124 | @classmethod 125 | def get_indices(cls): 126 | ''' 127 | Returns the list of indices defined in the settings. 128 | ''' 129 | return cls._idx_name_to_mdl_to_mdlidx.keys() 130 | 131 | @classmethod 132 | def get_models(cls, index, as_class=False): 133 | ''' 134 | Returns the list of models defined for this index. 135 | :param index: index name. 136 | :param as_class: set to True to return the model as a model object instead of as a string. 137 | ''' 138 | try: 139 | return cls._index_to_model[index] if as_class else cls._idx_name_to_mdl_to_mdlidx[index].keys() 140 | except KeyError: 141 | raise KeyError('Could not find any index named {}. Is this index defined in BUNGIESEARCH["INDICES"]?'.format(index)) 142 | 143 | @classmethod 144 | def get_model_indices(cls, index): 145 | ''' 146 | Returns the list of model indices (i.e. ModelIndex objects) defined for this index. 147 | :param index: index name. 148 | ''' 149 | try: 150 | return cls._idx_name_to_mdl_to_mdlidx[index].values() 151 | except KeyError: 152 | raise KeyError('Could not find any index named {}. Is this index defined in BUNGIESEARCH["INDICES"]?'.format(index)) 153 | 154 | @classmethod 155 | def map_raw_results(cls, raw_results, instance=None): 156 | ''' 157 | Maps raw results to database model objects. 158 | :param raw_results: list raw results as returned from elasticsearch-dsl-py. 159 | :param instance: Bungiesearch instance if you want to make use of `.only()` or `optmize_queries` as defined in the ModelIndex. 160 | :return: list of mapped results in the *same* order as returned by elasticsearch. 161 | ''' 162 | # Let's iterate over the results and determine the appropriate mapping. 
163 | model_results = defaultdict(list) 164 | # Initializing the list to the number of returned results. This allows us to restore each item in its position. 165 | if hasattr(raw_results, 'hits'): 166 | results = [None] * len(raw_results.hits) 167 | else: 168 | results = [None] * len(raw_results) 169 | found_results = {} 170 | for pos, result in enumerate(raw_results): 171 | model_name = result.meta.doc_type 172 | if model_name not in Bungiesearch._model_name_to_index or result.meta.index not in Bungiesearch._model_name_to_index[model_name]: 173 | logger.warning('Returned object of type {} ({}) is not defined in the settings, or is not associated to the same index as in the settings.'.format(model_name, result)) 174 | results[pos] = result 175 | else: 176 | meta = Bungiesearch.get_model_index(model_name).Meta 177 | model_results['{}.{}'.format(result.meta.index, model_name)].append(result.meta.id) 178 | found_results['{1.meta.index}.{0}.{1.meta.id}'.format(model_name, result)] = (pos, result.meta) 179 | 180 | # Now that we have model ids per model name, let's fetch everything at once. 181 | for ref_name, ids in iteritems(model_results): 182 | index_name, model_name = ref_name.split('.') 183 | model_idx = Bungiesearch._idx_name_to_mdl_to_mdlidx[index_name][model_name] 184 | model_obj = model_idx.get_model() 185 | items = model_obj.objects.filter(pk__in=ids) 186 | if instance: 187 | if instance._only == '__model' or model_idx.optimize_queries: 188 | desired_fields = model_idx.fields_to_fetch 189 | elif instance._only == '__fields': 190 | desired_fields = instance._fields 191 | else: 192 | desired_fields = instance._only 193 | 194 | if desired_fields: # Prevents setting the database fetch to __fields but not having specified any field to elasticsearch. 195 | items = items.only( 196 | *[field.name 197 | for field in model_obj._meta.get_fields() 198 | # For complete backwards compatibility, you may want to exclude 199 | # GenericForeignKey from the results. 
200 | if field.name in desired_fields and \ 201 | not (field.many_to_one and field.related_model is None) 202 | ] 203 | ) 204 | # Let's reposition each item in the results and set the _searchmeta meta information. 205 | for item in items: 206 | pos, meta = found_results['{}.{}.{}'.format(index_name, model_name, item.pk)] 207 | item._searchmeta = meta 208 | results[pos] = item 209 | 210 | return results 211 | 212 | def __init__(self, urls=None, timeout=None, force_new=False, raw_results=False, **kwargs): 213 | ''' 214 | Creates a new ElasticSearch DSL object. Grabs the ElasticSearch connection from the pool 215 | if it has already been initialized. Otherwise, creates a new one. 216 | 217 | If no parameters are passed, everything is determined from the Django settings. 218 | 219 | :param urls: A list of URLs, or a single string of URL (without leading `http://`), or None to read from settings. 220 | :param idx: A list of indices or a single string representing an index_name name. Is optional. Will be merged with `idx_alias`. 221 | :param idx_alias: A list of index_name aliases or a single string representing an index_name alias, as defined in the settings. Will be merged with `index_name`. 222 | :param timeout: Timeout used in the connection. 223 | :param force_new: Set to `True` to force a new elasticsearch connection. Otherwise will aggressively use any connection with the exact same settings. 224 | :param **kwargs: Additional settings to pass to the low level elasticsearch client and to elasticsearch-sal-py.search.Search. 
225 | ''' 226 | 227 | Bungiesearch.__load_settings__() 228 | 229 | urls = urls or Bungiesearch.BUNGIE['URLS'] 230 | if not timeout: 231 | timeout = Bungiesearch.BUNGIE.get('TIMEOUT', Bungiesearch.DEFAULT_TIMEOUT) 232 | 233 | search_keys = ['using', 'index', 'doc_type', 'extra'] 234 | search_settings, es_settings = {}, {} 235 | for k, v in iteritems(kwargs): 236 | if k in search_keys: 237 | search_settings[k] = v 238 | else: 239 | es_settings[k] = v 240 | 241 | if not es_settings: 242 | # If there aren't any provided elasticsearch settings, let's see if it's defined in the settings. 243 | es_settings = Bungiesearch.BUNGIE.get('ES_SETTINGS', {}) 244 | 245 | # Building a caching key to cache the es_instance for later use (and retrieved a previously cached es_instance). 246 | cache_key = Bungiesearch._build_key(urls, timeout, **es_settings) 247 | es_instance = None 248 | if not force_new: 249 | if cache_key in Bungiesearch._cached_es_instances: 250 | es_instance = Bungiesearch._cached_es_instances[cache_key] 251 | 252 | if not es_instance: 253 | es_instance = Elasticsearch(urls, timeout=timeout, **es_settings) 254 | Bungiesearch._cached_es_instances[cache_key] = es_instance 255 | 256 | if 'using' not in search_settings: 257 | search_settings['using'] = es_instance 258 | 259 | super(Bungiesearch, self).__init__(**search_settings) 260 | 261 | # Creating instance attributes. 262 | self._only = [] # Stores the exact fields to fetch from the database when mapping. 263 | self.results = [] # Store the mapped and unmapped results. 264 | self._raw_results_only = raw_results 265 | 266 | def _clone(self): 267 | ''' 268 | Must clone additional fields to those cloned by elasticsearch-dsl-py. 269 | ''' 270 | instance = super(Bungiesearch, self)._clone() 271 | instance._raw_results_only = self._raw_results_only 272 | return instance 273 | 274 | def get_es_instance(self): 275 | ''' 276 | Returns the low level elasticsearch instance to perform low level operations. 
277 | ''' 278 | return self._using 279 | 280 | def execute_raw(self): 281 | self.raw_results = super(Bungiesearch, self).execute() 282 | 283 | def execute(self, return_results=True): 284 | ''' 285 | Executes the query and attempts to create model objects from results. 286 | ''' 287 | if self.results: 288 | return self.results if return_results else None 289 | 290 | self.execute_raw() 291 | 292 | if self._raw_results_only: 293 | self.results = self.raw_results 294 | else: 295 | self.map_results() 296 | 297 | if return_results: 298 | return self.results 299 | 300 | def map_results(self): 301 | ''' 302 | Maps raw results and store them. 303 | ''' 304 | self.results = Bungiesearch.map_raw_results(self.raw_results, self) 305 | 306 | def only(self, *fields): 307 | ''' 308 | Restricts the fields to be fetched when mapping. Set to `__model` to fetch all fields define in the ModelIndex. 309 | ''' 310 | s = self._clone() 311 | if len(fields) == 1 and fields[0] == '__model': 312 | s._only = '__model' 313 | else: 314 | s._only = fields 315 | return s 316 | 317 | def __iter__(self): 318 | ''' 319 | Allows iterating on the response. 320 | ''' 321 | self.execute() 322 | return iter(self.results) 323 | 324 | def __len__(self): 325 | ''' 326 | Return elasticsearch-dsl-py count. 327 | ''' 328 | return self.count() 329 | 330 | def __getitem__(self, key): 331 | ''' 332 | Overwriting the step in slice. It is used to set the results either as elasticsearch-dsl-py response object, or 333 | attempt to fetch the Django model instance. 334 | :warning: Getting an item will execute this search. Any search operation or field setting *must* be done prior to getting an item. 
335 | ''' 336 | if isinstance(key, slice): 337 | if key.step is not None: 338 | self._raw_results_only = key.step 339 | if key.start is not None and key.stop is not None: 340 | single_item = key.start - key.stop == -1 341 | elif key.start is None and key.stop == 1: 342 | single_item = True 343 | else: 344 | single_item = False 345 | key = slice(key.start, key.stop) 346 | else: 347 | single_item = False 348 | else: 349 | single_item = True 350 | results = super(Bungiesearch, self).__getitem__(key).execute() 351 | if single_item: 352 | try: 353 | return results[0] 354 | except IndexError: 355 | return [] 356 | return results 357 | 358 | def hook_alias(self, alias, model_obj=None): 359 | ''' 360 | Returns the alias function, if it exists and if it can be applied to this model. 361 | ''' 362 | try: 363 | search_alias = self._alias_hooks[alias] 364 | except KeyError: 365 | raise AttributeError('Could not find search alias named {}. Is this alias defined in BUNGIESEARCH["ALIASES"]?'.format(alias)) 366 | else: 367 | if search_alias._applicable_models and \ 368 | ((model_obj and model_obj not in search_alias._applicable_models) or \ 369 | not any([app_model_obj.__name__ in self._doc_type for app_model_obj in search_alias._applicable_models])): 370 | raise ValueError('Search alias {} is not applicable to model/doc_types {}.'.format(alias, model_obj if model_obj else self._doc_type)) 371 | return search_alias.prepare(self, model_obj).alias_for 372 | 373 | def __getattr__(self, alias): 374 | ''' 375 | Shortcut for search aliases. As explained in the docs (https://docs.python.org/2/reference/datamodel.html#object.__getattr__), 376 | this is only called as a last resort in case the attribute is not found. 
377 | ''' 378 | return self.hook_alias(alias) 379 | -------------------------------------------------------------------------------- /bungiesearch/aliases.py: -------------------------------------------------------------------------------- 1 | class SearchAlias(object): 2 | ''' 3 | Defines search aliases for specific models. Essentially works like Django Managers but for Bungiesearch. 4 | These work for both managers and bungiesearch instances. See the docs (and if they aren't clear, open an issue). 5 | ''' 6 | def __init__(self): 7 | # Introspect the model, adding/removing fields as needed. 8 | # Adds/Excludes should happen only if the fields are not already 9 | # defined in `self.fields`. 10 | self._classname = type(self).__name__ 11 | try: 12 | _meta = getattr(self, 'Meta') 13 | except AttributeError: 14 | self._applicable_models = [] 15 | self.alias_name = self._classname.lower() 16 | else: 17 | self._applicable_models = getattr(_meta, 'models', None) 18 | self.alias_name = getattr(_meta, 'alias_name', self._classname.lower()) 19 | self.search_instance = None 20 | self.model = None 21 | 22 | def _clone(self): 23 | s = self.__class__() 24 | s._classname = self._classname 25 | s._applicable_models = self._applicable_models 26 | s.alias_name = self.alias_name 27 | return s 28 | 29 | def prepare(self, search_instance, model_obj): 30 | s = self._clone() 31 | s.search_instance = search_instance 32 | s.model = model_obj 33 | return s 34 | 35 | def alias_for(self, **kwargs): 36 | raise NotImplementedError('{} does not provide an implementation for alias_for.'.format(self._classname)) 37 | 38 | def get_model(self): 39 | if self.model: 40 | return self.model 41 | if self.search_instance._doc_type and len(self.search_instance._doc_type) == 1: 42 | idxes = self.search_instance._model_name_to_model_idx[self.search_instance._doc_type[0]] 43 | first_mdl = idxes[0].get_model() 44 | if all(mdlidx.get_model() == first_mdl for mdlidx in idxes[1:]): 45 | return first_mdl 46 | 
raise ValueError('SearchAlias {} is associated to more than one index, and the model is differs between indices!') 47 | raise ValueError('Instance associated to zero doc types or more than one.') 48 | -------------------------------------------------------------------------------- /bungiesearch/fields.py: -------------------------------------------------------------------------------- 1 | from django.template import Context, loader 2 | from django.template.defaultfilters import striptags 3 | from six import iteritems 4 | 5 | from elasticsearch_dsl.analysis import Analyzer 6 | 7 | 8 | class AbstractField(object): 9 | ''' 10 | Represents an elasticsearch index field and values from given objects. 11 | Currently does not support binary fields, but those can be created by manually providing a dictionary. 12 | 13 | Values are extracted using the `model_attr` or `eval_as` attribute. 14 | ''' 15 | meta_fields = ['_index', '_uid', '_type', '_id'] 16 | common_fields = ['index_name', 'store', 'index', 'boost', 'null_value', 'copy_to', 'type', 'fields'] 17 | @property 18 | def fields(self): 19 | try: 20 | return self.fields 21 | except: 22 | raise NotImplementedError('Allowed fields are not defined.') 23 | 24 | @property 25 | def coretype(self): 26 | try: 27 | return self.coretype 28 | except: 29 | raise NotImplementedError('Core type is not defined!') 30 | 31 | @property 32 | def defaults(self): 33 | ''' 34 | Stores default values. 35 | ''' 36 | try: 37 | return self.defaults 38 | except: 39 | return {} 40 | 41 | def __init__(self, **args): 42 | ''' 43 | Performs several checks to ensure that the provided attributes are valid. Will not check their values. 44 | ''' 45 | if isinstance(self.coretype, list): 46 | if 'coretype' not in args: 47 | raise KeyError('{} can be represented as one of the following types: {}. 
Specify which to select as the `coretype` parameter.'.format(unicode(self), ', '.join(self.coretype))) 48 | if args['coretype'] not in self.coretype: 49 | raise KeyError('Core type {} is not supported by {}.'.format(args['coretype'], unicode(self))) 50 | self.type = args.pop('coretype') 51 | else: 52 | self.type = self.coretype 53 | 54 | self.model_attr = args.pop('model_attr', None) 55 | self.eval_func = args.pop('eval_as', None) 56 | self.template_name = args.pop('template', None) 57 | 58 | for attr, value in iteritems(args): 59 | if attr not in self.fields and attr not in AbstractField.common_fields: 60 | raise KeyError('Attribute `{}` is not allowed for core type {}.'.format(attr, self.coretype)) 61 | setattr(self, attr, value) 62 | 63 | for attr, value in iteritems(self.defaults): 64 | if not hasattr(self, attr): 65 | setattr(self, attr, value) 66 | 67 | def value(self, obj): 68 | ''' 69 | Computes the value of this field to update the index. 70 | :param obj: object instance, as a dictionary or as a model instance. 
71 | ''' 72 | if self.template_name: 73 | t = loader.select_template([self.template_name]) 74 | return t.render(Context({'object': obj})) 75 | 76 | if self.eval_func: 77 | try: 78 | return eval(self.eval_func) 79 | except Exception as e: 80 | raise type(e)('Could not compute value of {} field (eval_as=`{}`): {}.'.format(unicode(self), self.eval_func, unicode(e))) 81 | 82 | elif self.model_attr: 83 | if isinstance(obj, dict): 84 | return obj[self.model_attr] 85 | current_obj = getattr(obj, self.model_attr) 86 | 87 | if callable(current_obj): 88 | return current_obj() 89 | else: 90 | return current_obj 91 | 92 | else: 93 | raise KeyError('{0} gets its value via a model attribute, an eval function, a template, or is prepared in a method ' 94 | 'call but none of `model_attr`, `eval_as,` `template,` `prepare_{0}` is provided.'.format(unicode(self))) 95 | 96 | def json(self): 97 | json = {} 98 | for attr, val in iteritems(self.__dict__): 99 | if attr in ('eval_func', 'model_attr', 'template_name'): 100 | continue 101 | elif attr in ('analyzer', 'index_analyzer', 'search_analyzer') and isinstance(val, Analyzer): 102 | json[attr] = val.to_dict() 103 | else: 104 | json[attr] = val 105 | 106 | return json 107 | 108 | # All the following definitions could probably be done with better polymorphism. 
109 | class StringField(AbstractField): 110 | coretype = 'string' 111 | fields = ['doc_values', 'term_vector', 'norms', 'index_options', 'analyzer', 'index_analyzer', 'search_analyzer', 'include_in_all', 'ignore_above', 'position_offset_gap', 'fielddata', 'similarity'] 112 | defaults = {'analyzer': 'snowball'} 113 | 114 | def value(self, obj): 115 | val = super(StringField, self).value(obj) 116 | if val is None: 117 | return None 118 | return striptags(val) 119 | 120 | def __unicode__(self): 121 | return 'StringField' 122 | 123 | class NumberField(AbstractField): 124 | coretype = ['float', 'double', 'byte', 'short', 'integer', 'long'] 125 | fields = ['doc_values', 'precision_step', 'include_in_all', 'ignore_malformed', 'coerce'] 126 | 127 | def __unicode__(self): 128 | return 'NumberField' 129 | 130 | class DateField(AbstractField): 131 | coretype = 'date' 132 | fields = ['format', 'doc_values', 'precision_step', 'include_in_all', 'ignore_malformed'] 133 | 134 | def __unicode__(self): 135 | return 'DateField' 136 | 137 | class BooleanField(AbstractField): 138 | coretype = 'boolean' 139 | fields = [] # No specific fields. 140 | 141 | def __unicode__(self): 142 | return 'BooleanField' 143 | 144 | # Correspondence between a Django field and an elasticsearch field. 145 | def django_field_to_index(field, **attr): 146 | ''' 147 | Returns the index field type that would likely be associated with each Django type. 
148 | ''' 149 | 150 | dj_type = field.get_internal_type() 151 | 152 | if dj_type in ('DateField', 'DateTimeField'): 153 | return DateField(**attr) 154 | elif dj_type in ('BooleanField', 'NullBooleanField'): 155 | return BooleanField(**attr) 156 | elif dj_type in ('DecimalField', 'FloatField'): 157 | return NumberField(coretype='float', **attr) 158 | elif dj_type in ('PositiveSmallIntegerField', 'SmallIntegerField'): 159 | return NumberField(coretype='short', **attr) 160 | elif dj_type in ('IntegerField', 'PositiveIntegerField', 'AutoField'): 161 | return NumberField(coretype='integer', **attr) 162 | elif dj_type in ('BigIntegerField'): 163 | return NumberField(coretype='long', **attr) 164 | 165 | return StringField(**attr) 166 | -------------------------------------------------------------------------------- /bungiesearch/indices.py: -------------------------------------------------------------------------------- 1 | from six import iteritems, text_type 2 | 3 | from elasticsearch_dsl.analysis import Analyzer 4 | 5 | from .fields import AbstractField, django_field_to_index 6 | from .logger import logger 7 | 8 | 9 | class ModelIndex(object): 10 | ''' 11 | Introspects a model to generate an indexable mapping and methods to extract objects. 12 | Supports custom fields, including Python code, and all elasticsearch field types (apart from binary type). 13 | 14 | ModelIndex does efficient querying by only fetching from the database fields which are to be indexed. 15 | 16 | How to create an index? 17 | 18 | 1. Create a class which inherits from ModelIndex. 19 | 2. Define custom indexed fields as class attributes. Values must be instances AbstractField. Important info in 3b. 20 | 3. Define a `Meta` subclass, which must contain at least `model` as a class attribute. 21 | a. Optional class attributes: `fields`, `excludes` and `additional_fields`. 22 | b. 
If custom indexed field requires model attributes which are not in the difference between `fields` and `excludes`, these must be defined in `additional_fields`. 23 | ''' 24 | def __init__(self): 25 | # Introspect the model, adding/removing fields as needed. 26 | # Adds/Excludes should happen only if the fields are not already 27 | # defined in `self.fields`. 28 | try: 29 | _meta = getattr(self, 'Meta') 30 | except AttributeError: 31 | raise AttributeError('ModelIndex {} does not contain a Meta class.'.format(self.__class__.__name__)) 32 | 33 | self.model = getattr(_meta, 'model', None) 34 | self.fields = {} 35 | fields = getattr(_meta, 'fields', []) 36 | excludes = getattr(_meta, 'exclude', []) 37 | hotfixes = getattr(_meta, 'hotfixes', {}) 38 | additional_fields = getattr(_meta, 'additional_fields', []) 39 | id_field = getattr(_meta, 'id_field', 'id') 40 | self.updated_field = getattr(_meta, 'updated_field', None) 41 | self.optimize_queries = getattr(_meta, 'optimize_queries', False) 42 | self.is_default = getattr(_meta, 'default', True) 43 | self.indexing_query = getattr(_meta, 'indexing_query', None) 44 | 45 | # Add in fields from the model. 46 | self.fields.update(self._get_fields(fields, excludes, hotfixes)) 47 | # Elasticsearch uses '_id' to identify items uniquely, so let's duplicate that field. 48 | # We're duplicating it in order for devs to still perform searches on `.id` as expected. 49 | self.fields_to_fetch = list(set(self.fields.keys()).union(additional_fields)) 50 | 51 | # Adding or updating the fields which are defined at class level. 
52 | for cls_attr, obj in iteritems(self.__class__.__dict__): 53 | if not isinstance(obj, AbstractField): 54 | continue 55 | 56 | if cls_attr in self.fields: 57 | logger.info('Overwriting implicitly defined model field {} ({}) its explicit definition: {}.'.format(cls_attr, text_type(self.fields[cls_attr]), text_type(obj))) 58 | self.fields[cls_attr] = obj 59 | 60 | self.fields['_id'] = self.fields[id_field] 61 | 62 | def matches_indexing_condition(self, item): 63 | ''' 64 | Returns True by default to index all documents. 65 | ''' 66 | return True 67 | 68 | def get_model(self): 69 | return self.model 70 | 71 | def get_mapping(self, meta_fields=True): 72 | ''' 73 | Returns the mapping for the index as a dictionary. 74 | 75 | :param meta_fields: Also include elasticsearch meta fields in the dictionary. 76 | :return: a dictionary which can be used to generate the elasticsearch index mapping for this doctype. 77 | ''' 78 | return {'properties': dict((name, field.json()) for name, field in iteritems(self.fields) if meta_fields or name not in AbstractField.meta_fields)} 79 | 80 | def collect_analysis(self): 81 | ''' 82 | :return: a dictionary which is used to get the serialized analyzer definition from the analyzer class. 83 | ''' 84 | analysis = {} 85 | for field in self.fields.values(): 86 | for analyzer_name in ('analyzer', 'index_analyzer', 'search_analyzer'): 87 | if not hasattr(field, analyzer_name): 88 | continue 89 | 90 | analyzer = getattr(field, analyzer_name) 91 | 92 | if not isinstance(analyzer, Analyzer): 93 | continue 94 | 95 | definition = analyzer.get_analysis_definition() 96 | if definition is None: 97 | continue 98 | 99 | for key in definition: 100 | analysis.setdefault(key, {}).update(definition[key]) 101 | 102 | return analysis 103 | 104 | def serialize_object(self, obj, obj_pk=None): 105 | ''' 106 | Serializes an object for it to be added to the index. 107 | 108 | :param obj: Object to be serialized. Optional if obj_pk is passed. 
109 | :param obj_pk: Object primary key. Superseded by `obj` if available. 110 | :return: A dictionary representing the object as defined in the mapping. 111 | ''' 112 | if not obj: 113 | try: 114 | # We're using `filter` followed by `values` in order to only fetch the required fields. 115 | obj = self.model.objects.filter(pk=obj_pk).values(*self.fields_to_fetch)[0] 116 | except Exception as e: 117 | raise ValueError('Could not find object of primary key = {} in model {} (model index class {}). (Original exception: {}.)'.format(obj_pk, self.model, self.__class__.__name__, e)) 118 | 119 | serialized_object = {} 120 | 121 | for name, field in iteritems(self.fields): 122 | if hasattr(self, "prepare_%s" % name): 123 | value = getattr(self, "prepare_%s" % name)(obj) 124 | else: 125 | value = field.value(obj) 126 | 127 | serialized_object[name] = value 128 | 129 | return serialized_object 130 | 131 | def _get_fields(self, fields, excludes, hotfixes): 132 | ''' 133 | Given any explicit fields to include and fields to exclude, add 134 | additional fields based on the associated model. If the field needs a hotfix, apply it. 135 | ''' 136 | final_fields = {} 137 | fields = fields or [] 138 | excludes = excludes or [] 139 | 140 | for f in self.model._meta.fields: 141 | # If the field name is already present, skip 142 | if f.name in self.fields: 143 | continue 144 | 145 | # If field is not present in explicit field listing, skip 146 | if fields and f.name not in fields: 147 | continue 148 | 149 | # If field is in exclude list, skip 150 | if excludes and f.name in excludes: 151 | continue 152 | 153 | # If field is a relation, skip. 
154 | if getattr(f, 'rel'): 155 | continue 156 | 157 | attr = {'model_attr': f.name} 158 | if f.has_default(): 159 | attr['null_value'] = f.default 160 | 161 | if f.name in hotfixes: 162 | attr.update(hotfixes[f.name]) 163 | 164 | final_fields[f.name] = django_field_to_index(f, **attr) 165 | 166 | return final_fields 167 | 168 | def __str__(self): 169 | return '<{0.__class__.__name__}:{0.model.__name__}>'.format(self) 170 | -------------------------------------------------------------------------------- /bungiesearch/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger('bungiesearch') 4 | -------------------------------------------------------------------------------- /bungiesearch/management/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChristopherRabotin/bungiesearch/13768342bc2698b214eb0003c2d113b6e273c30d/bungiesearch/management/__init__.py -------------------------------------------------------------------------------- /bungiesearch/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Commands allow you to manage the index. 3 | ''' -------------------------------------------------------------------------------- /bungiesearch/management/commands/_utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def add_arguments(obj, parser): 4 | parser.add_argument( 5 | '--noinput', 6 | action='store_false', 7 | dest='interactive', 8 | default=True, 9 | help='If provided, no prompts will be issued to the user and the data will be wiped out' 10 | ) 11 | parser.add_argument( 12 | '--guilty-as-charged', 13 | action='store_true', 14 | dest='confirmed', 15 | default=False, 16 | help='Flag needed to confirm the clear index.' 
class Command(BaseCommand):
    '''
    Management command which wipes the search index by deleting and then re-creating it
    through the `search_index` command.
    '''
    help = 'Clears the search index of its contents.'
    add_arguments = add_arguments

    def handle(self, **options):
        '''
        Asks for confirmation (unless `interactive` is False), requires the `confirmed` option,
        then calls `search_index` with the delete action followed by the create action.

        :param options: parsed command line options, forwarded as-is to `search_index`.
        '''
        if options.get('interactive', True):
            print('WARNING: This will irreparably remove EVERYTHING from your search index.')
            print('Your choices after this are to restore from backups or rebuild via the `rebuild_index` command.')

            yes_or_no = six.moves.input('Are you sure you wish to continue? [y/N] ')
            # A bare `print` is a no-op on Python 3 and `print()` shows "()" on Python 2:
            # printing an empty string yields a blank line on both.
            print('')

            if yes_or_no not in ['y', 'N']:
                print('No action taken: please type either "y" or "N".')
                sys.exit()

            if yes_or_no == 'N':
                print('No action taken.')
                sys.exit()

        if not options['confirmed']:
            print('No action taken: you must provide the --guilty-as-charged flag.')
            sys.exit()

        call_command('search_index', action='delete', **options)
        call_command('search_index', action='create', **options)
22 | ) 23 | parser.add_argument( 24 | '--update', 25 | action='store_const', 26 | dest='action', 27 | const='update', 28 | help='Update the index specified in the settings with the mapping generating from the search indices.') 29 | parser.add_argument( 30 | '--update-mapping', 31 | action='store_const', 32 | dest='action', 33 | const='update-mapping', 34 | help='Update the mapping of specified models (or all models) on the index specified in the settings.') 35 | parser.add_argument( 36 | '--delete', 37 | action='store_const', 38 | dest='action', 39 | const='delete', 40 | help='Delete the index specified in the settings. Requires the "--guilty-as-charged" flag.') 41 | parser.add_argument( 42 | '--delete-mapping', 43 | action='store_const', 44 | dest='action', 45 | const='delete-mapping', 46 | help='Delete the mapping of specified models (or all models) on the index specified in the settings. Requires the "--guilty-as-charged" flag.') 47 | parser.add_argument( 48 | '--guilty-as-charged', 49 | action='store_true', 50 | dest='confirmed', 51 | default=False, 52 | help='Flag needed to delete an index.') 53 | parser.add_argument( 54 | '--models', 55 | action='store', 56 | dest='models', 57 | default=None, 58 | help='Models to be updated, separated by commas. If none are specified, then all models defined in the index will be updated.') 59 | parser.add_argument( 60 | '--index', 61 | action='store', 62 | dest='index', 63 | default=None, 64 | help='Specify the index for which to apply the action, as defined in BUNGIESEARCH.INDEXES of settings. Defaults to using all indices.') 65 | parser.add_argument( 66 | '--bulk-size', 67 | action='store', 68 | dest='bulk_size', 69 | default=100, 70 | type=int, 71 | help='Specify the number of items to be updated together.') 72 | parser.add_argument( 73 | '--num-docs', 74 | action='store', 75 | dest='num_docs', 76 | default=-1, 77 | type=int, 78 | help='Specify the maximum number of items to be indexed. 
def handle(self, *args, **options):
    '''
    Runs the action selected on the command line against the search indices.

    :param options: parsed command line options. Reads `action`, `confirmed`, `index`, `models`, `timeout`,
        `bulk_size`, `num_docs`, `start_date` and `end_date`.
    :raises ValueError: if no action is specified, or if a delete action is requested without the --guilty-as-charged flag.
    '''
    # Local import: `raw_input` only exists on Python 2, six provides the portable equivalent.
    from six.moves import input as read_input

    src = Bungiesearch(timeout=options.get('timeout'))
    es = src.get_es_instance()

    action = options['action']
    if not action:
        raise ValueError('No action specified. Must be one of "create", "update" or "delete".')

    # --index designates a single index: always wrap it in a list so that no branch iterates over its characters.
    selected_indices = [options['index']] if options['index'] else None
    # --models is documented as comma separated. Whitespace is accepted too because the delete-mapping
    # action used to split on it.
    selected_models = options['models'].replace(',', ' ').split() if options['models'] else []

    if action.startswith('delete'):
        if not options['confirmed']:
            raise ValueError('If you know what a delete operation does (on index or mapping), add the --guilty-as-charged flag.')

        if action == 'delete':
            for index in (selected_indices or src.get_indices()):
                logger.warning('Deleting elastic search index {}.'.format(index))
                es.indices.delete(index=index, ignore=404)

        else:
            index_to_doctypes = defaultdict(list)
            if options['models']:
                for model_name in selected_models:
                    for index in src.get_index(model_name):
                        index_to_doctypes[index].append(model_name)
                # Logged once the indices are known: no index name is bound before the loop above.
                logger.info('Deleting mapping for models {} on index {}.'.format(options['models'], ', '.join(index_to_doctypes)))
            elif options['index']:
                index = options['index']
                logger.info('Deleting mapping for all models on index {}.'.format(index))
                index_to_doctypes[index] = src.get_models(index)
            else:
                for index in src.get_indices():
                    index_to_doctypes[index] = src.get_models(index)
                logger.info('Deleting mapping for all models ({}) on all indices ({}).'.format(index_to_doctypes.values(), index_to_doctypes.keys()))

            for index, doctype_list in iteritems(index_to_doctypes):
                es.indices.delete_mapping(index, ','.join(doctype_list), params=None)

    elif action == 'create':
        indices = selected_indices or src.get_indices()
        for index in indices:
            mapping = {}
            analysis = {'analyzer': {}, 'tokenizer': {}, 'filter': {}}

            for mdl_idx in src.get_model_indices(index):
                mapping[mdl_idx.get_model().__name__] = mdl_idx.get_mapping(meta_fields=False)

                # Merge every section reported by the model index, so that sections other than
                # the three pre-seeded ones (e.g. char_filter) are not silently dropped.
                for key, value in iteritems(mdl_idx.collect_analysis()):
                    if value is not None:
                        analysis.setdefault(key, {}).update(value)

            logger.info('Creating index {} with {} doctypes.'.format(index, len(mapping)))
            es.indices.create(index=index, body={'mappings': mapping, 'settings': {'analysis': analysis}})

        es.cluster.health(index=','.join(indices), wait_for_status='green', timeout='30s')

    elif action == 'update-mapping':
        for index in (selected_indices or src.get_indices()):
            for model_name in src._idx_name_to_mdl_to_mdlidx[index]:
                if options['models'] and model_name not in selected_models:
                    continue
                logger.info('Updating mapping of model/doctype {} on index {}.'.format(model_name, index))
                try:
                    es.indices.put_mapping(model_name, src._idx_name_to_mdl_to_mdlidx[index][model_name].get_mapping(), index=index)
                except Exception as e:
                    print(e)
                    if read_input('Something terrible happened! Type "abort" to stop updating the mappings: ') == 'abort':
                        # Bare raise keeps the original traceback.
                        raise
                    print('Continuing.')

    else:
        indices = selected_indices or src.get_indices()
        if options['models']:
            model_names = selected_models
        else:
            model_names = [model for index in indices for model in src.get_models(index)]

        logger.info('Updating models {} on indices {}.'.format(model_names, indices))

        # Update index.
        for model_name in model_names:
            model_index = src.get_model_index(model_name)
            if model_index.indexing_query is not None:
                items = model_index.indexing_query
            else:
                items = model_index.get_model().objects.all()
            update_index(items, model_name, bulk_size=options['bulk_size'], num_docs=options['num_docs'], start_date=options['start_date'], end_date=options['end_date'])
def __getattr__(self, alias):
    '''
    Shortcut for search aliases. As explained in the docs (https://docs.python.org/2/reference/datamodel.html#object.__getattr__),
    this is only called as a last resort in case the attribute is not found.
    This function will check whether the given model is allowed to use the proposed alias and will raise an attribute error if not.

    :param alias: name of the attribute which was not found through the regular lookup.
    :return: the result of hooking the alias on this manager's search instance.
    :raises AttributeError: if the name is empty or starts with an underscore, or if the BUNGIESEARCH setting is missing or falsy.
    '''
    # Don't treat "private" attrs as possible aliases. This prevents an infinite recursion bug.
    # `startswith` is used instead of `alias[0]` so that an empty name also ends up as an AttributeError.
    # Similarly, if Bungiesearch is installed but not enabled (setting absent or falsy), raise the expected error.
    if not alias or alias.startswith('_') or not getattr(dj_settings, 'BUNGIESEARCH', None):
        raise AttributeError("'{}' object has no attribute '{}'".format(type(self), alias))

    return self.search.hook_alias(alias, self.model)
def update_index(model_items, model_name, action='index', bulk_size=100, num_docs=-1, start_date=None, end_date=None, refresh=True):
    '''
    Updates the index for the provided model_items.
    :param model_items: a list of model_items (django Model instances, or proxy instances) which are to be indexed/updated or deleted.
    If action is 'index', the model_items must be serializable objects. If action is 'delete', the model_items must be primary keys
    corresponding to objects in the index.
    :param model_name: doctype, which must also be the model name.
    :param action: the action that you'd like to perform on this group of data. Must be in ('index', 'delete') and defaults to 'index.'
    :param bulk_size: bulk size for indexing. Defaults to 100.
    :param num_docs: maximum number of model_items from the provided list to be indexed. Defaults to -1, meaning all of them.
    :param start_date: start date for indexing. Must be as YYYY-MM-DD. Only applied if model_items is not a list or tuple.
    :param end_date: end date for indexing. Must be as YYYY-MM-DD. Only applied if model_items is not a list or tuple.
    :param refresh: a boolean that determines whether to refresh the index, making all operations performed since the last refresh
    immediately available for search, instead of needing to wait for the scheduled Elasticsearch execution. Defaults to True.
    :raises ValueError: if action is 'delete' and model_items is not iterable.
    :note: If model_items contain multiple models, then num_docs is applied to *each* model. For example, if bulk_size is set to 5,
    and item contains models Article and Article2, then 5 model_items of Article *and* 5 model_items of Article2 will be indexed.
    '''
    src = Bungiesearch()

    if action == 'delete' and not hasattr(model_items, '__iter__'):
        raise ValueError("If action is 'delete', model_items must be an iterable of primary keys.")

    is_sequence = isinstance(model_items, (list, tuple))
    prepared = False

    logger.info('Getting index for model {}.'.format(model_name))
    for index_name in src.get_index(model_name):
        index_instance = src.get_model_index(model_name)
        model = index_instance.get_model()

        if not prepared:
            # The items and their count do not depend on the index: resolve them once, so that
            # the following indices neither re-filter nor log a misleading limit warning.
            if not is_sequence and (num_docs == -1 or start_date or end_date):
                # The date range is also honoured when an explicit num_docs is provided.
                model_items = filter_model_items(index_instance, model_items, model_name, start_date, end_date)
                if not model_items.ordered:
                    # Slicing below needs a stable order.
                    model_items = model_items.order_by('pk')

            if num_docs == -1:
                num_docs = len(model_items) if is_sequence else model_items.count()
            else:
                logger.warning('Limiting the number of model_items to {} to {}.'.format(action, num_docs))
            prepared = True

        logger.info('{} {} documents on index {}'.format(action, num_docs, index_name))
        for prev_step in range(0, num_docs, bulk_size):
            # Cap the last slice so that no more than num_docs items are ever sent.
            next_step = min(prev_step + bulk_size, num_docs)
            logger.info('{}: documents {} to {} of {} total on index {}.'.format(action.capitalize(), prev_step, next_step, num_docs, index_name))
            data = create_indexed_document(index_instance, model_items[prev_step:next_step], action)
            bulk_index(src.get_es_instance(), data, index=index_name, doc_type=model.__name__, raise_on_error=True)

        if refresh:
            src.get_es_instance().indices.refresh(index=index_name)
def filter_model_items(index_instance, model_items, model_name, start_date, end_date):
    '''
    Filters the model items queryset based on start and end date.

    :param index_instance: model index whose `updated_field` names the date field to filter on.
    :param model_items: queryset to restrict.
    :param model_name: model name, only used in the warning message.
    :param start_date: date string; if set, keeps the items whose updated field is greater than or equal to it.
    :param end_date: date string; if set, keeps the items whose updated field is lower than or equal to it.
    :return: the restricted queryset, or model_items untouched if there is no updated field or no date.
    '''
    if index_instance.updated_field is None:
        logger.warning("No updated date field found for {} - not restricting with start and end date".format(model_name))
        return model_items

    if start_date:
        model_items = model_items.filter(**{'{}__gte'.format(index_instance.updated_field): __str_to_tzdate__(start_date)})
    if end_date:
        model_items = model_items.filter(**{'{}__lte'.format(index_instance.updated_field): __str_to_tzdate__(end_date)})

    return model_items


def __str_to_tzdate__(date_str):
    '''
    Parses a date string into a timezone aware datetime.

    :param date_str: date (and optionally time) string, as understood by dateutil's parser.
    :return: an aware datetime; naive values are made aware in the current timezone.
    '''
    parsed = parsedt(date_str)
    # A string which carries its own offset is parsed as an aware datetime, which make_aware rejects.
    if timezone.is_naive(parsed):
        return timezone.make_aware(parsed, timezone.get_current_timezone())
    return parsed
# Succeeds if the given URL answers the request successfully.
function is_responding() {
  curl --output /dev/null --fail --silent "$1"
}

# Blocks until the cluster health endpoint reports a green status with $1 nodes.
function wait_for_cluster() {
  echo 'Waiting on elasticsearch to be ready on port 9200'
  until is_responding "$CLUSTER_URL/_cluster/health?wait_for_nodes=$1&wait_for_status=green"; do
    printf '.'
    sleep 1
  done
  echo
}

# Start elasticsearch only if nothing answers on the cluster URL yet.
if ! is_responding "$CLUSTER_URL"; then
  # Check the binary which is actually launched below, not a hard-coded command name.
  if ! has_command "$ES_PATH"; then
    echo 'No elasticsearch command found and no server running'
    echo 'Elasticsearch cluster must be running on port 9200'
    exit 1
  else
    if [ "$1" != "--cluster" ]; then
      echo 'Starting single elasticsearch node'
      "$ES_PATH" -D es.index.number_of_replicas=0 &> /dev/null &
      wait_for_cluster 1
    else
      echo 'Starting elasticsearch cluster with 2 nodes'
      # Master-only node.
      "$ES_PATH" \
        -D es.cluster.name="mycluster" \
        -D es.node.name="mycluster-node1" \
        -D es.node.master=true \
        -D es.node.data=false \
        -D es.index.number_of_replicas=0 \
        -D es.network.host=127.0.0.1 \
        -D es.foreground=yes \
        -D es.discovery.zen.ping.multicast.enabled=false \
        -D es.discovery.zen.ping.unicast.hosts=127.0.0.1:9300,127.0.0.1:9301,127.0.0.1:9302 &> /dev/null &

      # Data-only node.
      "$ES_PATH" \
        -D es.cluster.name="mycluster" \
        -D es.node.name="mycluster-node2" \
        -D es.node.master=false \
        -D es.node.data=true \
        -D es.index.number_of_replicas=0 \
        -D es.network.host=127.0.0.1 \
        -D es.foreground=yes \
        -D es.discovery.zen.ping.multicast.enabled=false \
        -D es.discovery.zen.ping.unicast.hosts=127.0.0.1:9300,127.0.0.1:9301,127.0.0.1:9302 &> /dev/null &

      wait_for_cluster 2
    fi
  fi
fi

python -B tests/manage.py test
# -*- coding: utf-8 -*-
import io
import sys
from os.path import dirname, join

from setuptools import find_packages, setup

VERSION = (1, 3, 0)
__version__ = VERSION
__versionstr__ = '.'.join(map(str, VERSION))

long_description = 'Should have been loaded from README.md.'
# Read the README as UTF-8 explicitly, so that the result does not depend on the locale of the build machine.
with io.open(join(dirname(__file__), 'README.rst'), encoding='utf-8') as f:
    long_description = f.read().strip()


install_requires = [
    'django>=1.8',
    'elasticsearch-dsl>=2.0.0,<3.0.0',
    'elasticsearch>=2.0.0,<3.0.0',
    'python-dateutil',
    'six',
]

# NOTE(review): tests_require is built but never passed to setup() - confirm whether this is intended.
tests_require = []

# use external unittest for 2.6
if sys.version_info[:2] == (2, 6):
    tests_require.append('unittest2')

setup(
    name="bungiesearch",
    description="A Django elasticsearch wrapper and helper using elasticsearch-dsl-py high level library.",
    license="BSD-3",
    url="https://github.com/ChristopherRabotin/bungiesearch",
    long_description=long_description,
    version=__versionstr__,
    author="Christopher Rabotin",
    author_email="christopher.rabotin@gmail.com",
    packages=find_packages(
        where='.',
        # Exclusion patterns are dotted package names: a path such as 'bungiesearch/tests' matches
        # nothing, which lets the top-level test suite be packaged.
        exclude=('tests', 'tests.*')
    ),
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: BSD License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 2",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.4",
        "Programming Language :: Python :: 3.5",
        "Framework :: Django"
    ],
    keywords="elasticsearch haystack django bungiesearch",
    install_requires=install_requires,
    dependency_links=['https://github.com/elasticsearch/elasticsearch-dsl-py#egg=elasticsearch-dsl-py'],
)
class BungieTestSignalProcessor(BungieSignalProcessor):
    '''
    Test signal processor which forwards every signal to the connectors inherited from
    BungieSignalProcessor, and records that its setup and teardown were run.
    '''

    def handle_save(self, sender, instance, **kwargs):
        # Plain delegation to the inherited post save connector.
        self.post_save_connector(sender, instance, **kwargs)

    def handle_delete(self, sender, instance, **kwargs):
        # Plain delegation to the inherited pre delete connector.
        self.pre_delete_connector(sender, instance, **kwargs)

    def setup(self, model):
        '''
        Connects the proxy handlers to the post save and pre delete signals of the model.
        '''
        bindings = (
            (signals.post_save, self.handle_save),
            (signals.pre_delete, self.handle_delete),
        )
        for signal, handler in bindings:
            signal.connect(handler, sender=model)
        self.setup_ran = True

    def teardown(self, model):
        '''
        Disconnects the proxy handlers from the pre delete and post save signals of the model.
        '''
        bindings = (
            (signals.pre_delete, self.handle_delete),
            (signals.post_save, self.handle_save),
        )
        for signal, handler in bindings:
            signal.disconnect(handler, sender=model)
        self.teardown_ran = True
class User(models.Model):
    # Test model whose primary key is the `user_id` text field instead of an implicit id.
    name = models.TextField(db_index=True)
    user_id = models.TextField(blank=True, primary_key=True)
    about = models.TextField(blank=True)
    created = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(null=True)

    objects = BungiesearchManager()

    class Meta:
        app_label = 'core'


class NoUpdatedField(models.Model):
    # Test model which declares no date field at all.
    field_title = models.TextField(db_index=True)
    field_description = models.TextField(blank=True)

    objects = BungiesearchManager()

    class Meta:
        app_label = 'core'


class ManangedButEmpty(models.Model):
    # Test model which uses the Bungiesearch manager.
    # NOTE(review): judging by the name, presumably no rows are ever created for it - confirm against the tests.
    field_title = models.TextField(db_index=True)
    field_description = models.TextField(blank=True)

    objects = BungiesearchManager()

    class Meta:
        app_label = 'core'


class Unmanaged(models.Model):
    # Test model which keeps the default Django manager: no BungiesearchManager is attached.
    field_title = models.TextField(db_index=True)
    field_description = models.TextField(blank=True)

    class Meta:
        app_label = 'core'
from core.models import Article, NoUpdatedField 3 | 4 | 5 | class SearchTitle(SearchAlias): 6 | def alias_for(self, title): 7 | return self.search_instance.query('match', title=title) 8 | 9 | class Meta: 10 | models = (Article,) 11 | alias_name = 'title_search' 12 | 13 | class Title(SearchAlias): 14 | def alias_for(self, title): 15 | return self.search_instance.query('match', title=title) 16 | 17 | class InvalidAlias(SearchAlias): 18 | class Meta: 19 | models = (Article,) 20 | 21 | class TitleFilter(SearchAlias): 22 | def alias_for(self, title): 23 | return self.search_instance.filter('term', title=title) 24 | 25 | class NoUpdatedMdlOnly(SearchAlias): 26 | def alias_for(self, title): 27 | return self.search_instance.filter('term', title=title) 28 | 29 | class Meta: 30 | models = (NoUpdatedField,) 31 | 32 | class ReturningSelfAlias(SearchAlias): 33 | def alias_for(self): 34 | return self 35 | 36 | class Meta: 37 | alias_name = 'get_alias_for_test' 38 | 39 | class BisIndex(SearchAlias): 40 | def alias_for(self): 41 | self.search_instance._index = 'bungiesearch_demo_bis' 42 | return self.search_instance 43 | 44 | class Meta: 45 | models = (Article,) 46 | alias_name = 'bisindex' 47 | -------------------------------------------------------------------------------- /tests/core/search_indices.py: -------------------------------------------------------------------------------- 1 | from bungiesearch.fields import DateField, NumberField, StringField 2 | from bungiesearch.indices import ModelIndex 3 | from core.models import Article, NoUpdatedField, User 4 | 5 | from .analysis import edge_ngram_analyzer 6 | 7 | 8 | class ArticleIndex(ModelIndex): 9 | effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published') 10 | meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])') 11 | text = StringField(template='article.txt', analyzer=edge_ngram_analyzer) 12 | 13 | class Meta: 
14 | model = Article 15 | updated_field = 'updated' 16 | exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash') 17 | hotfixes = {'updated': {'null_value': '2013-07-01'}, 18 | 'title': {'boost': 1.75}, 19 | 'description': {'boost': 1.35}, 20 | 'full_text': {'boost': 1.125}} 21 | default = True 22 | 23 | 24 | class UserIndex(ModelIndex): 25 | effective_date = DateField(eval_as='obj.created if obj.created and obj.updated > obj.created else obj.updated') 26 | about = StringField(model_attr='about', analyzer=edge_ngram_analyzer) 27 | int_about = NumberField(coretype='integer') 28 | 29 | def prepare_int_about(self, obj): 30 | try: 31 | int_about = int(obj.about) 32 | except ValueError: 33 | int_about = 1 34 | 35 | return int_about 36 | 37 | class Meta: 38 | model = User 39 | id_field = 'user_id' 40 | updated_field = 'updated' 41 | hotfixes = {'updated': {'null_value': '2013-07-01'}, 42 | 'about': {'boost': 1.35}} 43 | default = True 44 | 45 | 46 | class NoUpdatedFieldIndex(ModelIndex): 47 | class Meta: 48 | model = NoUpdatedField 49 | exclude = ('field_description',) 50 | optimize_queries = True 51 | indexing_query = NoUpdatedField.objects.defer(*exclude).select_related().all() 52 | -------------------------------------------------------------------------------- /tests/core/search_indices_bis.py: -------------------------------------------------------------------------------- 1 | from bungiesearch.fields import DateField, StringField 2 | from bungiesearch.indices import ModelIndex 3 | from core.models import Article, ManangedButEmpty, User 4 | 5 | 6 | class ArticleIndex(ModelIndex): 7 | effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published') 8 | meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])') 9 | more_fields = StringField(eval_as='"some value"') 10 | 11 | class Meta: 12 | model = Article 13 | 
updated_field = 'updated' 14 | exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash') 15 | hotfixes = {'updated': {'null_value': '2013-07-01'}, 16 | 'title': {'boost': 1.75}, 17 | 'description': {'boost': 1.35}, 18 | 'full_text': {'boost': 1.125}} 19 | default = False 20 | 21 | 22 | class UserIndex(ModelIndex): 23 | effective_date = DateField(eval_as='obj.created if obj.created and obj.published > obj.created else obj.published') 24 | meta_data = StringField(eval_as='" ".join([fld for fld in [obj.link, str(obj.tweet_count), obj.raw] if fld])') 25 | more_fields = StringField(eval_as='"some value"') 26 | 27 | class Meta: 28 | model = User 29 | id_field = 'user_id' 30 | updated_field = 'updated' 31 | exclude = ('raw', 'missing_data', 'negative_feedback', 'positive_feedback', 'popularity_index', 'source_hash') 32 | hotfixes = {'updated': {'null_value': '2013-07-01'}, 33 | 'title': {'boost': 1.75}, 34 | 'about': {'boost': 1.35}, 35 | 'full_text': {'boost': 1.125}} 36 | default = False 37 | 38 | 39 | class EmptyIndex(ModelIndex): 40 | def matches_indexing_condition(self, item): 41 | return False 42 | 43 | class Meta: 44 | model = ManangedButEmpty 45 | exclude = ('field_description',) 46 | optimize_queries = True 47 | -------------------------------------------------------------------------------- /tests/core/templates/article.txt: -------------------------------------------------------------------------------- 1 | {{ object.title }} 2 | {{ object.authors }} 3 | {{ object.description }} 4 | {{ object.text_field }} -------------------------------------------------------------------------------- /tests/core/test_bungiesearch.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from django.core.management import call_command 4 | from django.test import TestCase, override_settings 5 | from six import iteritems 6 | 7 | import pytz 8 | from 
bungiesearch import Bungiesearch 9 | from bungiesearch.utils import update_index 10 | from core.bungie_signal import BungieTestSignalProcessor 11 | from core.models import (Article, ManangedButEmpty, NoUpdatedField, Unmanaged, 12 | User) 13 | from core.search_indices import ArticleIndex, UserIndex 14 | 15 | 16 | class CoreTestCase(TestCase): 17 | @classmethod 18 | def setUpClass(cls): 19 | # Let's start by creating the index and mapping. 20 | # If we create an object before the index, the index 21 | # will be created automatically, and we want to test the command. 22 | call_command('search_index', action='create') 23 | 24 | art_1 = {'title': 'Title one', 25 | 'description': 'Description of article 1.', 26 | 'text_field': '', 27 | 'link': 'http://example.com/article_1', 28 | 'published': pytz.UTC.localize(datetime(year=2020, month=9, day=15)), 29 | 'updated': pytz.UTC.localize(datetime(year=2014, month=9, day=10)), 30 | 'tweet_count': 20, 31 | 'source_hash': 159159159159, 32 | 'missing_data': '', 33 | 'positive_feedback': 50, 34 | 'negative_feedback': 5, 35 | } 36 | 37 | user_1 = {'user_id': 'bungie1', 38 | 'about': 'Description of user 1', 39 | 'created': pytz.UTC.localize(datetime(year=2015, month=1, day=1)), 40 | 'updated': pytz.UTC.localize(datetime(year=2015, month=6, day=1)), 41 | } 42 | 43 | Article.objects.create(**art_1) 44 | User.objects.create(**user_1) 45 | 46 | art_2 = dict((k, v) for k, v in iteritems(art_1)) 47 | art_2['link'] += '/page2' 48 | art_2['title'] = 'Title two' 49 | art_2['description'] = 'This is a second article.' 
50 | art_2['text_field'] = None 51 | art_2['published'] = pytz.UTC.localize(datetime(year=2010, month=9, day=15)) 52 | 53 | user_2 = dict((k, v) for k, v in iteritems(user_1)) 54 | user_2['user_id'] = 'bungie2' 55 | user_2['about'] = 'This is the second user' 56 | user_2['created'] = pytz.UTC.localize(datetime(year=2010, month=9, day=15)) 57 | 58 | Article.objects.create(**art_2) 59 | User.objects.create(**user_2) 60 | NoUpdatedField.objects.create(field_title='My title', field_description='This is a short description.') 61 | 62 | call_command('rebuild_index', interactive=False, confirmed='guilty-as-charged') 63 | 64 | def test_count_after_clear(self): 65 | # can flake because elasticsearch create API is asynchronous 66 | self.assertEqual(Article.objects.search_index('bungiesearch_demo').count(), 2) 67 | call_command('rebuild_index', interactive=False, confirmed='guilty-as-charged') 68 | self.assertEqual(Article.objects.search_index('bungiesearch_demo').count(), 2) 69 | 70 | @classmethod 71 | def tearDownClass(cls): 72 | call_command('search_index', action='delete', confirmed='guilty-as-charged') 73 | 74 | def test_model_index_generation(self): 75 | ''' 76 | Check that the mapping is the expected one. 
77 | ''' 78 | expected_article = {'properties': {'updated': {'type': 'date', 'null_value': '2013-07-01'}, 79 | 'description': {'type': 'string', 'boost': 1.35, 'analyzer': 'snowball'}, 80 | 'text': {'type': 'string', 'analyzer': 'edge_ngram_analyzer'}, 81 | 'text_field': {'type': 'string', 'analyzer': 'snowball'}, 82 | 'created': {'type': 'date'}, 83 | 'title': {'type': 'string', 'boost': 1.75, 'analyzer': 'snowball'}, 84 | 'authors': {'type': 'string', 'analyzer': 'snowball'}, 85 | 'meta_data': {'type': 'string', 'analyzer': 'snowball'}, 86 | 'link': {'type': 'string', 'analyzer': 'snowball'}, 87 | 'effective_date': {'type': 'date'}, 88 | 'tweet_count': {'type': 'integer'}, 89 | 'id': {'type': 'integer'}, 90 | '_id': {'type': 'integer'}, # This is the elastic search index. 91 | 'published': {'type': 'date'}} 92 | } 93 | expected_user = {'properties': {'updated': {'type': 'date', 'null_value': '2013-07-01'}, 94 | 'about': {'type': 'string', 'analyzer': 'edge_ngram_analyzer'}, 95 | 'int_about': {'type': 'integer'}, 96 | 'user_id': {'analyzer': 'snowball', 'type': 'string'}, 97 | 'effective_date': {'type': 'date'}, 98 | 'created': {'type': 'date'}, 99 | 'name': {'analyzer': 'snowball', 'type': 'string'}, 100 | '_id': {'analyzer': 'snowball', 'type': 'string'}} 101 | } 102 | 103 | self.assertEqual(ArticleIndex().get_mapping(), expected_article) 104 | self.assertEqual(UserIndex().get_mapping(), expected_user) 105 | 106 | def test_fetch_item(self): 107 | ''' 108 | Test searching and mapping. 
109 | ''' 110 | self.assertEqual(Article.objects.search.query('match', _all='Description')[0], Article.objects.get(title='Title one'), 'Searching for "Description" did not return just the first Article.') 111 | self.assertEqual(Article.objects.search.query('match', _all='second article')[0], Article.objects.get(title='Title two'), 'Searching for "second article" did not return the second Article.') 112 | 113 | self.assertEqual(User.objects.search.query('match', _all='Description')[0], User.objects.get(user_id='bungie1'), 'Searching for "About" did not return the User.') 114 | self.assertEqual(User.objects.search.query('match', _all='second user')[0], User.objects.get(user_id='bungie2'), 'Searching for "second user" did not return the User.') 115 | 116 | def test_raw_fetch(self): 117 | ''' 118 | Test searching and mapping. 119 | ''' 120 | item = Article.objects.search.query('match', _all='Description')[:1:True] 121 | self.assertTrue(hasattr(item, 'meta'), 'Fetching first raw results did not return an object with a meta attribute.') 122 | 123 | item = User.objects.search.query('match', _all='Description')[:1:True] 124 | self.assertTrue(hasattr(item, 'meta'), 'Fetching first raw results did not return an object with a meta attribute.') 125 | 126 | def test_iteration(self): 127 | ''' 128 | Tests iteration on Bungiesearch items. 
129 | ''' 130 | lazy_search_article = Article.objects.search.query('match', title='title') 131 | db_items = list(Article.objects.all()) 132 | self.assertTrue(all([result in db_items for result in lazy_search_article]), 'Searching for title "title" did not return all articles.') 133 | self.assertTrue(all([result in db_items for result in lazy_search_article[:]]), 'Searching for title "title" did not return all articles when using empty slice.') 134 | self.assertEqual(len(lazy_search_article[:1]), 1, 'Get item with start=None and stop=1 did not return one item.') 135 | self.assertEqual(len(lazy_search_article[:2]), 2, 'Get item with start=None and stop=2 did not return two item.') 136 | 137 | lazy_search_user = User.objects.search.query('match', about='user') 138 | db_items = list(User.objects.all()) 139 | self.assertTrue(all([result in db_items for result in lazy_search_user]), 'Searching for description "user" did not return all articles.') 140 | self.assertTrue(all([result in db_items for result in lazy_search_user[:]]), 'Searching for description "user" did not return all articles when using empty slice.') 141 | self.assertEqual(len(lazy_search_user[:1]), 1, 'Get item with start=None and stop=1 did not return one item.') 142 | self.assertEqual(len(lazy_search_user[:2]), 2, 'Get item with start=None and stop=2 did not return two item.') 143 | 144 | def test_no_results(self): 145 | ''' 146 | Test empty results. 
147 | ''' 148 | self.assertEqual(list(Article.objects.search.query('match', _all='nothing')), [], 'Searching for "nothing" did not return an empty list on iterator call.') 149 | self.assertEqual(Article.objects.search.query('match', _all='nothing')[:10], [], 'Searching for "nothing" did not return an empty list on get item call.') 150 | 151 | self.assertEqual(list(User.objects.search.query('match', _all='nothing')), [], 'Searching for "nothing" did not return an empty list on iterator call.') 152 | self.assertEqual(list(User.objects.search.query('match', _all='nothing')), [], 'Searching for "nothing" did not return an empty list on iterator call.') 153 | 154 | def test_custom_search(self): 155 | ''' 156 | Test searching on custom index and doc_type. 157 | ''' 158 | search = Article.objects.custom_search(index='bungiesearch_demo', doc_type='Article') 159 | es_art1 = search.query('match', _all='Description')[0] 160 | db_art1 = Article.objects.get(title='Title one') 161 | es_art2 = search.query('match', _all='second article')[0] 162 | db_art2 = Article.objects.get(title='Title two') 163 | self.assertTrue(all([es_art1.id == db_art1.id, es_art1.title == db_art1.title, es_art1.description == db_art1.description]), 'Searching for "Description" did not return the first Article.') 164 | self.assertTrue(all([es_art2.id == db_art2.id, es_art2.title == db_art2.title, es_art2.description == db_art2.description]), 'Searching for "second article" did not return the second Article.') 165 | 166 | search = User.objects.custom_search(index='bungiesearch_demo', doc_type='User') 167 | es_user1 = search.query('match', _all='Description')[0] 168 | db_user1 = User.objects.get(user_id='bungie1') 169 | self.assertRaises(AttributeError, getattr, es_user1, 'id') 170 | self.assertTrue(all([es_user1.user_id == db_user1.user_id, es_user1.about == db_user1.about]), 'Searching for "About" did not return the first User.') 171 | 172 | def test_get_model(self): 173 | ''' 174 | Test model mapping. 
175 | ''' 176 | self.assertEqual(ArticleIndex().get_model(), Article, 'Model was not Article.') 177 | self.assertEqual(UserIndex().get_model(), User, 'Model was not User') 178 | 179 | def test_cloning(self): 180 | ''' 181 | Tests that Bungiesearch remains lazy with specific function which should return clones. 182 | ''' 183 | inst = Article.objects.search.query('match', _all='Description') 184 | self.assertIsInstance(inst.only('_id'), inst.__class__, 'Calling `only` does not return a clone of itself.') 185 | 186 | inst = User.objects.search.query('match', _all='Description') 187 | self.assertIsInstance(inst.only('_id'), inst.__class__, 'Calling `only` does not return a clone of itself.') 188 | 189 | def test_search_alias_exceptions(self): 190 | ''' 191 | Tests that invalid aliases raise exceptions. 192 | ''' 193 | self.assertRaises(AttributeError, getattr, Article.objects, 'bsearch_no_such_alias') 194 | self.assertRaises(NotImplementedError, Article.objects.bsearch_invalidalias) 195 | self.assertRaises(ValueError, getattr, Article.objects.search.bsearch_title('title query').bsearch_titlefilter('title filter'), 'bsearch_noupdatedmdlonly') 196 | 197 | @override_settings(BUNGIESEARCH={}) 198 | def test_search_alias_not_setup(self): 199 | ''' 200 | Tests that Bungiesearch is not instantiated when not set up 201 | This is its own test due to the override_settings decorator 202 | ''' 203 | self.assertRaises(AttributeError, getattr, Article.objects, 'bsearch_no_such_alias') 204 | self.assertRaises(AttributeError, getattr, Article.objects, 'bsearch_title_search') 205 | 206 | def test_search_aliases(self): 207 | ''' 208 | Tests search alias errors and functionality. 
209 | ''' 210 | title_alias = Article.objects.bsearch_title_search('title') 211 | db_items = list(Article.objects.all()) 212 | self.assertEqual(title_alias.to_dict(), {'query': {'match': {'title': 'title'}}}, 'Title alias search did not return the expected JSON query.') 213 | self.assertTrue(all([result in db_items for result in title_alias]), 'Alias searching for title "title" did not return all articles.') 214 | self.assertTrue(all([result in db_items for result in title_alias[:]]), 'Alias searching for title "title" did not return all articles when using empty slice.') 215 | self.assertEqual(len(title_alias[:1]), 1, 'Get item on an alias search with start=None and stop=1 did not return one item.') 216 | self.assertEqual(len(title_alias[:2]), 2, 'Get item on an alias search with start=None and stop=2 did not return two item.') 217 | self.assertEqual(title_alias.to_dict(), Article.objects.bsearch_title('title').to_dict(), 'Alias applicable to all models does not return the same JSON request body as the model specific one.') 218 | self.assertEqual(NoUpdatedField.objects.search.filter('term', title='My title').to_dict(), NoUpdatedField.objects.bsearch_noupdatedmdlonly('My title').to_dict(), 'Alias applicable only to NoUpdatedField does not generate the correct filter.') 219 | 220 | def test_bungie_instance_search_aliases(self): 221 | alias_dictd = Article.objects.search.bsearch_title('title query').bsearch_titlefilter('title filter').to_dict() 222 | expected = {'query': {'bool': {'filter': [{'term': {'title': 'title filter'}}], 'must': [{'match': {'title': 'title query'}}]}}} 223 | self.assertEqual(alias_dictd, expected, 'Alias on Bungiesearch instance did not return the expected dictionary.') 224 | 225 | def test_search_alias_model(self): 226 | self.assertEqual(Article.objects.bsearch_get_alias_for_test().get_model(), Article, 'Unexpected get_model information on search alias.') 227 | self.assertEqual(Article.objects.search.bsearch_title('title 
query').bsearch_get_alias_for_test().get_model(), Article, 'Unexpected get_model information on search alias.') 228 | self.assertRaises(ValueError, Bungiesearch().bsearch_get_alias_for_test().get_model) 229 | 230 | def test_post_save(self): 231 | art = {'title': 'Title three', 232 | 'description': 'Postsave', 233 | 'link': 'http://example.com/sparrho', 234 | 'published': pytz.UTC.localize(datetime(year=2020, month=9, day=15)), 235 | 'updated': pytz.UTC.localize(datetime(year=2014, month=9, day=10)), 236 | 'tweet_count': 20, 237 | 'source_hash': 159159159159, 238 | 'missing_data': '', 239 | 'positive_feedback': 50, 240 | 'negative_feedback': 5} 241 | obj = Article.objects.create(**art) 242 | find_three = Article.objects.search.query('match', title='three') 243 | self.assertEqual(len(find_three), 2, 'Searching for "three" in title did not return exactly two items (got {}).'.format(find_three)) 244 | # Let's check that both returned items are from different indices. 245 | self.assertNotEqual(find_three[0:1:True].meta.index, find_three[1:2:True].meta.index, 'Searching for "three" did not return items from different indices.') 246 | # Let's now delete this object to test the post delete signal. 247 | obj.delete() 248 | 249 | def test_bulk_delete(self): 250 | ''' 251 | This tests that using the update_index function with 'delete' as the action performs a bulk delete operation on the data. 
252 | ''' 253 | bulk_art1 = {'title': 'Title four', 254 | 'description': 'Bulk delete first', 255 | 'link': 'http://example.com/bd1', 256 | 'published': pytz.UTC.localize(datetime(year=2015, month=7, day=13)), 257 | 'updated': pytz.UTC.localize(datetime(year=2015, month=7, day=20)), 258 | 'tweet_count': 20, 259 | 'source_hash': 159159159159, 260 | 'missing_data': '', 261 | 'positive_feedback': 50, 262 | 'negative_feedback': 5} 263 | bulk_art2 = {'title': 'Title five', 264 | 'description': 'Bulk delete second', 265 | 'link': 'http://example.com/bd2', 266 | 'published': pytz.UTC.localize(datetime(year=2015, month=7, day=13)), 267 | 'updated': pytz.UTC.localize(datetime(year=2015, month=7, day=20)), 268 | 'tweet_count': 20, 269 | 'source_hash': 159159159159, 270 | 'missing_data': '', 271 | 'positive_feedback': 50, 272 | 'negative_feedback': 5} 273 | 274 | bulk_obj1 = Article.objects.create(**bulk_art1) 275 | bulk_obj2 = Article.objects.create(**bulk_art2) 276 | 277 | find_five = Article.objects.search.query('match', title='five') 278 | self.assertEqual(len(find_five), 2, 'Searching for "five" in title did not return exactly two results (got {})'.format(find_five)) 279 | 280 | model_items = [bulk_obj1.pk, bulk_obj2.pk] 281 | model_name = Article.__name__ 282 | update_index(model_items, model_name, action='delete', bulk_size=2, num_docs=-1, start_date=None, end_date=None, refresh=True) 283 | 284 | find_four = Article.objects.search.query('match', title='four') 285 | self.assertEqual(len(find_four), 0, 'Searching for "four" in title did not return exactly zero results (got {})'.format(find_four)) 286 | find_five = Article.objects.search.query('match', title='five') 287 | self.assertEqual(len(find_five), 0, 'Searching for "five" in title did not return exactly zero results (got {})'.format(find_five)) 288 | 289 | def test_manager_interference(self): 290 | ''' 291 | This tests that saving an object which is not managed by Bungiesearch won't try to update the index for that 
model. 292 | ''' 293 | Unmanaged.objects.create(field_title='test', field_description='blah') 294 | 295 | def test_time_indexing(self): 296 | update_index(Article.objects.all(), 'Article', start_date=datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M')) 297 | update_index(NoUpdatedField.objects.all(), 'NoUpdatedField', end_date=datetime.strftime(datetime.now(), '%Y-%m-%d')) 298 | 299 | def test_optimal_queries(self): 300 | db_item = NoUpdatedField.objects.get(pk=1) 301 | src_item = NoUpdatedField.objects.search.query('match', field_title='My title')[0] 302 | self.assertEqual(src_item.id, db_item.id, 'Searching for the object did not return the expected object id.') 303 | self.assertEqual(src_item.get_deferred_fields(), {'field_description'}, 'Was expecting description in the set of deferred fields.') 304 | 305 | def test_concat_queries(self): 306 | items = Article.objects.bsearch_title_search('title')[::False] + NoUpdatedField.objects.search.query('match', field_title='My title')[::False] 307 | for item in items: 308 | model = item._meta.proxy_for_model if item._meta.proxy_for_model else type(item) 309 | self.assertIn(model, [Article, NoUpdatedField], 'Got an unmapped item ({}), or an item with an unexpected mapping.'.format(type(item))) 310 | 311 | def test_data_templates(self): 312 | # One article has a title that contains 'one' 313 | match_one = Article.objects.search.query('match', text='one') 314 | self.assertEqual(len(match_one), 2, 'Searching for "one" in text did not return exactly one item (got {}).'.format(match_one)) 315 | self.assertEqual(match_one[0].title, 'Title one', 'Searching for "one" in text did not yield the first article (got {})'.format(match_one[0].title)) 316 | 317 | # Two articles have a description that contain 'article' 318 | match_two = Article.objects.search.query('match', text='article') 319 | self.assertEqual(len(match_two), 4, 'Searching for "article" in text did not return exactly two items (got {})'.format(match_two)) 320 | 321 | # 
Two articles have a link with 'example,' but since link isn't in the template, there should be zero results 322 | match_zero = Article.objects.search.query('match', text='example') 323 | self.assertEqual(len(match_zero), 0, 'Searching for "article" in text did not return exactly zero items (got {})'.format(match_zero)) 324 | 325 | def test_fields(self): 326 | ''' 327 | Checking that providing a specific field will correctly fetch these items from elasticsearch. 328 | ''' 329 | for mdl, id_field in [(Article, 'id'), (User, 'user_id')]: 330 | raw_items = mdl.objects.search.fields('_id')[:5:True] 331 | self.assertTrue(all([dir(raw) == ['meta'] for raw in raw_items]), 'Requesting only _id returned more than just meta info from ES for model {}.'.format(mdl)) 332 | items = mdl.objects.search.fields('_id')[:5] 333 | self.assertTrue(all([dbi in items for dbi in mdl.objects.all()]), 'Mapping after fields _id only search did not return all results for model {}.'.format(mdl)) 334 | items = mdl.objects.search.fields([id_field, '_id', '_source'])[:5] 335 | self.assertTrue(all([dbi in items for dbi in mdl.objects.all()]), 'Mapping after fields _id, id and _source search did not return all results for model {}.'.format(mdl)) 336 | 337 | def test_prepare_field(self): 338 | ''' 339 | Check that providing a method to calculate the value of a field will yield correct results in the search index. 
340 | ''' 341 | user_int_description = {'user_id': 'bungie3', 342 | 'about': '123', 343 | 'created': pytz.UTC.localize(datetime(year=2015, month=1, day=1)), 344 | 'updated': pytz.UTC.localize(datetime(year=2015, month=6, day=1)), 345 | } 346 | User.objects.create(**user_int_description) 347 | 348 | find_one = User.objects.search.filter('term', int_about=1) 349 | self.assertEqual(len(find_one), 4, 'Searching for users with default int description did not return exactly 4 items (got {})'.format(find_one)) 350 | 351 | find_123 = User.objects.search.filter('term', int_about=123) 352 | self.assertEqual(len(find_one), 4, 'Searching for users with int description 123 did not return exactly 2 items (got {})'.format(find_123)) 353 | 354 | find_zero = User.objects.search.filter('term', int_about=0) 355 | self.assertEqual(len(find_zero), 0, 'Searching for users with int description zero did not return exactly 0 items (got {})'.format(find_zero)) 356 | 357 | def test_fun(self): 358 | ''' 359 | Test fun queries. 360 | ''' 361 | lazy = Article.objects.bsearch_title_search('title').only('pk').fields('_id') 362 | print(len(lazy)) # Returns the total hits computed by elasticsearch. 363 | assert all([type(item) == Article for item in lazy.filter('range', effective_date={'lte': '2014-09-22'})[5:7]]) 364 | 365 | def test_meta(self): 366 | ''' 367 | Test search meta is set. 368 | ''' 369 | lazy = Article.objects.bsearch_title_search('title').only('pk').fields('_id') 370 | assert all([hasattr(item._searchmeta) for item in lazy.filter('range', effective_date={'lte': '2014-09-22'})[5:7]]) 371 | 372 | def test_manangedbutempty(self): 373 | ''' 374 | Tests that the indexing condition controls indexing properly. 
375 | ''' 376 | mbeo = ManangedButEmpty.objects.create(field_title='Some time', field_description='This should never be indexed.') 377 | idxi = len(ManangedButEmpty.objects.search) 378 | self.assertEquals(idxi, 0, 'ManagedButEmpty has {} indexed items instead of zero.'.format(idxi)) 379 | mbeo.delete() 380 | 381 | def test_specify_index(self): 382 | self.assertEqual(Article.objects.count(), Article.objects.search_index('bungiesearch_demo').count(), 'Indexed items on bungiesearch_demo for Article does not match number in database.') 383 | self.assertEqual(Article.objects.count(), Article.objects.search_index('bungiesearch_demo_bis').count(), 'Indexed items on bungiesearch_demo_bis for Article does not match number in database.') 384 | self.assertEqual(Article.objects.count(), Article.objects.bsearch_bisindex().count(), 'Indexed items on bungiesearch_demo_bis for Article does not match number in database, using alias.') 385 | self.assertEqual(NoUpdatedField.objects.count(), NoUpdatedField.objects.search_index('bungiesearch_demo').count(), 'Indexed items on bungiesearch_demo for NoUpdatedField does not match number in database.') 386 | self.assertEqual(NoUpdatedField.objects.search_index('bungiesearch_demo_bis').count(), 0, 'Indexed items on bungiesearch_demo_bis for NoUpdatedField is zero.') 387 | 388 | def test_None_as_missing(self): 389 | missing = Article.objects.search_index('bungiesearch_demo').filter('missing', field='text_field') 390 | self.assertEqual(len(missing), 1, 'Filtering by missing text_field does not return exactly one item.') 391 | self.assertEqual(missing[0].text_field, None, 'The item with missing text_field does not have text_field=None.') 392 | 393 | def test_signal_setup_teardown(self): 394 | ''' 395 | Tests that setup and tear down can be ran. 
396 | ''' 397 | btsp = BungieTestSignalProcessor() 398 | btsp.setup(Article) 399 | self.assertTrue(btsp.setup_ran, 'Calling setup on the signal processor did not set it up.') 400 | btsp.teardown(Article) 401 | self.assertTrue(btsp.teardown_ran, 'Calling teardown on the signal processor did not tear it down.') 402 | -------------------------------------------------------------------------------- /tests/core/test_settings.py: -------------------------------------------------------------------------------- 1 | from django.conf import settings 2 | from django.test import TestCase 3 | 4 | from bungiesearch import Bungiesearch 5 | 6 | 7 | class SettingsTestCase(TestCase): 8 | 9 | def test_timeout_used(self): 10 | settings.BUNGIESEARCH['TIMEOUT'] = 29 11 | search = Bungiesearch() 12 | 13 | self.assertEqual(search.BUNGIE['TIMEOUT'], 29) 14 | self.assertEqual(search._using.transport.kwargs['timeout'], 29) 15 | -------------------------------------------------------------------------------- /tests/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings") 7 | from django.core.management import execute_from_command_line 8 | execute_from_command_line(sys.argv) 9 | -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | DJANGO_SETTINGS_MODULE=tests.settings 3 | -------------------------------------------------------------------------------- /tests/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | DEBUG = True 5 | BASE_DIR = os.path.dirname(os.path.dirname(__file__)) 6 | SECRET_KEY = 'cookies_are_delicious_delicacies' 7 | ROOT_URLCONF = 'urls' 8 | LANGUAGE_CODE = 'en-us' 9 | 
TIME_ZONE = 'UTC' 10 | USE_I18N = True 11 | USE_L10N = True 12 | USE_TZ = True 13 | MIDDLEWARE_CLASSES = () 14 | DEFAULT_INDEX_TABLESPACE = '' 15 | 16 | # Make sure the copy of bungiesearch in the directory above this one is used. 17 | sys.path.insert(0, BASE_DIR) 18 | 19 | INSTALLED_APPS = ( 20 | 'bungiesearch', 21 | 'core', 22 | ) 23 | 24 | DATABASES = { 25 | 'default': { 26 | 'ENGINE': 'django.db.backends.sqlite3', 27 | 'NAME': ':memory:', 28 | } 29 | } 30 | 31 | TEMPLATES = [ 32 | { 33 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 34 | 'DIRS': [], 35 | 'APP_DIRS': True, 36 | 'OPTIONS': { 37 | 'context_processors': [ 38 | 'django.contrib.auth.context_processors.auth', 39 | 'django.template.context_processors.debug', 40 | 'django.template.context_processors.i18n', 41 | 'django.template.context_processors.media', 42 | 'django.template.context_processors.static', 43 | 'django.template.context_processors.tz', 44 | 'django.contrib.messages.context_processors.messages', 45 | ], 46 | }, 47 | }, 48 | ] 49 | 50 | BUNGIESEARCH = { 51 | 'URLS': [os.getenv('ELASTIC_SEARCH_URL', 'localhost')], 52 | 'ES_SETTINGS': { 53 | 'http_auth': os.getenv('ELASTIC_SEARCH_AUTH') 54 | }, 55 | 'ALIASES': { 56 | 'bsearch': 'core.search_aliases' 57 | }, 58 | 'INDICES': { 59 | 'bungiesearch_demo': 'core.search_indices', 60 | 'bungiesearch_demo_bis': 'core.search_indices_bis' 61 | }, 62 | 'SIGNALS': { 63 | 'BUFFER_SIZE': 1, 64 | 'SIGNAL_CLASS': 'core.bungie_signal.BungieTestSignalProcessor' 65 | } 66 | } 67 | --------------------------------------------------------------------------------