├── tests ├── __init__.py └── test_wagtail_meilisearch.py ├── src └── wagtail_meilisearch │ ├── py.typed │ ├── __init__.py │ ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── meilisearch_indexes.py │ │ └── meilisearch_status.py │ ├── rebuilder.py │ ├── query.py │ ├── defaults.py │ ├── utils.py │ ├── settings.py │ ├── backend.py │ ├── results.py │ └── index.py ├── .gitignore ├── pyproject.toml ├── LICENSE └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/management/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_wagtail_meilisearch.py: -------------------------------------------------------------------------------- 1 | # One day. 
2 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | ref/** 4 | */__pycache__/**/* 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "wagtail-meilisearch" 3 | version = "1.0.0" 4 | description = "A MeiliSearch backend for Wagtail" 5 | readme = "README.md" 6 | authors = [ 7 | { name = "Hactar", email = "systems@hactar.is" } 8 | ] 9 | requires-python = ">=3.10" 10 | dependencies = [ 11 | "arrow>=1.2.3", 12 | "wagtail>=6.0", 13 | "meilisearch>=0.36.0", 14 | ] 15 | 16 | [build-system] 17 | requires = ["uv_build>=0.7.19,<0.8.0"] 18 | build-backend = "uv_build" 19 | 20 | 21 | [tool.ruff] 22 | target-version = "py310" 23 | line-length = 100 24 | extend-exclude = ["tests"] 25 | 26 | [tool.ruff.lint] 27 | select = [ 28 | "E", "F", "B", "DJ", "C90", "S", "COM", "DTZ", "EM", 29 | "PT", "RET", "SIM", "TCH", "ARG", "PTH", "PERF" 30 | ] 31 | extend-ignore = ["E402", "RET504", "S101", "DJ012"] 32 | 33 | [tool.ruff.lint.isort] 34 | combine-as-imports = true 35 | force-wrap-aliases = true 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Hactar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 
class MeiliSearchRebuilder:
    """Prepare the MeiliSearch index of one model before it is re-populated."""

    def __init__(self, model_index: MeiliSearchModelIndex) -> None:
        """Remember the model index and the settings object it carries.

        Args:
            model_index: The index wrapper whose model is about to be rebuilt.
        """
        self.index: MeiliSearchModelIndex = model_index
        self.settings = model_index.settings
        self.uid: str = get_index_label(model_index.model)
        self.dummy_index: DummyModelIndex = DummyModelIndex()

    def start(self) -> Union[MeiliSearchModelIndex, DummyModelIndex]:
        """Begin a rebuild and hand back the index that should receive documents.

        The backend's ``update_strategy`` decides what happens to existing data:

        - ``"soft"`` and ``"delta"``: the existing index is reused as it is.
        - anything else (the "hard" strategy): every document is deleted first.

        Models whose label is listed in the backend's ``skip_models`` are not
        rebuilt at all.

        Returns:
            The model's index with settings applied, or the dummy index when
            the model is skipped.
        """
        model = self.index.model
        backend = self.index.backend

        # Guard clause: skipped models get the dummy index instead of a real one.
        if model and model._meta.label in backend.skip_models:
            sys.stdout.write(f"SKIPPING: {model._meta.label}\n")
            return self.dummy_index

        if backend.update_strategy not in ("soft", "delta"):
            # Hard update strategy: wipe the existing documents first.
            stale_index = backend.get_index_for_model(model)
            stale_index.delete_all_documents()

        fresh_index = backend.get_index_for_model(model)
        self.settings.apply_settings(index=fresh_index)
        return fresh_index

    def finish(self) -> None:
        """Finish the rebuild; nothing needs to be cleaned up."""
        return None
def _connect_filters(self, filters: List[Any], connector: str, negated: bool) -> Optional[Q]:
    """Join several filters into one ``Q`` expression.

    Args:
        filters: The filters to combine.
        connector: ``"AND"`` or ``"OR"``.
        negated: When true, the combined expression is inverted.

    Returns:
        Optional[Q]: The combined (and possibly inverted) expression, or
        ``None`` when ``connector`` is neither ``"AND"`` nor ``"OR"``.
    """
    # Also borrowed from wagtail-whoosh
    if connector not in ("AND", "OR"):
        return None

    if connector == "AND":
        combined = Q(*filters)
    else:
        combined = OR([Q(fil) for fil in filters])

    return ~combined if negated else combined
class Command(BaseCommand):
    """Display the settings of every MeiliSearch index known to the backend.

    The command prints the total database size and last update time reported
    by MeiliSearch, followed by a fixed selection of settings for each index.
    """

    help = "Display info about each Meilisearch index"

    # Settings printed for every index, in display order.
    _REPORTED_SETTINGS = (
        "displayedAttributes",
        "searchableAttributes",
        "filterableAttributes",
        "sortableAttributes",
        "rankingRules",
        "synonyms",
        "distinctAttribute",
        "typoTolerance",
        "faceting",
        "pagination",
    )

    def handle(self, *_args, **_kwargs) -> None:
        """Print database-wide stats and the settings of each index.

        Django passes arguments to this method, but none are used; the
        underscore prefix marks them as intentionally ignored.
        """
        b = get_search_backend()
        stats: Dict[str, Union[float, str, Dict]] = b.client.get_all_stats()
        indexes: Dict[str, Dict] = stats["indexes"]

        print("*" * 80)
        print(f"Total DB size: {human_readable_file_size(stats['databaseSize'])}")
        print(f"Last updated: {arrow.get(stats['lastUpdate']).format('YYYY-MM-DD HH:mm:ss')}")

        if not indexes:
            print("No indexes created yet")
        else:
            print("Indexes:")
            for k, v in indexes.items():
                is_indexing = v["isIndexing"]
                # Stop words are deliberately not reported; missing keys are
                # printed as None instead of raising.
                settings = b.client.get_index(k).get_settings()
                print(f"{k} - indexing: {is_indexing}")
                for setting_name in self._REPORTED_SETTINGS:
                    print(f"\t {setting_name}: {settings.get(setting_name)}")

        print("\n")
        print("*" * 80)
class Command(BaseCommand):
    """Print statistics about the MeiliSearch backend and its indexes.

    The report shows the database size, the last update time and, for each
    index, its document count and whether MeiliSearch is currently indexing it.
    """

    help = "Print some stats about the meilisearch backend"

    def add_arguments(self, parser) -> None:
        """Register the optional ``--indexing`` and ``--models`` filters.

        Args:
            parser: The argument parser to which arguments should be added.
        """
        # Named (optional) arguments
        parser.add_argument(
            "--indexing",
            action="store_true",
            help="Show only models that MeiliSearch is currently indexing",
        )
        parser.add_argument(
            "--models",
            type=str,
            help="Show only models in this comma separated list of model labels",
        )

    def handle(self, *_args, **options) -> None:
        """Execute the command.

        Args:
            *_args: Positional arguments forwarded by Django; unused.
            **options: Command options including 'models' and 'indexing'.
        """
        # Tolerate whitespace around labels ("app.A, app.B") and a missing or
        # empty --models value.
        models_string: Optional[str] = options.get("models") or ""
        models: List[str] = [
            label.strip() for label in models_string.split(",") if label.strip()
        ]
        indexing: bool = bool(options.get("indexing", False))

        # Get MeiliSearch backend and stats
        b = get_search_backend()
        stats: Dict[str, Union[float, str, Dict]] = b.client.get_all_stats()
        indexes: Dict[str, Dict] = stats["indexes"]

        print("*" * 80)
        print(f"Index DB size: {human_readable_file_size(stats['databaseSize'])}")
        print(f"Last updated: {arrow.get(stats['lastUpdate']).format('YYYY-MM-DD HH:mm:ss')}")

        if not indexes:
            print("No indexes created yet")
        else:
            print("Indexes:")
            for k, v in indexes.items():
                # Index uids use "-" where the model label uses ".".
                model = k.replace("-", ".")
                is_indexing = v["isIndexing"]

                # Each active filter may exclude the index; an inactive
                # filter lets everything through.
                if models and model not in models:
                    continue
                if indexing and not is_indexing:
                    continue
                self._print_index_stats(model, v)

        print("*" * 80)

    def _print_index_stats(self, model: str, v: Dict[str, Union[int, bool]]) -> None:
        """Print statistics for a specific index.

        Args:
            model: The model name (index label with dots instead of hyphens).
            v: Dictionary containing index statistics; ``numberOfDocuments``
                and ``isIndexing`` are read.
        """
        print(f"{model}")
        print(f" Documents: {v['numberOfDocuments']}")
        if v["isIndexing"] is True:
            print(" INDEXING")
        print("")
"hereafter", 111 | "hereby", 112 | "herein", 113 | "hereupon", 114 | "hers", 115 | "him", 116 | "his", 117 | "how", 118 | "however", 119 | "i", 120 | "ie", 121 | "if", 122 | "in", 123 | "inc", 124 | "indeed", 125 | "interest", 126 | "into", 127 | "is", 128 | "it", 129 | "its", 130 | "keep", 131 | "last", 132 | "latter", 133 | "latterly", 134 | "least", 135 | "less", 136 | "ltd", 137 | "made", 138 | "many", 139 | "may", 140 | "me", 141 | "meanwhile", 142 | "might", 143 | "mine", 144 | "more", 145 | "moreover", 146 | "most", 147 | "mostly", 148 | "move", 149 | "much", 150 | "must", 151 | "my", 152 | "name", 153 | "namely", 154 | "neither", 155 | "never", 156 | "nevertheless", 157 | "next", 158 | "no", 159 | "nobody", 160 | "none", 161 | "noone", 162 | "nor", 163 | "not", 164 | "nothing", 165 | "now", 166 | "nowhere", 167 | "of", 168 | "off", 169 | "often", 170 | "on", 171 | "once", 172 | "one", 173 | "only", 174 | "onto", 175 | "or", 176 | "other", 177 | "others", 178 | "otherwise", 179 | "our", 180 | "ours", 181 | "ourselves", 182 | "out", 183 | "over", 184 | "own", 185 | "part", 186 | "per", 187 | "perhaps", 188 | "put", 189 | "rather", 190 | "re", 191 | "same", 192 | "see", 193 | "seem", 194 | "seemed", 195 | "seeming", 196 | "seems", 197 | "serious", 198 | "several", 199 | "she", 200 | "should", 201 | "show", 202 | "side", 203 | "since", 204 | "so", 205 | "some", 206 | "somehow", 207 | "someone", 208 | "something", 209 | "sometime", 210 | "sometimes", 211 | "somewhere", 212 | "still", 213 | "such", 214 | "take", 215 | "than", 216 | "that", 217 | "the", 218 | "their", 219 | "them", 220 | "themselves", 221 | "then", 222 | "there", 223 | "thereafter", 224 | "thereby", 225 | "therefore", 226 | "therein", 227 | "thereupon", 228 | "these", 229 | "they", 230 | "thick", 231 | "thin", 232 | "this", 233 | "those", 234 | "though", 235 | "through", 236 | "throughout", 237 | "thru", 238 | "thus", 239 | "to", 240 | "together", 241 | "too", 242 | "top", 243 | "toward", 244 | 
# Type variables for generic functions
T = TypeVar("T")
F = TypeVar("F", bound=Callable[..., Any])


def weak_lru(maxsize: int = 128, typed: bool = False) -> Callable[[F], F]:
    """Return an LRU-cache decorator that is safe to use on instance methods.

    The cache is keyed on a weak reference to ``self`` plus the call
    arguments, so a cached entry does not keep the instance alive.

    The decorated method exposes ``cache_info()`` and ``cache_clear()``,
    mirroring ``functools.lru_cache``, so callers can inspect or invalidate
    the cache.

    Args:
        maxsize: Maximum number of cached results, as for ``functools.lru_cache``.
        typed: Whether arguments of different types are cached separately.

    Returns:
        A decorator that wraps a method with the weak-reference cache.
    """

    def wrapper(func: F) -> F:
        @functools.lru_cache(maxsize, typed)
        def _func(_self: Callable[[], Any], *args: Any, **kwargs: Any) -> Any:
            # ``_self`` is the weak reference; calling it yields the instance.
            return func(_self(), *args, **kwargs)

        @functools.wraps(func)
        def inner(self: Any, *args: Any, **kwargs: Any) -> Any:
            # Only the weak reference enters the cache key.
            return _func(weakref.ref(self), *args, **kwargs)

        # Surface the cache controls that the wrapper would otherwise hide.
        inner.cache_info = _func.cache_info  # type: ignore[attr-defined]
        inner.cache_clear = _func.cache_clear  # type: ignore[attr-defined]

        return cast("F", inner)

    return wrapper
def get_document_fields(model: Type[Model], item: Model) -> Dict[str, str]:
    """Build the dictionary of indexable values for one model instance.

    Walks ``model.get_search_fields()`` and collects a prepared string value
    for every search, filter and autocomplete field. For ``RelatedFields`` the
    sub-fields are stored under ``"<relation>__<mapped sub-field>"``.

    The result is deliberately NOT memoised. Django instances hash and compare
    by primary key, so a cache keyed on ``item`` would return the stale
    document for an edited object and would keep every indexed instance alive.

    Args:
        model: The model class whose search fields are walked.
        item: The instance whose values are extracted.

    Returns:
        A new dictionary mapping document field names to prepared string
        values. Fields whose value cannot be read or prepared are left out.
    """
    doc_fields: Dict[str, str] = {}
    for field in model.get_search_fields():
        if isinstance(field, (SearchField, FilterField, AutocompleteField)):
            # Best effort: one unreadable field must not abort the document.
            with contextlib.suppress(Exception):
                doc_fields[get_field_mapping(field)] = prepare_value(field.get_value(item))
        elif isinstance(field, RelatedFields):
            value = field.get_value(item)
            if isinstance(value, (Manager, QuerySet)):
                # To-many relation: index the values of all related rows.
                qs = value.all()
                for sub_field in field.fields:
                    sub_values = qs.values_list(sub_field.field_name, flat=True)
                    with contextlib.suppress(Exception):
                        doc_fields[f"{field.field_name}__{get_field_mapping(sub_field)}"] = (
                            prepare_value(list(sub_values))
                        )
            elif isinstance(value, Model):
                # To-one relation: read each sub-field from the related object.
                for sub_field in field.fields:
                    with contextlib.suppress(Exception):
                        doc_fields[f"{field.field_name}__{get_field_mapping(sub_field)}"] = (
                            prepare_value(sub_field.get_value(value))
                        )
    return doc_fields
def _get_field_mapping(field: Union[SearchField, FilterField, AutocompleteField]) -> str:
    """Return the name under which ``field`` is stored in the index.

    Args:
        field: A SearchField, FilterField or AutocompleteField.

    Returns:
        str: The field name, followed by ``FILTER_SUFFIX`` for filter fields
        or ``AUTOCOMPLETE_SUFFIX`` for autocomplete fields; plain search
        fields keep their name unchanged.
    """
    name = field.field_name
    if isinstance(field, FilterField):
        suffix = FILTER_SUFFIX
    elif isinstance(field, AutocompleteField):
        suffix = AUTOCOMPLETE_SUFFIX
    else:
        return name
    return name + suffix
def apply_settings(self, index: Any) -> None:
    """Push every managed setting to ``index``.

    The index is remembered on ``self.index``. Then, in order, the pagination,
    searchable attributes, filterable attributes, ranking rules and stop
    words are applied, and a confirmation line is written to stdout.

    Args:
        index: The MeiliSearch index to apply settings to.
    """
    self.index = index
    model = self.index.model

    # Each step takes the same keyword arguments, so drive them from a table.
    steps = (
        "_apply_paginator",
        "_apply_searchable_attributes",
        "_apply_filterable_attributes",
        "_apply_ranking_rules",
        "_apply_stop_words",
    )
    for step_name in steps:
        getattr(self, step_name)(model=model, index=index)

    sys.stdout.write(f"Settings applied for {model}\n")
def _apply_filterable_attributes(self, model: Optional[Type[Model]], index: Any) -> None:
    """Apply filterable attributes settings to the index.

    Collects the mapped names of all FilterField fields declared by the model
    and sets them, together with ``content_type_id_filter``, as the index's
    filterable attributes. Failures are reported on stdout, not raised.

    Args:
        model: The model to update filterable attributes for. May be None,
            in which case only ``content_type_id_filter`` is applied.
        index: The MeiliSearch index to apply settings to.
    """
    # This attribute is always filterable, whatever the model declares.
    filter_fields = ["content_type_id_filter"]

    # ``model`` is Optional: without a model there are no fields to add.
    search_fields = model.get_search_fields() if model is not None else []
    for field in search_fields:
        if not isinstance(field, FilterField):
            continue
        try:
            filter_fields.append(_get_field_mapping(field))
        except Exception as err:
            # Still best effort, but report the failure.
            sys.stdout.write(f"WARN: Failed to map filter field {field!r} on {model}: {err}\n")

    try:
        index.index.update_filterable_attributes(filter_fields)
    except Exception as err:
        sys.stdout.write(f"WARN: Failed to update filterable_attributes on {model}\n")
        sys.stdout.write(f"{err}\n")
204 | 205 | Extracts searchable fields from the model and sorts them by their 206 | boost values in descending order (highest boost first). 207 | 208 | Args: 209 | model: The model to get field boosts for. 210 | 211 | Returns: 212 | List[str]: A list of field names ordered by their boost values in descending order. 213 | """ 214 | if not model or not hasattr(model, "search_fields"): 215 | return [] 216 | 217 | fields = [] 218 | for field in model.search_fields: 219 | if not isinstance(field, (SearchField, AutocompleteField)): 220 | continue 221 | boost = 1 222 | if hasattr(field, "boost"): 223 | # Ensure boost is a number, default to 1 if None or invalid 224 | try: 225 | boost = 1 if field.boost is None else field.boost 226 | except (TypeError, ValueError): 227 | boost = 1 228 | fields.append((field.field_name, boost)) # noqa: PERF401 229 | 230 | # Sort safely with a key function that handles None values 231 | def safe_sort_key(item): 232 | """Safe sorting key function that handles None boost values. 
233 | 234 | Args: 235 | item: A tuple of (field_name, boost_value) 236 | 237 | Returns: 238 | int or float: The boost value or 0 if the boost is None 239 | """ 240 | _, boost = item 241 | # Return a default value (0) if boost is None 242 | return 0 if boost is None else boost 243 | 244 | sorted_fields = [field[0] for field in sorted(fields, key=safe_sort_key, reverse=True)] 245 | return sorted_fields 246 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/backend.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Type, TypeVar, Union 2 | 3 | import meilisearch 4 | from django.db.models import Model, QuerySet 5 | from django.utils.functional import cached_property 6 | from wagtail.search.backends.base import BaseSearchBackend, EmptySearchResults 7 | 8 | from .index import ( 9 | MeiliIndexRegistry, 10 | MeiliSearchModelIndex, 11 | ) 12 | from .query import MeiliSearchAutocompleteQueryCompiler, MeiliSearchQueryCompiler 13 | from .rebuilder import MeiliSearchRebuilder 14 | from .results import MeiliSearchResults 15 | from .settings import MeiliSettings 16 | from .utils import class_is_indexed, get_indexed_models 17 | 18 | T = TypeVar("T", bound=Model) 19 | 20 | 21 | class MeiliSearchBackend(BaseSearchBackend): 22 | """ 23 | A search backend implementation for MeiliSearch. 24 | 25 | This class provides methods to interact with MeiliSearch for indexing and searching content. 26 | """ 27 | 28 | query_compiler_class: MeiliSearchQueryCompiler = MeiliSearchQueryCompiler 29 | autocomplete_query_compiler_class: MeiliSearchAutocompleteQueryCompiler = ( 30 | MeiliSearchAutocompleteQueryCompiler 31 | ) 32 | results_class: MeiliSearchResults = MeiliSearchResults 33 | rebuilder_class: MeiliSearchRebuilder = MeiliSearchRebuilder 34 | 35 | def __init__(self, params: Dict[str, Any]) -> None: 36 | """ 37 | Initialize the MeiliSearchBackend. 
@cached_property
def client(self) -> meilisearch.Client:
    """Lazily initialize and return the MeiliSearch client.

    ``cached_property`` stores the returned client on the instance, so the
    body runs at most once per backend (and not at all when ``__init__`` has
    already assigned ``self.client``).

    Returns:
        meilisearch.Client: The initialized MeiliSearch client.
    """
    # ``_client`` is not guaranteed to exist on the instance, so read it
    # defensively instead of raising AttributeError on first access.
    existing = getattr(self, "_client", None)
    if existing is None:
        existing = self._init_client()
        self._client = existing
    return existing
def _init_search_params(self) -> Dict[str, Any]:
    """Build the base parameters sent with every MeiliSearch query.

    The result limit comes from ``self.query_limit``. Only the ``id``
    attribute is requested, and both match positions and the ranking score
    are switched on.

    Returns:
        dict: The initialized search parameters.
    """
    return dict(
        limit=self.query_limit,
        attributesToRetrieve=["id"],
        showMatchesPosition=True,
        showRankingScore=True,
    )
def refresh_index(self) -> None:
    """Refresh the index of every indexed model, each index only once."""
    already_refreshed: List[MeiliSearchModelIndex] = []
    for indexed_model in get_indexed_models():
        model_index = self.get_index_for_model(indexed_model)
        # Several models may resolve to the same index object; skip repeats.
        if model_index in already_refreshed:
            continue
        model_index.refresh()
        already_refreshed.append(model_index)
def search(
    self,
    query: str,
    model_or_queryset: Union[Type[Model], QuerySet],
    fields: Optional[List[str]] = None,
    operator: Optional[str] = None,
    order_by_relevance: bool = True,
) -> Union[MeiliSearchResults, EmptySearchResults]:
    """Run a full-text search with the backend's standard query compiler.

    This is a thin wrapper that forwards to ``_search`` using
    ``self.query_compiler_class``.

    Args:
        query (str): The search query.
        model_or_queryset: The model or queryset to search within.
        fields (list, optional): The fields to search in.
        operator (str, optional): The operator to use for multiple search terms.
        order_by_relevance (bool, optional): Whether to order results by relevance.

    Returns:
        SearchResults: The search results.
    """
    compiler_options = {
        "fields": fields,
        "operator": operator,
        "order_by_relevance": order_by_relevance,
    }
    compiler_class = self.query_compiler_class
    return self._search(compiler_class, query, model_or_queryset, **compiler_options)
def facet(self, field_name: str) -> OrderedDict:
    """Retrieve facet counts for a field from MeiliSearch.

    Usage:

    ```python
    Page.objects.search('query').facet('content_type_id')
    ```

    which returns the facet values ordered by count, highest first:

    ```
    OrderedDict([('58', 197), ('75', 2), ('52', 1), ('54', 1), ('61', 1)])
    ```

    The index of every descendant model of the queried model is asked for the
    distribution of ``<field_name>_filter``. Indexes that do not list that
    attribute as filterable are skipped.

    Args:
        field_name (str): The name of the field for which to retrieve facet data.

    Returns:
        OrderedDict: Facet value -> count, sorted by count descending. Empty
        when the query has no ``query_string`` or no index exposes the facet.
    """
    qc = self.query_compiler
    try:
        terms = qc.query.query_string
    except AttributeError:
        # Query objects without a plain query string cannot be faceted. Return
        # an empty mapping so the annotated return type always holds, and do
        # so before doing any model discovery or network calls.
        return OrderedDict()

    filter_field = f"{field_name}_filter"
    counts = {}
    for model in get_descendant_models(qc.queryset.model):
        index = self.backend.get_index_for_model(model)
        filterable_fields = index.client.index(index.label).get_filterable_attributes()
        if filter_field not in filterable_fields:
            continue
        result = index.search(terms, {"facets": [filter_field]})
        try:
            distribution = result["facetDistribution"][filter_field]
        except KeyError:
            continue
        # NOTE(review): a value present in several indexes keeps the count of
        # the last index seen (update, not sum) — confirm that is intended.
        counts.update(distribution)

    return OrderedDict(sorted(counts.items(), key=lambda item: item[1], reverse=True))
def _build_queries(
    self,
    models: List[Type[Model]],
    terms: str,
    filters: Optional[List[Tuple[str, str]]] = None,
    operator: str = "AND",
) -> List[Dict[str, Any]]:
    """Build the list of queries for MeiliSearch's multi-search API.

    One query dictionary is produced for every existing index that belongs to
    one of ``models``. Each query carries the search terms, the backend's base
    search parameters and, when filters are given, a filter expression made of
    the filters that are filterable on that particular index.

    Args:
        models: The models to search.
        terms: The search terms.
        filters: The filters to apply, as (field, value) tuples.
            Defaults to None.
        operator: Keyword used to join the individual filter clauses,
            ``"AND"`` by default.

    Returns:
        List[Dict[str, Any]]: A list of query dictionaries ready for the API.
    """
    filters = filters or []

    # Only indexes belonging to the requested models may be searched; hits from
    # unrelated indexes would contribute ids that do not belong to the queryset.
    # NOTE(review): assumes get_index_label(model) equals the uid of that
    # model's MeiliSearch index (facet() addresses indexes by label) — confirm.
    wanted_uids = {get_index_label(model) for model in models}

    # MeiliSearch does not create an index for models without documents, and a
    # multi_search call naming a missing index fails as a whole, so only
    # indexes that actually exist are queried.
    limit = self.backend.settings.query_limit
    active_indexes = self.backend.client.get_indexes({"limit": limit})["results"]

    queries: List[Dict[str, Any]] = []
    for index in active_indexes:
        if index.uid not in wanted_uids:
            continue

        query: Dict[str, Any] = {
            "indexUid": index.uid,
            "q": terms,
            **self.backend.search_params,
        }

        if filters:
            # Fetched only when needed: this is a request to the server.
            filterable_fields = index.get_filterable_attributes()
            clauses = []
            for item in filters:
                filter_field = f"{item[0]}_filter"
                if filter_field not in filterable_fields:
                    continue
                # Escape backslashes and single quotes so a value cannot break
                # out of the quoted literal in the filter expression.
                value = str(item[1]).replace("\\", "\\\\").replace("'", "\\'")
                clauses.append(f"{filter_field} = '{value}'")
            # Never send an empty filter expression.
            if clauses:
                query["filter"] = f" {operator} ".join(clauses)

        queries.append(query)

    return queries
def _do_count(self) -> int:
    """Count the search results, running the search if nothing is cached yet.

    Django's paginator asks for the count before the results, so this method
    can be called before ``_do_search``. In that case the search is run once
    here and both ``_results_cache`` and ``_count_cache`` are populated so the
    later result fetch does not repeat it.

    Returns:
        int: The number of search results.
    """
    # Compare against None rather than truthiness: a cached count of 0 or an
    # empty cached result list is a valid answer and must not trigger another
    # search.
    if self._count_cache is not None:
        return self._count_cache
    if self._results_cache is not None:
        return len(self._results_cache)

    # Materialize once and derive the count from it instead of issuing a
    # separate COUNT query followed by a second fetch.
    self._results_cache = list(self._do_search())
    self._count_cache = len(self._results_cache)
    return self._count_cache
The default `soft` strategy will do an "add or update" call for each document sent to it, while the `hard` strategy will delete every document in the index and then replace them. 40 | 41 | There are tradeoffs with either strategy - `hard` will guarantee that your search data matches your model data, but be hard work on the CPU for longer. `soft` will be faster and less CPU intensive, but if a field is removed from your model between indexings, that field data will remain in the search index. 42 | 43 | ### Delta strategy 44 | 45 | The `delta` strategy is useful if you habitually add created_at and updated_at timestamps to your models. This strategy will check the fields... 46 | 47 | * `first_published_at` 48 | * `last_published_at` 49 | * `created_at` 50 | * `updated_at` 51 | 52 | And only update the records for objects where one or more of these fields has a date more recent than the time delta specified in the settings. 53 | 54 | ```python 55 | WAGTAILSEARCH_BACKENDS = { 56 | 'default': { 57 | 'BACKEND': 'wagtail_meilisearch.backend', 58 | 'HOST': os.environ.get('MEILISEARCH_HOST', 'http://127.0.0.1'), 59 | 'PORT': os.environ.get('MEILISEARCH_PORT', '7700'), 60 | 'MASTER_KEY': os.environ.get('MEILI_MASTER_KEY', ''), 61 | 'UPDATE_STRATEGY': 'delta', 62 | 'UPDATE_DELTA': { 63 | 'weeks': -1 64 | } 65 | } 66 | } 67 | ``` 68 | 69 | If the delta is set to `{'weeks': -1}`, wagtail-meilisearch will only update indexes for documents where one of the timestamp fields has a date within the last week. Your time delta _must_ be negative. 70 | 71 | Under the hood we use [Arrow](https://arrow.readthedocs.io), so you can use any keyword args supported by [Arrow's `shift()`](https://arrow.readthedocs.io/en/latest/index.html#replace-shift). 72 | 73 | If you set `UPDATE_STRATEGY` to `delta` but don't provide a value for `UPDATE_DELTA`, wagtail-meilisearch will default to `{'weeks': -1}`.
74 | 75 | ## Skip models 76 | 77 | Sometimes you might have a site where a certain page model is guaranteed not to change, for instance an archive section. After creating your initial search index, you can add a `SKIP_MODELS` key to the config to tell wagtail-meilisearch to ignore specific models when running `update_index`. Behind the scenes wagtail-meilisearch returns a dummy model index to the `update_index` management command for every model listed in your `SKIP_MODELS` - this ensures that this setting only affects `update_index`, so if you manually edit one of the models listed it should get re-indexed with the update signal. 78 | 79 | ```python 80 | WAGTAILSEARCH_BACKENDS = { 81 | 'default': { 82 | 'BACKEND': 'wagtail_meilisearch.backend', 83 | 'HOST': os.environ.get('MEILISEARCH_HOST', 'http://127.0.0.1'), 84 | 'PORT': os.environ.get('MEILISEARCH_PORT', '7700'), 85 | 'MASTER_KEY': os.environ.get('MEILI_MASTER_KEY', ''), 86 | 'UPDATE_STRATEGY': 'delta', 87 | 'SKIP_MODELS': [ 88 | 'core.ArchivePage', 89 | ] 90 | } 91 | } 92 | ``` 93 | 94 | ## Stop Words 95 | 96 | Stop words are words for which we don't want to place significance on their frequency. For instance, the search query `tom and jerry` would return far less relevant results if the word `and` was given the same importance as `tom` and `jerry`. There's a fairly sane list of English language stop words supplied, but you can also supply your own. This is particularly useful if you have a lot of content in any other language. 97 | 98 | ```python 99 | MY_STOP_WORDS = ['a', 'list', 'of', 'words'] 100 | 101 | WAGTAILSEARCH_BACKENDS = { 102 | 'default': { 103 | 'BACKEND': 'wagtail_meilisearch.backend', 104 | [...] 105 | 'STOP_WORDS': MY_STOP_WORDS 106 | }, 107 | } 108 | ``` 109 | 110 | Or alternatively, you can extend the built in list. 
111 | 112 | ```python 113 | from wagtail_meilisearch.settings import STOP_WORDS 114 | 115 | MY_STOP_WORDS = STOP_WORDS + WELSH_STOP_WORDS + FRENCH_STOP_WORDS 116 | 117 | WAGTAILSEARCH_BACKENDS = { 118 | 'default': { 119 | 'BACKEND': 'wagtail_meilisearch.backend', 120 | [...] 121 | 'STOP_WORDS': MY_STOP_WORDS 122 | }, 123 | } 124 | ``` 125 | 126 | ## Ranking 127 | 128 | We now support Meilisearch's native ranking system, which is considerably faster than the rather hacky way we were having to do it before. Meilisearch takes a [list of fields ordered by precedence](https://www.meilisearch.com/docs/learn/relevancy/attribute_ranking_order) to affect the attribute ranking, so we build that list by inspecting the `index.SearchField`s and `index.AutocompleteField`s on each model and ordering by boost. As an example, if you want the page title to be the most important field to rank on... 129 | 130 | ```python 131 | search_fields = Page.search_fields + [ 132 | index.AutocompleteField("title", boost=10), 133 | index.SearchField("body"), 134 | index.SearchField("search_description", boost=5), 135 | ] 136 | 137 | ``` 138 | 139 | Any field that doesn't have a `boost` value will be given a default of 1 but will still be sent to Meilisearch's settings as part of the ordered list, so the above settings send an attribute ranking order to Meilisearch of... 140 | 141 | ```python 142 | ['title', 'search_description', 'body'] 143 | ``` 144 | 145 | In the backend, we automatically annotate the search results with their ranking, with a float between 0 and 1 as `search_rank`, so in your search view you can sort by that value.
146 | 147 | ```python 148 | def search_view(request): 149 | search_query = request.GET.get('query', '') 150 | search_results = Page.objects.search(search_query) 151 | 152 | # Results are already sorted by search_rank 153 | # You can access the rank for each result 154 | for result in search_results: 155 | print(f"Result: {result.title}, Rank: {result.search_rank}") 156 | 157 | return render(request, 'search_results.html.j2', { 158 | 'search_query': search_query, 159 | 'search_results': search_results, 160 | }) 161 | ``` 162 | 163 | And you might even fancy using the search rank in your template... 164 | 165 | ```jinja2 166 | {% for result in search_results %} 167 |
168 |

{{ result.title }}

169 |

Relevance: {{ result.search_rank }}

170 |
171 | {% endfor %} 172 | ``` 173 | 174 | ## Faceting 175 | 176 | We now support faceting. In order to use it, you need to add `FilterField`s to your model on any field that you might want to facet on... 177 | 178 | ```python 179 | search_fields = Page.search_fields + [ 180 | index.AutocompleteField("title", boost=10), 181 | index.SearchField("body"), 182 | index.SearchField("search_description", boost=5), 183 | index.FilterField("category"), 184 | ] 185 | ``` 186 | 187 | With that in place, you can call `facet` on a search to get an OrderedDict of the facet values and their counts. By default, Wagtail adds several `FilterField`s to the Page model too, so for instance you can get the facet results of `content_type_id` with... 188 | 189 | ```python 190 | Page.objects.search("query").facet("content_type") 191 | 192 | # OrderedDict([('58', 197), ('75', 2), ('52', 1), ('54', 1), ('61', 1)]) 193 | ``` 194 | 195 | The ordered dict contains tuples of the form `(value, count)` where `value: str` is the value of the field (typically its pk) and `count` is the number of documents that have that value. 196 | 197 | ### Filtering 198 | 199 | Armed with your facet counts, you can filter your search results by passing `filters` to the `filter` method. For example, to filter by `content_type_id`... 200 | 201 | ```python 202 | Page.objects.search("query").filter(filters=[("content_type", "58")]) 203 | 204 | # , , ...] 205 | ``` 206 | 207 | The `filters` param should be a list of tuples, where each tuple is of the form `(field, value)`. Being a list, you can pass multiple tuples to filter by multiple fields. For example, to filter by `content_type` and `category`... 208 | 209 | ```python 210 | Page.objects.search("query").filter(filters=[("content_type", "58"), ("category", "1")]) 211 | 212 | # , , ...] 213 | ``` 214 | 215 | And finally, you can choose the operator for the filter. By default, the operator is `AND`, but you can also use `OR`... 
216 | 217 | ```python 218 | Page.objects.search("query").filter(filters=[("content_type", "58"), ("category", "1")], operator="OR") 219 | 220 | # , , ...] 221 | ``` 222 | 223 | ## Query limits 224 | 225 | If you have a lot of DB documents, the final query to the database can be quite a heavy load. Meilisearch's relevance means that it's usually pretty safe to restrict the number of documents Meilisearch returns, and therefore the number of documents your app needs to get from the database. The limit is **per model**, so if your project has 10 page types and you set a limit of 1000, there's a possible 10000 results. 226 | 227 | ```python 228 | WAGTAILSEARCH_BACKENDS = { 229 | 'default': { 230 | 'BACKEND': 'wagtail_meilisearch.backend', 231 | [...] 232 | 'QUERY_LIMIT': 1000 233 | }, 234 | } 235 | ``` 236 | 237 | ## Contributing 238 | 239 | If you want to help with the development I'd be more than happy. The vast majority of the heavy lifting is done by MeiliSearch itself, but there is a TODO list... 240 | 241 | 242 | ### TODO 243 | 244 | * Write tests 245 | * Performance improvements 246 | * Make use of the async in meilisearch-python 247 | * ~~Faceting~~ 248 | * ~~Implement boosting in the sort algorithm~~ 249 | * ~~Implement stop words~~ 250 | * ~~Search results~~ 251 | * ~~Add support for the autocomplete api~~ 252 | * ~~Ensure we're getting results by relevance~~ 253 | 254 | ## Change Log 255 | 256 | #### 1.0.0 257 | * Big speed improvements thanks to using Meilisearch's native ranking system 258 | * Adds faceting 259 | * Adds filtering 260 | * Adds typing throughout 261 | 262 | #### 0.17.3 263 | * Fixes a bug where the meilisearch indexes could end up with a wrong maxTotalHits 264 | 265 | #### 0.17.2 266 | * Fixes a bug where the backend could report the wrong counts for results. This turned out to be down to the fact that _do_count can sometimes get called before _do_search, possibly due to Django's paginator. 
This finally explains why sometimes search queries ran twice. 267 | 268 | #### 0.17.1 269 | * Fixes a bug where multi_search can fail when a model index doesn't exist. For models that have no documents, Meilisearch doesn't create an empty index, so we need to check active indexes before calling multi_search; otherwise the entire call fails. 270 | 271 | #### 0.17.0 272 | * A few small performance and reliability improvements, and a lot of refactoring of the code into multiple files to make future development a bit simpler. 273 | 274 | #### 0.16.0 275 | * Thanks to @BertrandBordage, a massive speed improvement through using the /multi-search endpoint introduced in Meilisearch 1.1.0 276 | 277 | #### 0.14.0 278 | * Adds Django 4 support and compatibility with the latest meilisearch server (0.30.2) and meilisearch python (0.23.0) 279 | 280 | #### 0.14.0 281 | * Updates to work with the latest versions of Meilisearch (v0.28.1) and meilisearch-python (^0.19.1) 282 | 283 | #### 0.13.0 284 | * Yanked, sorry 285 | 286 | #### 0.12.0 287 | * Adds QUERY_LIMIT option to settings 288 | 289 | #### 0.11.0 290 | * Compatibility changes to keep up with MeiliSearch and [meilisearch-python](https://github.com/meilisearch/meilisearch-python) 291 | * we've also switched to more closely tracking the major and minor version numbers of meilisearch-python so that it's easier to see compatibility at a glance. 292 | * Note: if you're upgrading from an old version of MeiliSearch you may need to destroy MeiliSearch's data directory and start with a clean index. 293 | 294 | #### 0.1.5 295 | * Adds the delta update strategy 296 | * Adds the SKIP_MODELS setting 297 | * Adds support for using boost on your search fields 298 | 299 | 300 | ### Thanks 301 | 302 | Thank you to the devs of [Wagtail-Whoosh](https://github.com/wagtail/wagtail-whoosh). Reading the code over there was the only way I could work out how Wagtail Search backends are supposed to work.
# File: src/wagtail_meilisearch/index.py
"""Index objects used by the Wagtail MeiliSearch backend.

Defines a registry of per-model indexes, the per-model index wrapper that
talks to MeiliSearch, and a no-op index used for skipped models.
"""

import contextlib
import logging

# Import for type checking only
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Type, cast

import arrow
from django.core.cache import cache
from django.db.models import Model
from django.utils.functional import cached_property
from meilisearch.index import Index
from requests.exceptions import HTTPError

from .utils import get_document_fields

if TYPE_CHECKING:
    from datetime import datetime

    from .backend import MeiliSearchBackend
    from .settings import MeiliSettings

try:
    from cacheops import invalidate_model

    USING_CACHEOPS = True
except ImportError:
    USING_CACHEOPS = False

logger = logging.getLogger(__name__)

# Number of documents sent to MeiliSearch per request by ``add_items``.
_ADD_ITEMS_CHUNK_SIZE = 100


class MeiliIndexError(Exception):
    """Raised when the settings of a MeiliSearch index cannot be retrieved."""


class MeiliIndexRegistry:
    """A registry of all the indexes we're using.

    This class maintains a registry of all MeiliSearch indexes and provides methods
    to retrieve and manage them.

    Attributes:
        indexes (Dict[str, MeiliSearchModelIndex]): Dictionary mapping labels to index objects.
    """

    # Defined on the class, so the mapping is shared by every registry instance:
    # ``register`` mutates this dict in place rather than rebinding it per instance.
    indexes: Dict[str, "MeiliSearchModelIndex"] = {}

    def __init__(self, backend: Any, settings: Any) -> None:
        """Initialize the MeiliIndexRegistry.

        Args:
            backend: The search backend instance.
            settings: The settings for the search backend.
        """
        self.backend = backend
        self.client = backend.client
        self.settings = settings

    def _get_label(self, model: Type[Model]) -> str:
        """Get a unique label for the model's index.

        Args:
            model: The model to get the label for.

        Returns:
            str: The model's ``_meta.label`` with dots replaced by dashes.
        """
        return model._meta.label.replace(".", "-")

    def get_index_for_model(self, model: Type[Model]) -> "MeiliSearchModelIndex":
        """Get the index for a specific model.

        This gets called by the get_index_for_model in the backend which in turn is called by
        update_index management command so needs to exist as a method on the backend.

        Lookup order: the in-process registry, then the Django cache, and finally a
        freshly built ``MeiliSearchModelIndex`` (which is then cached and registered).

        Args:
            model: The model we're looking for the index for.

        Returns:
            MeiliSearchModelIndex: The index for the model.
        """
        label = self._get_label(model)

        # See if it's in our registry
        if label in self.indexes:
            return self.indexes[label]

        # See if it's in the cache
        cache_key = f"meili_index_{label}"
        index = cache.get(cache_key)
        if index is None:
            index = MeiliSearchModelIndex(
                backend=self.backend,
                model=model,
            )
            cache.set(cache_key, index)

        self.register(label, index)
        return index

    def register(self, label: str, index: "MeiliSearchModelIndex") -> None:
        """Register an index with a label.

        Args:
            label: The label to register the index under.
            index: The index to register.
        """
        self.indexes[label] = index

    def _refresh(self, uid: str, model: Type[Model]) -> "MeiliSearchModelIndex":
        """Refresh an index by deleting it and looking it up again.

        Args:
            uid: The unique ID of the index to refresh.
            model: The model associated with the index.

        Returns:
            MeiliSearchModelIndex: The index returned by ``get_index_for_model``.
        """
        self.client.get_index(uid).delete()
        # NOTE(review): if the model is already registered, get_index_for_model
        # returns the existing wrapper rather than building a new one.
        return self.get_index_for_model(model)


class MeiliSearchModelIndex:
    """Creates a working index for each model sent to it."""

    def __init__(self, backend: Any, model: Optional[Type[Model]]) -> None:
        """Initialize the MeiliSearchModelIndex.

        Creates a working index for the specified model and sets up all the necessary
        properties for interacting with MeiliSearch.

        Args:
            backend: The backend instance.
            model: The Django model to be indexed.
        """
        self.backend: "MeiliSearchBackend" = backend
        self.settings: "MeiliSettings" = backend.settings
        settings: "MeiliSettings" = self.settings
        self.model: Optional[Type[Model]] = model

        self.client: Any = backend.client
        self.query_limit: int = settings.query_limit
        self.name: str = "" if model is None else model._meta.label
        self.model_fields: Set[str] = set()
        if model is not None:
            self.model_fields = {field.name for field in model._meta.fields}

        self.index: Index = self._set_index(model)
        self.search_params: Dict[str, Any] = {
            "limit": self.query_limit,
            "attributesToRetrieve": ["id", "first_published_at"],
            "showMatchesPosition": True,
        }
        self.update_strategy: str = settings.update_strategy
        self.update_delta: Optional[Dict[str, int]] = settings.update_delta
        # Model fields consulted by the "delta" update strategy.
        self.delta_fields: List[str] = [
            "created_at",
            "updated_at",
            "first_published_at",
            "last_published_at",
        ]
        self.label: str = "" if model is None else self._get_label(model)

    def _get_index_settings(self, label: str) -> Dict[str, Any]:
        """Get the settings for the index.

        Retrieves the current settings for the specified MeiliSearch index.

        Args:
            label: The label of the index.

        Returns:
            Dict[str, Any]: The settings for the index.

        Raises:
            MeiliIndexError: If unable to get the index settings.
        """
        try:
            return self.client.get_index(label).get_settings()
        except Exception as err:
            msg = f"Failed to get settings for {label}: {err}"
            raise MeiliIndexError(msg) from err

    def _set_index(self, model: Optional[Type[Model]]) -> Index:
        """Set up the index for the given model.

        Returns the already-stored index handle when there is one; otherwise
        obtains a handle for the model's label and stores it on ``self.index``.

        Args:
            model: The Django model to create an index for.

        Returns:
            Index: The MeiliSearch index object.
        """
        if hasattr(self, "index") and self.index:
            return self.index

        if model is None:
            return cast("Index", None)  # This should never be reached in practice

        label = self._get_label(model)
        # if index doesn't exist, create
        # NOTE(review): client.index() appears to only build a local handle without
        # issuing a request, in which case this except branch never runs - confirm.
        try:
            index = self.client.index(label)
        except HTTPError:
            # Create the index with primary key setting
            Index.create(self.client.http.config, label, {"primaryKey": "id"})
            index = self.client.index(label)

        self.index = index

        return index

    def _get_label(self, model: Type[Model]) -> str:
        """Get a unique label for the model's index.

        The label is computed once and stored on ``self.label``; later calls return
        the stored value regardless of the ``model`` argument.

        Args:
            model: The model to get the label for.

        Returns:
            str: A unique label for the model's index.
        """
        if hasattr(self, "label") and self.label:
            return self.label

        self.label = label = model._meta.label.replace(".", "-")
        return label

    def _rebuild(self) -> None:
        """Rebuild the index by deleting it and setting it up again.

        Deleting the index removes all of its documents and settings.
        """
        self.index.delete()
        # NOTE(review): self.index is still set here, so _set_index returns the
        # existing handle instead of creating a new one - confirm this is intended.
        self._set_index(self.model)

    def add_model(self, model: Type[Model]) -> None:
        """Add a model to the index. This method is a no-op as adding is done on initialization.

        Args:
            model (Model): The Django model to add to the index.
        """

    def get_index_for_model(self, model: Type[Model]) -> "MeiliSearchModelIndex":
        """Get the index for the given model.

        Args:
            model (Model): The Django model to get the index for.

        Returns:
            MeiliSearchModelIndex: This index object.
        """
        self._set_index(model)
        return self

    def _get_document_fields(self, model: Type[Model], item: Model) -> Dict[str, Any]:
        """Get the fields for a document to be indexed.

        Delegates to ``utils.get_document_fields``.

        Args:
            model: The Django model of the item.
            item: The item to be indexed.

        Returns:
            Dict[str, Any]: The fields of the document to be indexed.
        """
        return get_document_fields(model, item)

    def _create_document(self, model: Type[Model], item: Model) -> Dict[str, Any]:
        """Create a document to be indexed.

        Builds a document dictionary from the item's fields plus its ``id``.

        Args:
            model: The Django model of the item.
            item: The item to be indexed.

        Returns:
            Dict[str, Any]: The document to be indexed.
        """
        doc_fields = dict(self._get_document_fields(model, item))
        doc_fields.update(id=item.id)
        return doc_fields

    def refresh(self) -> None:
        """Refresh the index.

        This method is a no-op in the current implementation.
        It exists to maintain compatibility with the Wagtail search API.
        """

    def add_item(self, item: Model) -> None:
        """Add a single item to the index.

        Indexes a single model instance according to the current update strategy.
        If using the delta update strategy, only adds the item if it was modified
        within the delta time period.

        Args:
            item: The item to be added to the index.
        """
        if self.model is None:
            return

        # With the delta strategy an item outside the delta window is skipped,
        # matching what add_items does for bulk updates.
        if self.update_strategy == "delta" and not self._check_deltas([item]):
            return

        doc = self._create_document(self.model, item)
        if self.update_strategy == "soft":
            self.index.update_documents([doc])
        else:
            self.index.add_documents([doc])

    def add_items(self, item_model: Type[Model], items: List[Model]) -> bool:
        """Add multiple items to the index.

        Indexes multiple model instances according to the current update strategy.
        Processes items in chunks of 100 to avoid overwhelming the MeiliSearch instance.
        If using the delta update strategy, only adds items that were modified
        within the delta time period.

        Sending a chunk is best-effort: a failure is logged and the remaining
        chunks are still processed.

        Args:
            item_model: The Django model of the items.
            items: The items to be added to the index.

        Returns:
            bool: Always True; indexing failures are logged rather than raised.
        """
        if USING_CACHEOPS:
            # Cache invalidation is best-effort and must not block indexing.
            with contextlib.suppress(Exception):
                invalidate_model(item_model)

        if self.model is None:
            return True

        partial = self.update_strategy in ("soft", "delta")
        for start in range(0, len(items), _ADD_ITEMS_CHUNK_SIZE):
            chunk = items[start : start + _ADD_ITEMS_CHUNK_SIZE]
            if self.update_strategy == "delta":
                chunk = self._check_deltas(chunk)
            prepared = [self._create_document(self.model, item) for item in chunk]
            if prepared:
                self._send_documents(prepared, partial=partial)
        return True

    def _send_documents(self, documents: List[Dict[str, Any]], *, partial: bool) -> None:
        """Send one chunk of documents to MeiliSearch, logging any failure.

        Args:
            documents: The prepared documents to send.
            partial: If True use ``update_documents``, otherwise ``add_documents``.
        """
        try:
            if partial:
                self.index.update_documents(documents)
            else:
                self.index.add_documents(documents)
        except Exception:
            # Keep the bulk update best-effort, but leave a trace of what failed.
            logger.exception(
                "Failed to index %d document(s) into %s",
                len(documents),
                self.label,
            )

    @cached_property
    def _has_date_fields(self) -> bool:
        """Check if the model has any of the delta fields.

        Determines if the model has any fields that can be used for delta updates
        (created_at, updated_at, first_published_at, last_published_at).

        Returns:
            bool: True if the model has any of the delta fields, False otherwise.
        """
        return bool(self.model_fields.intersection(self.delta_fields))

    def _modified_since(self, obj: Model, since: "datetime") -> bool:
        """Tell whether any delta field of ``obj`` holds a value later than ``since``.

        Args:
            obj: The object to inspect.
            since: The cut-off moment.

        Returns:
            bool: True if at least one delta field value compares greater than ``since``.
        """
        for field in self.delta_fields:
            val = getattr(obj, field, None)
            if not val:
                continue
            try:
                if val > since:
                    return True
            except TypeError:
                # Value is not comparable with the cut-off (e.g. a naive datetime
                # against an aware one); ignore this field.
                continue
        return False

    def _check_deltas(self, objects: List[Model]) -> List[Model]:
        """Filter objects based on the delta update strategy.

        When using the delta update strategy, this method filters the objects list
        to only include items that have been created or modified within the
        specified time period.

        Args:
            objects: The objects to be filtered.

        Returns:
            List[Model]: The filtered list of objects. Empty when no delta is
                configured or the model has none of the delta fields.
        """
        if not self.update_delta or not self._has_date_fields:
            return []

        since = arrow.now().shift(**self.update_delta).datetime
        return [obj for obj in objects if self._modified_since(obj, since)]

    def delete_item(self, obj: Model) -> None:
        """Delete an item from the index.

        Removes a single document from the index based on its ID.

        Args:
            obj: The object to be deleted from the index.
        """
        self.index.delete_document(obj.id)

    def delete_all_documents(self) -> None:
        """Delete all documents from the index.

        Calls ``delete_all_documents`` on the underlying MeiliSearch index.
        """
        self.index.delete_all_documents()

    def search(self, query: str, extras: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Perform a search on the index.

        Executes a search query against the MeiliSearch index with the specified
        search parameters.

        Args:
            query: The search query string.
            extras: Optional additional search parameters to include in the request.
                These will be merged with the default search parameters.

        Returns:
            Dict[str, Any]: The search results from MeiliSearch.
        """
        # Work on a copy so per-call extras never leak into the backend's shared
        # defaults and affect later searches.
        # NOTE(review): the defaults come from the backend, not self.search_params -
        # confirm that is intended.
        params = dict(self.backend.search_params)
        if extras:
            params.update(extras)

        return self.index.search(query, params)

    def __str__(self) -> str:
        """Get a string representation of the index.

        Returns:
            str: The name of the index.
        """
        return self.name


class DummyModelIndex:
    """A dummy model index that performs no actual indexing operations.

    This class enables the SKIP_MODELS feature by providing a dummy
    implementation of the MeiliSearchModelIndex interface that can receive
    add operations without actually indexing anything.

    This is useful for models that should be excluded from search but still
    need to go through the indexing workflow.
    """

    def add_model(self, model: Type[Model]) -> None:
        """Add a model to the index (no-op).

        Args:
            model: The model to be added (ignored).
        """

    def add_items(self, model: Type[Model], chunk: List[Model]) -> None:
        """Add items to the index (no-op).

        Args:
            model: The model of the items (ignored).
            chunk: The items to be added (ignored).
        """