├── tests ├── __init__.py └── test_wagtail_meilisearch.py ├── src └── wagtail_meilisearch │ ├── py.typed │ ├── __init__.py │ ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── meilisearch_indexes.py │ │ └── meilisearch_status.py │ ├── rebuilder.py │ ├── query.py │ ├── defaults.py │ ├── utils.py │ ├── settings.py │ ├── backend.py │ ├── results.py │ └── index.py ├── .gitignore ├── pyproject.toml ├── LICENSE └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/management/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_wagtail_meilisearch.py: -------------------------------------------------------------------------------- 1 | # One day. 
2 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/management/commands/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | ref/** 4 | */__pycache__/**/* 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "wagtail-meilisearch" 3 | version = "1.0.0" 4 | description = "A MeiliSearch backend for Wagtail" 5 | readme = "README.md" 6 | authors = [ 7 | { name = "Hactar", email = "systems@hactar.is" } 8 | ] 9 | requires-python = ">=3.10" 10 | dependencies = [ 11 | "arrow>=1.2.3", 12 | "wagtail>=6.0", 13 | "meilisearch>=0.36.0", 14 | ] 15 | 16 | [build-system] 17 | requires = ["uv_build>=0.7.19,<0.8.0"] 18 | build-backend = "uv_build" 19 | 20 | 21 | [tool.ruff] 22 | target-version = "py310" 23 | line-length = 100 24 | extend-exclude = ["tests"] 25 | 26 | [tool.ruff.lint] 27 | select = [ 28 | "E", "F", "B", "DJ", "C90", "S", "COM", "DTZ", "EM", 29 | "PT", "RET", "SIM", "TCH", "ARG", "PTH", "PERF" 30 | ] 31 | extend-ignore = ["E402", "RET504", "S101", "DJ012"] 32 | 33 | [tool.ruff.lint.isort] 34 | combine-as-imports = true 35 | force-wrap-aliases = true 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Hactar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 
class MeiliSearchRebuilder:
    """Prepares a model's MeiliSearch index before documents are (re)added.

    The rebuilder wraps a ``MeiliSearchModelIndex`` and, depending on the
    backend's ``update_strategy``, either reuses the existing index as-is or
    empties it first.
    """

    def __init__(self, model_index: MeiliSearchModelIndex) -> None:
        """Store the model index and the values derived from it.

        Args:
            model_index: The index wrapper for the model being rebuilt.
        """
        self.index: MeiliSearchModelIndex = model_index
        # Index label derived from the model via get_index_label.
        self.uid: str = get_index_label(self.index.model)
        # Returned from start() when the model is configured to be skipped.
        self.dummy_index: DummyModelIndex = DummyModelIndex()
        self.settings = model_index.settings

    def start(self) -> Union[MeiliSearchModelIndex, DummyModelIndex]:
        """
        Starts the rebuild process for the search index.

        The backend's configured strategy decides how the index is prepared:
        - 'hard': Deletes every document in the index so they can be added anew.
        - 'soft': Reuses the index; documents are added or updated in place.
        - 'delta': Handled exactly like 'soft' by this method. Restricting the
          update to recently saved documents is not done here.

        Returns:
            The index to write documents to, or the dummy index when the model's
            label is listed in the backend's ``skip_models``.
        """
        model: Optional[Type[Model]] = self.index.model
        # Skipped models get the dummy index instead of a real one.
        if model and model._meta.label in self.index.backend.skip_models:
            sys.stdout.write(f"SKIPPING: {model._meta.label}\n")
            return self.dummy_index

        strategy: str = self.index.backend.update_strategy

        if strategy == "soft" or strategy == "delta":
            # Soft update strategy
            index = self.index.backend.get_index_for_model(model)
        else:
            # Hard update strategy
            old_index = self.index.backend.get_index_for_model(model)
            old_index.delete_all_documents()

            # The index is fetched again after the wipe rather than reusing old_index.
            index: MeiliSearchModelIndex = self.index.backend.get_index_for_model(model)
        # NOTE(review): the flattened source does not show whether this call belongs
        # to the hard branch only or runs for every strategy; it is placed at
        # function level here - confirm against the repository.
        self.settings.apply_settings(index=index)
        return index

    def finish(self) -> None:
        """Do nothing; present as the counterpart to start()."""
        pass
def _connect_filters(self, filters: List[Any], connector: str, negated: bool) -> Optional[Q]:
    """Combine several filters into a single Q object.

    Args:
        filters: The filters to combine.
        connector: Either 'AND' or 'OR'; any other value is rejected.
        negated: When True, the combined filter is inverted.

    Returns:
        Optional[Q]: The combined (and possibly negated) Q object, or None
            when the connector is not recognised.
    """
    # Also borrowed from wagtail-whoosh
    if connector not in ("AND", "OR"):
        return None

    # AND passes the filters straight to Q; OR wraps each one and joins them.
    combined = Q(*filters) if connector == "AND" else OR([Q(fil) for fil in filters])
    return ~combined if negated else combined
SIZE_UNITS: List[str] = ["B", "KB", "MB", "GB", "TB", "PB"]


def human_readable_file_size(size_in_bytes: float) -> str:
    """Format a byte count with the largest unit that keeps it below 1024.

    Args:
        size_in_bytes: The size in bytes to convert.

    Returns:
        str: The scaled value with three decimals followed by its unit,
            e.g. '1.500 KB'. Returns 'Index too large' when the value is
            beyond the last entry of SIZE_UNITS.
    """
    scaled = size_in_bytes
    unit_position = 0
    # Each division by 1024 moves one unit up the SIZE_UNITS table.
    while scaled >= 1024:
        scaled = scaled / 1024
        unit_position += 1

    if unit_position >= len(SIZE_UNITS):
        return "Index too large"
    return f"{scaled:.3f} {SIZE_UNITS[unit_position]}"
45 | """ 46 | b = get_search_backend() 47 | stats: Dict[str, Union[float, str, Dict]] = b.client.get_all_stats() 48 | print(stats) 49 | indexes: Dict[str, Dict] = stats["indexes"] 50 | print("*" * 80) 51 | print(f"Total DB size: {human_readable_file_size(stats['databaseSize'])}") 52 | print(f"Last updated: {arrow.get(stats['lastUpdate']).format('YYYY-MM-DD HH:mm:ss')}") 53 | if not len(indexes): 54 | print("No indexes created yet") 55 | else: 56 | print("Indexes:") 57 | for k, v in indexes.items(): 58 | is_indexing = v["isIndexing"] 59 | index = b.client.get_index(k) 60 | settings = index.get_settings() 61 | settings.pop("stopWords") 62 | print(f"{k} - indexing: {is_indexing}") 63 | print(f"\t displayedAttributes: {settings.get('displayedAttributes')}") 64 | print(f"\t searchableAttributes: {settings.get('searchableAttributes')}") 65 | print(f"\t filterableAttributes: {settings.get('filterableAttributes')}") 66 | print(f"\t sortableAttributes: {settings.get('sortableAttributes')}") 67 | print(f"\t rankingRules: {settings.get('rankingRules')}") 68 | print(f"\t synonyms: {settings.get('synonyms')}") 69 | print(f"\t distinctAttribute: {settings.get('distinctAttribute')}") 70 | print(f"\t typoTolerance: {settings.get('typoTolerance')}") 71 | print(f"\t faceting: {settings.get('faceting')}") 72 | print(f"\t pagination: {settings.get('pagination')}") 73 | 74 | print("\n") 75 | print("*" * 80) 76 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/management/commands/meilisearch_status.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional, Union 2 | 3 | import arrow 4 | from django.core.management.base import BaseCommand 5 | from wagtail.search.backends import get_search_backend 6 | 7 | SIZE_UNITS: List[str] = ["B", "KB", "MB", "GB", "TB", "PB"] 8 | 9 | 10 | def human_readable_file_size(size_in_bytes: float) -> str: 11 | """Convert a size in 
class Command(BaseCommand):
    """Command to display status information about MeiliSearch indexes.

    Prints the database size, the last update time and, for each index,
    its document count and whether it is currently being indexed. The output
    can be narrowed with ``--models`` and ``--indexing``.
    """

    help = "Print some stats about the meilisearch backend"

    def add_arguments(self, parser) -> None:
        """Add command line arguments.

        Args:
            parser: The argument parser to which arguments should be added.
        """
        # Named (optional) arguments
        parser.add_argument(
            "--indexing",
            action="store_true",
            help="Show only models that MeiliSearch is currently indexing",
        )
        parser.add_argument(
            "--models",
            type=str,
            help="Show only models in this comma separated list of model labels",
        )

    def handle(self, **options) -> None:
        """Execute the command.

        Args:
            **options: Command options including 'models' (comma separated
                model labels) and 'indexing' (only show indexes in progress).
        """
        models_string: Optional[str] = options.get("models") or ""
        # Strip each label and drop empty ones so "app.A, app.B" and trailing
        # commas still match instead of silently filtering everything out.
        models: List[str] = [
            label.strip() for label in models_string.split(",") if label.strip()
        ]
        indexing: bool = options.get("indexing", False)

        # Get MeiliSearch backend and stats
        b = get_search_backend()
        stats: Dict[str, Union[float, str, Dict]] = b.client.get_all_stats()
        indexes: Dict[str, Dict] = stats["indexes"]

        print("*" * 80)
        print(f"Index DB size: {human_readable_file_size(stats['databaseSize'])}")
        print(f"Last updated: {arrow.get(stats['lastUpdate']).format('YYYY-MM-DD HH:mm:ss')}")

        if not indexes:
            print("No indexes created yet")
        else:
            print("Indexes:")
            for k, v in indexes.items():
                # Index uids use hyphens where model labels use dots.
                model = k.replace("-", ".")
                is_indexing = v["isIndexing"]

                # Both filters are optional and independent of each other.
                if models and model not in models:
                    continue
                if indexing and not is_indexing:
                    continue
                self._print_index_stats(model, v)

        print("*" * 80)

    def _print_index_stats(self, model: str, v: Dict[str, Union[int, bool]]) -> None:
        """Print statistics for a specific index.

        Args:
            model: The model name (index label with dots instead of hyphens).
            v: Dictionary containing index statistics; 'numberOfDocuments'
                and 'isIndexing' are read.
        """
        print(f"{model}")
        print(f" Documents: {v['numberOfDocuments']}")
        if v["isIndexing"] is True:
            print(" INDEXING")
        print("")
"hereafter", 111 | "hereby", 112 | "herein", 113 | "hereupon", 114 | "hers", 115 | "him", 116 | "his", 117 | "how", 118 | "however", 119 | "i", 120 | "ie", 121 | "if", 122 | "in", 123 | "inc", 124 | "indeed", 125 | "interest", 126 | "into", 127 | "is", 128 | "it", 129 | "its", 130 | "keep", 131 | "last", 132 | "latter", 133 | "latterly", 134 | "least", 135 | "less", 136 | "ltd", 137 | "made", 138 | "many", 139 | "may", 140 | "me", 141 | "meanwhile", 142 | "might", 143 | "mine", 144 | "more", 145 | "moreover", 146 | "most", 147 | "mostly", 148 | "move", 149 | "much", 150 | "must", 151 | "my", 152 | "name", 153 | "namely", 154 | "neither", 155 | "never", 156 | "nevertheless", 157 | "next", 158 | "no", 159 | "nobody", 160 | "none", 161 | "noone", 162 | "nor", 163 | "not", 164 | "nothing", 165 | "now", 166 | "nowhere", 167 | "of", 168 | "off", 169 | "often", 170 | "on", 171 | "once", 172 | "one", 173 | "only", 174 | "onto", 175 | "or", 176 | "other", 177 | "others", 178 | "otherwise", 179 | "our", 180 | "ours", 181 | "ourselves", 182 | "out", 183 | "over", 184 | "own", 185 | "part", 186 | "per", 187 | "perhaps", 188 | "put", 189 | "rather", 190 | "re", 191 | "same", 192 | "see", 193 | "seem", 194 | "seemed", 195 | "seeming", 196 | "seems", 197 | "serious", 198 | "several", 199 | "she", 200 | "should", 201 | "show", 202 | "side", 203 | "since", 204 | "so", 205 | "some", 206 | "somehow", 207 | "someone", 208 | "something", 209 | "sometime", 210 | "sometimes", 211 | "somewhere", 212 | "still", 213 | "such", 214 | "take", 215 | "than", 216 | "that", 217 | "the", 218 | "their", 219 | "them", 220 | "themselves", 221 | "then", 222 | "there", 223 | "thereafter", 224 | "thereby", 225 | "therefore", 226 | "therein", 227 | "thereupon", 228 | "these", 229 | "they", 230 | "thick", 231 | "thin", 232 | "this", 233 | "those", 234 | "though", 235 | "through", 236 | "throughout", 237 | "thru", 238 | "thus", 239 | "to", 240 | "together", 241 | "too", 242 | "top", 243 | "toward", 244 | 
def weak_lru(maxsize: int = 128, typed: bool = False) -> Callable[[F], F]:
    """
    Build an LRU cache decorator for methods that only holds a weak
    reference to "self", so it can be used on class methods.

    ``maxsize`` and ``typed`` are passed straight to ``functools.lru_cache``.
    """

    def wrapper(func: F) -> F:
        def call_with_live_self(self_ref: Callable[[], Any], *args: Any, **kwargs: Any) -> Any:
            # Resolve the weak reference back to the instance before calling.
            return func(self_ref(), *args, **kwargs)

        # The cache key contains the weak reference rather than the instance.
        cached_call = functools.lru_cache(maxsize, typed)(call_with_live_self)

        def inner(self: Any, *args: Any, **kwargs: Any) -> Any:
            return cached_call(weakref.ref(self), *args, **kwargs)

        return cast("F", functools.wraps(func)(inner))

    return wrapper
def ranked_ids_from_search_results(results: Dict[str, Any]) -> List[Tuple[int, float]]:
    """
    Extract all IDs and ranking scores from the hits of the search results,
    sorted by ranking score in descending order.

    Both response shapes are accepted: a single index result (a dict with a
    'hits' list) and a multi-index response (a dict with a 'results' list whose
    entries each carry their own 'hits'). Both are sorted the same way.

    Args:
        results (Dict[str, Any]): The search results dictionary from MeiliSearch.
            Hits are expected to carry 'id' and, optionally, '_rankingScore'.

    Returns:
        List[Tuple[int, float]]: A list of (id, ranking_score) tuples sorted by
            ranking score in descending order. Hits without an 'id' are skipped;
            a missing or None ranking score counts as 0.0. Hits with equal
            scores keep the order in which they were returned.
    """
    # Normalise both response shapes to a list of per-index results.
    if "hits" in results:
        index_results = [results]
    elif "results" in results:
        index_results = results["results"]
    else:
        index_results = []

    items: List[Tuple[int, float]] = []
    for index_result in index_results:
        if "hits" not in index_result:
            continue
        for hit in index_result["hits"]:
            if "id" not in hit:
                continue
            score = hit.get("_rankingScore")
            # An explicit None would make the sort below raise a TypeError.
            items.append((hit["id"], 0.0 if score is None else score))

    # sorted() is stable, so ties keep MeiliSearch's own ordering.
    return sorted(items, key=lambda item: item[1], reverse=True)
def __init__(self, params: Dict[str, Any]) -> None:
    """Read the MeiliSearch configuration out of ``params``.

    Args:
        params: Dictionary containing configuration parameters for MeiliSearch.
            Recognised keys and their fallbacks:
            - STOP_WORDS: words to exclude from search (default: STOP_WORDS)
            - SKIP_MODELS: models to exclude from indexing (default: [])
            - UPDATE_STRATEGY: "soft", "hard", or "delta" (default: "soft")
            - QUERY_LIMIT: maximum number of results (default: 999999)
            - RANKING_RULES: result ranking rules (default: DEFAULT_RANKING_RULES)
            - UPDATE_DELTA: time delta, only read for the "delta" strategy
              (default: {"weeks": -1})
    """
    option = params.get

    self.stop_words: List[str] = option("STOP_WORDS", STOP_WORDS)
    # NOTE(review): annotated as model classes, but the rebuilder tests
    # membership with model._meta.label - presumably label strings; confirm.
    self.skip_models: List[Type[Model]] = option("SKIP_MODELS", [])
    strategy: str = option("UPDATE_STRATEGY", "soft")
    self.update_strategy: str = strategy
    self.query_limit: int = option("QUERY_LIMIT", 999999)
    self.ranking_rules: List[str] = option("RANKING_RULES", DEFAULT_RANKING_RULES)
    # Filled in by apply_settings with the index being configured.
    self.index: Any = None
    # The delta window is only looked up when the delta strategy is active.
    self.update_delta: Optional[Dict[str, int]] = (
        option("UPDATE_DELTA", {"weeks": -1}) if strategy == "delta" else None
    )
def _apply_filterable_attributes(self, model: Optional[Type[Model]], index: Any) -> None:
    """Apply filterable attributes settings to the index.

    Collects all FilterField fields from the model and sets them, together
    with the always-present 'content_type_id_filter', as filterable attributes
    in the MeiliSearch index.

    Failures are reported on stdout and never raised, matching the other
    ``_apply_*`` methods.

    Args:
        model: The model to update filterable attributes for. May be None, in
            which case only 'content_type_id_filter' is registered.
        index: The MeiliSearch index to apply settings to.
    """
    # Add filter / facet fields
    filter_fields = ["content_type_id_filter"]

    # model is Optional; without this guard a None model raises AttributeError.
    search_fields = model.get_search_fields() if model is not None else []
    for field in search_fields:
        if not isinstance(field, FilterField):
            continue
        try:
            filter_fields.append(_get_field_mapping(field))
        except Exception as err:
            # Still best-effort: one bad field is skipped, but no longer silently.
            sys.stdout.write(f"WARN: Skipping filter field {field!r} on {model}: {err}\n")

    try:
        index.index.update_filterable_attributes(filter_fields)
    except Exception as err:
        sys.stdout.write(f"WARN: Failed to update filterable_attributes on {model}\n")
        sys.stdout.write(f"{err}\n")
204 | 205 | Extracts searchable fields from the model and sorts them by their 206 | boost values in descending order (highest boost first). 207 | 208 | Args: 209 | model: The model to get field boosts for. 210 | 211 | Returns: 212 | List[str]: A list of field names ordered by their boost values in descending order. 213 | """ 214 | if not model or not hasattr(model, "search_fields"): 215 | return [] 216 | 217 | fields = [] 218 | for field in model.search_fields: 219 | if not isinstance(field, (SearchField, AutocompleteField)): 220 | continue 221 | boost = 1 222 | if hasattr(field, "boost"): 223 | # Ensure boost is a number, default to 1 if None or invalid 224 | try: 225 | boost = 1 if field.boost is None else field.boost 226 | except (TypeError, ValueError): 227 | boost = 1 228 | fields.append((field.field_name, boost)) # noqa: PERF401 229 | 230 | # Sort safely with a key function that handles None values 231 | def safe_sort_key(item): 232 | """Safe sorting key function that handles None boost values. 
233 | 234 | Args: 235 | item: A tuple of (field_name, boost_value) 236 | 237 | Returns: 238 | int or float: The boost value or 0 if the boost is None 239 | """ 240 | _, boost = item 241 | # Return a default value (0) if boost is None 242 | return 0 if boost is None else boost 243 | 244 | sorted_fields = [field[0] for field in sorted(fields, key=safe_sort_key, reverse=True)] 245 | return sorted_fields 246 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/backend.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Type, TypeVar, Union 2 | 3 | import meilisearch 4 | from django.db.models import Model, QuerySet 5 | from django.utils.functional import cached_property 6 | from wagtail.search.backends.base import BaseSearchBackend, EmptySearchResults 7 | 8 | from .index import ( 9 | MeiliIndexRegistry, 10 | MeiliSearchModelIndex, 11 | ) 12 | from .query import MeiliSearchAutocompleteQueryCompiler, MeiliSearchQueryCompiler 13 | from .rebuilder import MeiliSearchRebuilder 14 | from .results import MeiliSearchResults 15 | from .settings import MeiliSettings 16 | from .utils import class_is_indexed, get_indexed_models 17 | 18 | T = TypeVar("T", bound=Model) 19 | 20 | 21 | class MeiliSearchBackend(BaseSearchBackend): 22 | """ 23 | A search backend implementation for MeiliSearch. 24 | 25 | This class provides methods to interact with MeiliSearch for indexing and searching content. 26 | """ 27 | 28 | query_compiler_class: MeiliSearchQueryCompiler = MeiliSearchQueryCompiler 29 | autocomplete_query_compiler_class: MeiliSearchAutocompleteQueryCompiler = ( 30 | MeiliSearchAutocompleteQueryCompiler 31 | ) 32 | results_class: MeiliSearchResults = MeiliSearchResults 33 | rebuilder_class: MeiliSearchRebuilder = MeiliSearchRebuilder 34 | 35 | def __init__(self, params: Dict[str, Any]) -> None: 36 | """ 37 | Initialize the MeiliSearchBackend. 
38 | 39 | Args: 40 | params (dict): Configuration parameters for the backend. 41 | """ 42 | super().__init__(params) 43 | self.params = params 44 | self.client = self._init_client() 45 | self.settings = MeiliSettings(params) 46 | self.index_registry = MeiliIndexRegistry( 47 | backend=self, 48 | settings=self.settings, 49 | ) 50 | self.params: Dict[str, Any] = params 51 | self.skip_models: List[Type[Model]] = params.get("SKIP_MODELS", []) 52 | self.update_strategy: str = params.get("UPDATE_STRATEGY", "soft") 53 | self.query_limit: int = params.get("QUERY_LIMIT", 999999) 54 | self.search_params: Dict[str, Any] = self._init_search_params() 55 | self.update_delta: Optional[Dict[str, int]] = self._init_update_delta() 56 | 57 | def get_index_for_model(self, model): 58 | """This gets called by the update_index management command and needs to exist 59 | as a method on the backend. 60 | 61 | Args: 62 | model (Model): The model we're looking for the index for 63 | 64 | Returns: 65 | MeiliSearchModelIndex: the index for the model 66 | """ 67 | return self.index_registry.get_index_for_model(model) 68 | 69 | @cached_property 70 | def client(self) -> meilisearch.Client: 71 | """ 72 | Lazily initialize and return the MeiliSearch client. 73 | 74 | Returns: 75 | meilisearch.Client: The initialized MeiliSearch client. 76 | """ 77 | if self._client is None: 78 | self._client = self._init_client() 79 | return self._client 80 | 81 | def _init_client(self) -> meilisearch.Client: 82 | """ 83 | Initialize the MeiliSearch client. 84 | 85 | Returns: 86 | meilisearch.Client: The initialized MeiliSearch client. 87 | 88 | Raises: 89 | Exception: If the client initialization fails. 
90 | """ 91 | try: 92 | return meilisearch.Client( 93 | "{}:{}".format(self.params["HOST"], self.params["PORT"]), 94 | self.params["MASTER_KEY"], 95 | ) 96 | except Exception as err: 97 | msg = f"Failed to initialize MeiliSearch client: {err}" 98 | raise Exception(msg) from err 99 | 100 | def _init_search_params(self) -> Dict[str, Any]: 101 | """ 102 | Initialize the search parameters. 103 | 104 | Returns: 105 | dict: The initialized search parameters. 106 | """ 107 | return { 108 | "limit": self.query_limit, 109 | "attributesToRetrieve": ["id"], 110 | "showMatchesPosition": True, 111 | "showRankingScore": True, 112 | } 113 | 114 | def _init_update_delta(self) -> Optional[Dict[str, int]]: 115 | """ 116 | Initialize the update delta for the delta update strategy. 117 | 118 | Returns: 119 | dict or None: The update delta configuration or None if not using delta strategy. 120 | """ 121 | if self.update_strategy == "delta": 122 | return self.params.get("UPDATE_DELTA", {"weeks": -1}) 123 | return None 124 | 125 | def get_rebuilder(self) -> MeiliSearchRebuilder: 126 | """ 127 | Get the index rebuilder. 128 | 129 | Returns: 130 | MeiliSearchRebuilder: The index rebuilder. 131 | """ 132 | return self.rebuilder_class(self.get_index_for_model(None)) 133 | 134 | def reset_index(self) -> None: 135 | """Reset all indexes for indexed models.""" 136 | for model in get_indexed_models(): 137 | index = self.get_index_for_model(model) 138 | index._rebuild() 139 | 140 | def add_type(self, model: Type[Model]) -> None: 141 | """ 142 | Add a new model type to the index. 143 | 144 | Args: 145 | model: The model to add to the index. 
146 | """ 147 | self.get_index_for_model(model).add_model(model) 148 | 149 | def refresh_index(self) -> None: 150 | """Refresh all indexes for indexed models.""" 151 | refreshed_indexes: List[MeiliSearchModelIndex] = [] 152 | for model in get_indexed_models(): 153 | index = self.get_index_for_model(model) 154 | if index not in refreshed_indexes: 155 | index.refresh() 156 | refreshed_indexes.append(index) 157 | 158 | def add(self, obj: Model) -> None: 159 | """ 160 | Add a single object to the index. 161 | 162 | Args: 163 | obj: The object to add to the index. 164 | """ 165 | self.get_index_for_model(type(obj)).add_item(obj) 166 | 167 | def add_bulk(self, model: Type[T], obj_list: List[T]) -> None: 168 | """ 169 | Add multiple objects to the index. 170 | 171 | Args: 172 | model: The model of the objects being added. 173 | obj_list (list): The list of objects to add to the index. 174 | """ 175 | index = self.get_index_for_model(model) 176 | index.add_items(model, obj_list) 177 | 178 | def delete(self, obj: Model) -> None: 179 | """ 180 | Delete an object from the index. 181 | 182 | Args: 183 | obj: The object to delete from the index. 184 | """ 185 | self.get_index_for_model(type(obj)).delete_item(obj) 186 | 187 | def _search( 188 | self, 189 | query_compiler_class: Union[ 190 | Type[MeiliSearchQueryCompiler], 191 | Type[MeiliSearchAutocompleteQueryCompiler], 192 | ], 193 | query: str, 194 | model_or_queryset: Union[Type[Model], QuerySet], 195 | **kwargs: Any, 196 | ) -> Union[MeiliSearchResults, EmptySearchResults]: 197 | """ 198 | Perform a search using the specified query compiler. 199 | 200 | Args: 201 | query_compiler_class: The query compiler class to use. 202 | query (str): The search query. 203 | model_or_queryset: The model or queryset to search within. 204 | **kwargs: Additional search parameters. 205 | 206 | Returns: 207 | SearchResults: The search results. 
208 | """ 209 | if isinstance(model_or_queryset, QuerySet): 210 | model = model_or_queryset.model 211 | queryset = model_or_queryset 212 | else: 213 | model = model_or_queryset 214 | queryset = model_or_queryset.objects.all() 215 | 216 | if not class_is_indexed(model): 217 | return EmptySearchResults() 218 | 219 | if query == "": 220 | return EmptySearchResults() 221 | 222 | search_query = query_compiler_class(queryset, query, **kwargs) 223 | search_query.check() 224 | 225 | return self.results_class(self, search_query) 226 | 227 | def search( 228 | self, 229 | query: str, 230 | model_or_queryset: Union[Type[Model], QuerySet], 231 | fields: Optional[List[str]] = None, 232 | operator: Optional[str] = None, 233 | order_by_relevance: bool = True, 234 | ) -> Union[MeiliSearchResults, EmptySearchResults]: 235 | """ 236 | Perform a search. 237 | 238 | Args: 239 | query (str): The search query. 240 | model_or_queryset: The model or queryset to search within. 241 | fields (list, optional): The fields to search in. 242 | operator (str, optional): The operator to use for multiple search terms. 243 | order_by_relevance (bool, optional): Whether to order results by relevance. 244 | 245 | Returns: 246 | SearchResults: The search results. 247 | """ 248 | return self._search( 249 | self.query_compiler_class, 250 | query, 251 | model_or_queryset, 252 | fields=fields, 253 | operator=operator, 254 | order_by_relevance=order_by_relevance, 255 | ) 256 | 257 | def autocomplete( 258 | self, 259 | query: str, 260 | model_or_queryset: Union[Type[Model], QuerySet], 261 | fields: Optional[List[str]] = None, 262 | operator: Optional[str] = None, 263 | order_by_relevance: bool = True, 264 | ) -> Union[MeiliSearchResults, EmptySearchResults]: 265 | """ 266 | Perform an autocomplete search. 267 | 268 | Args: 269 | query (str): The autocomplete query. 270 | model_or_queryset: The model or queryset to search within. 271 | fields (list, optional): The fields to search in. 
272 | operator (str, optional): The operator to use for multiple search terms. 273 | order_by_relevance (bool, optional): Whether to order results by relevance. 274 | 275 | Returns: 276 | SearchResults: The autocomplete search results. 277 | """ 278 | return self._search( 279 | self.autocomplete_query_compiler_class, 280 | query, 281 | model_or_queryset, 282 | fields=fields, 283 | operator=operator, 284 | order_by_relevance=order_by_relevance, 285 | ) 286 | 287 | 288 | SearchBackend = MeiliSearchBackend 289 | -------------------------------------------------------------------------------- /src/wagtail_meilisearch/results.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from typing import Any, Dict, List, Optional, Tuple, Type 3 | 4 | from django.db.models import Case, Model, QuerySet, When 5 | from wagtail.search.backends.base import BaseSearchResults 6 | from wagtail.search.query import Fuzzy, Phrase, PlainText 7 | 8 | from .utils import get_descendant_models, get_index_label, ranked_ids_from_search_results, weak_lru 9 | 10 | 11 | class MeiliSearchResults(BaseSearchResults): 12 | """A class to handle search results from MeiliSearch. 13 | 14 | This class extends BaseSearchResults and provides methods to process 15 | and retrieve search results from MeiliSearch, including faceting and filtering 16 | capabilities. 17 | 18 | Attributes: 19 | _last_count: Cache for the last count result. 20 | supports_facet: Whether faceting is supported by this backend. 21 | """ 22 | 23 | _last_count: Optional[int] = None 24 | supports_facet: bool = True 25 | 26 | def facet(self, field_name: str) -> OrderedDict: 27 | """ 28 | Retrieve facet data for a given field from MeiliSearch. 
To use this, you'd do something 29 | like this: 30 | 31 | ```python 32 | Page.objects.search('query').facet('content_type_id') 33 | ``` 34 | and this returns an ordered dictionary containing the facet data, ordered by the count 35 | of each facet value, like this... 36 | 37 | ``` 38 | OrderedDict([('58', 197), ('75', 2), ('52', 1), ('54', 1), ('61', 1)]) 39 | ``` 40 | 41 | In this example, pages with the content type ID of 58 return 197 results, and so on. 42 | 43 | Args: 44 | field_name (str): The name of the field for which to retrieve facet data. 45 | 46 | Returns: 47 | OrderedDict: An ordered dictionary containing the facet data. 48 | """ 49 | qc = self.query_compiler 50 | model = qc.queryset.model 51 | models = get_descendant_models(model) 52 | try: 53 | terms = qc.query.query_string 54 | except AttributeError: 55 | return None 56 | filter_field = f"{field_name}_filter" 57 | 58 | results = OrderedDict() 59 | for m in models: 60 | index = self.backend.get_index_for_model(m) 61 | filterable_fields = index.client.index(index.label).get_filterable_attributes() 62 | if filter_field in filterable_fields: 63 | result = index.search( 64 | terms, 65 | { 66 | "facets": [filter_field], 67 | }, 68 | ) 69 | try: 70 | res = result["facetDistribution"][filter_field] 71 | except KeyError: 72 | pass 73 | else: 74 | results.update(res) 75 | 76 | # Sort the results 77 | sorted_dict = OrderedDict(sorted(results.items(), key=lambda x: x[1], reverse=True)) 78 | 79 | return sorted_dict 80 | 81 | def filter(self, filters: List[Tuple[str, str]], operator: str = "AND") -> QuerySet: 82 | """Filter search results based on field-value pairs. 83 | 84 | Takes a list of tuples containing filter fields and values as strings, 85 | and checks they're valid before passing them on to _do_search. 86 | 87 | Args: 88 | filters: A list of (field_name, value) tuples to filter by. 89 | Example: [('category', 'news'), ('author', 'john')] 90 | 91 | Returns: 92 | QuerySet: Filtered search results. 
93 | 94 | Raises: 95 | ValueError: If no filters are provided or if filters are invalid. 96 | """ 97 | if not len(filters): 98 | msg = "No filters provided" 99 | raise ValueError(msg) 100 | 101 | for item in filters: 102 | if not isinstance(item, tuple) or len(item) != 2: 103 | msg = f"Invalid filter item: {item}" 104 | raise ValueError(msg) 105 | 106 | res = self._do_search(filters=filters, operator=operator) 107 | return res 108 | 109 | @weak_lru() 110 | def _get_field_boosts(self, model: Type[Model]) -> Dict[str, float]: 111 | """Get the boost values for fields in a given model. 112 | 113 | Args: 114 | model: The model to get field boosts for. 115 | 116 | Returns: 117 | Dict[str, float]: A dictionary mapping field names to their boost values. 118 | """ 119 | boosts = {} 120 | for field in model.search_fields: 121 | if hasattr(field, "boost"): 122 | boosts[field.field_name] = field.boost 123 | return boosts 124 | 125 | @property 126 | def models(self) -> List[Type[Model]]: 127 | """Get all descendant models of the queried model. 128 | 129 | Returns: 130 | List[Type[Model]]: A list of descendant models. 131 | """ 132 | return get_descendant_models(self.query_compiler.queryset.model) 133 | 134 | @property 135 | def query_string(self) -> str: 136 | """Get the query string from the query compiler. 137 | 138 | Returns: 139 | str: The query string if it's a PlainText, Phrase, or Fuzzy query, 140 | otherwise an empty string. 141 | """ 142 | query = self.query_compiler.query 143 | if isinstance(query, (PlainText, Phrase, Fuzzy)): 144 | return query.query_string 145 | return "" 146 | 147 | def _build_queries( 148 | self, 149 | models: List[Type[Model]], 150 | terms: str, 151 | filters: Optional[List[Tuple[str, str]]] = None, 152 | operator: str = "AND", 153 | ) -> List[Dict[str, Any]]: 154 | """Build a list of queries for MeiliSearch's multi-search API. 
155 | 156 | Creates query dictionaries for each model and applies any filters, 157 | suitable for passing to MeiliSearch's multi-search API. 158 | 159 | Args: 160 | models: The models to search. 161 | terms: The search terms. 162 | filters: The filters to apply, as (field, value) tuples. 163 | Defaults to None. 164 | 165 | Returns: 166 | List[Dict[str, Any]]: A list of query dictionaries ready for the API. 167 | """ 168 | if filters is None: 169 | filters = [] 170 | 171 | # This block was actually part of the old boosts used before Meilisearch had 172 | # native ranking. However, if I remove this, somehow we end up searching 173 | # across all indexes instead of only those covered by the queryset we 174 | # want to search in. Eventually I'll work out why and remove this. 175 | models_boosts = {} 176 | for model in models: 177 | label = get_index_label(model) 178 | models_boosts[label] = self._get_field_boosts(model) 179 | 180 | # Get active indexes 181 | # For model types that don't have any documents, meilisearch won't 182 | # create an index, so we have to check before running multi_search 183 | # if an index exists, otherwise the entire multi_search call will fail. 
184 | limit = self.backend.settings.query_limit 185 | active_index_dict = self.backend.client.get_indexes({"limit": limit}) 186 | active_indexes = [index for index in active_index_dict["results"]] 187 | 188 | queries = [] 189 | for index in active_indexes: 190 | filterable_fields = index.get_filterable_attributes() 191 | q = { # noqa: PERF401 192 | "indexUid": index.uid, 193 | "q": terms, 194 | **self.backend.search_params, 195 | } 196 | if len(filters): 197 | filter_list = [] 198 | for item in filters: 199 | filter_field = f"{item[0]}_filter" 200 | filter_value = item[1] 201 | if filter_field in filterable_fields: 202 | filter_list.append(f"{filter_field} = '{filter_value}'") 203 | q["filter"] = f" {operator} ".join(filter_list) 204 | queries.append(q) 205 | 206 | return queries 207 | 208 | def _do_search( 209 | self, 210 | filters: Optional[List[Tuple[str, str]]] = None, 211 | operator: str = "AND", 212 | ) -> QuerySet: 213 | """Perform the search operation. 214 | 215 | Executes the search query against MeiliSearch, processes the results, 216 | calculates scores, and returns the results in the order specified by the query compiler. 217 | 218 | Args: 219 | filters: Optional list of (field, value) tuples to filter the search results. 220 | Defaults to None. 221 | 222 | Returns: 223 | QuerySet: A queryset of search results, ordered by relevance if specified. 224 | """ 225 | models = self.models 226 | terms = self.query_string 227 | 228 | queries = self._build_queries(models, terms, filters, operator) 229 | multi_search_results = self.backend.client.multi_search(queries) 230 | 231 | # Get search results sorted by relevance score in descending order (highest scores first) 232 | # We do this here so that we can pre-sort the ID list by rank so that if we're searching 233 | # within a window of results, that window will only be searching within the top ranked 234 | # results. 
235 | sorted_id_score_pairs = ranked_ids_from_search_results(multi_search_results) 236 | id_to_score = {id: score for id, score in sorted_id_score_pairs} 237 | sorted_ids = [id for id, _ in sorted_id_score_pairs] 238 | 239 | # Retrieve results from the database 240 | qc = self.query_compiler 241 | window_sorted_ids = sorted_ids[self.start : self.stop] 242 | results = qc.queryset.filter(pk__in=window_sorted_ids) 243 | 244 | # Preserve the order by relevance score by annotating with actual scores 245 | if qc.order_by_relevance and sorted_ids: 246 | # Create a mapping from ID to its actual ranking score 247 | # This directly uses the score values from MeiliSearch 248 | # Higher scores will be ordered first when we use descending order 249 | score_cases = [When(pk=pk, then=id_to_score.get(pk, 0.0)) for pk in sorted_ids] 250 | 251 | # Annotate the queryset with the actual scores 252 | preserved_score = Case(*score_cases, default=0.0) 253 | results = results.annotate(search_rank=preserved_score) 254 | 255 | # Order by the actual score in descending order (highest first) 256 | results = results.order_by("-search_rank") 257 | # Enable this for debugging 258 | # for result in results: 259 | # print(f"{result.search_rank}: {result.id} - {result.title}") 260 | 261 | res = results.distinct() 262 | 263 | return res 264 | 265 | def _do_count(self) -> int: 266 | """Count the total number of search results. 267 | 268 | This method gets called before _do_search when using Django's paginator. 269 | It ensures that _results_cache and _count_cache are properly populated. 270 | 271 | Note: 272 | This method gets called before _do_search when using Django pagination, 273 | which means _results_cache and _count_cache may be empty on first run. 274 | 275 | Returns: 276 | int: The total number of search results. 
277 | """ 278 | if self._count_cache: 279 | return self._count_cache 280 | if self._results_cache: 281 | return len(self._results_cache) 282 | 283 | res = self._do_search() 284 | self._count_cache = res.count() 285 | self._results_cache = list(res) 286 | return self._count_cache 287 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Wagtail MeiliSearch 2 | 3 | This is a Wagtail search backend for the [MeiliSearch](https://github.com/meilisearch/MeiliSearch) search engine. 4 | 5 | 6 | ## Installation 7 | 8 | `uv add wagtail_meilisearch` or `pip install wagtail_meilisearch` 9 | 10 | ## Upgrading 11 | 12 | If you're upgrading MeiliSearch from 0.9.x to anything higher, you will need to destroy and re-create MeiliSearch's data.ms directory. 13 | 14 | ## Requirements 15 | 16 | - Python >=3.10 17 | - wagtail >=6.0 18 | - meilisearch-python >= 0.36.0 19 | 20 | Tested against Meilisearch server v1.15.2 - latest at the time of writing. 21 | 22 | ## Configuration 23 | 24 | See the [MeiliSearch docs](https://docs.meilisearch.com/guides/advanced_guides/installation.html#environment-variables-and-flags) for info on the values you want to add here. 25 | 26 | ```python 27 | WAGTAILSEARCH_BACKENDS = { 28 | 'default': { 29 | 'BACKEND': 'wagtail_meilisearch.backend', 30 | 'HOST': os.environ.get('MEILISEARCH_HOST', 'http://127.0.0.1'), 31 | 'PORT': os.environ.get('MEILISEARCH_PORT', '7700'), 32 | 'MASTER_KEY': os.environ.get('MEILI_MASTER_KEY', '') 33 | }, 34 | } 35 | ``` 36 | 37 | ## Update strategies 38 | 39 | Indexing a very large site with `python manage.py update_index` can be pretty taxing on the CPU, take quite a long time, and reduce the responsiveness of the MeiliSearch server. Wagtail-MeiliSearch offers two update strategies, `soft` and `hard`. 
The default `soft` strategy will do an "add or update" call for each document sent to it, while the `hard` strategy will delete every document in the index and then replace them. 40 | 41 | There are tradeoffs with either strategy - `hard` will guarantee that your search data matches your model data, but be hard work on the CPU for longer. `soft` will be faster and less CPU intensive, but if a field is removed from your model between indexings, that field data will remain in the search index. 42 | 43 | ### Delta strategy 44 | 45 | The `delta` strategy is useful if you habitually add created_at and updated_at timestamps to your models. This strategy will check the fields... 46 | 47 | * `first_published_at` 48 | * `last_published_at` 49 | * `created_at` 50 | * `updated_at` 51 | 52 | And only update the records for objects where one or more of these fields has a date more recent than the time delta specified in the settings. 53 | 54 | ```python 55 | WAGTAILSEARCH_BACKENDS = { 56 | 'default': { 57 | 'BACKEND': 'wagtail_meilisearch.backend', 58 | 'HOST': os.environ.get('MEILISEARCH_HOST', 'http://127.0.0.1'), 59 | 'PORT': os.environ.get('MEILISEARCH_PORT', '7700'), 60 | 'MASTER_KEY': os.environ.get('MEILI_MASTER_KEY', ''), 61 | 'UPDATE_STRATEGY': 'delta', 62 | 'UPDATE_DELTA': { 63 | 'weeks': -1 64 | } 65 | } 66 | } 67 | ``` 68 | 69 | If the delta is set to `{'weeks': -1}`, wagtail-meilisearch will only update indexes for documents where one of the timestamp fields has a date within the last week. Your time delta _must_ be negative. 70 | 71 | Under the hood we use [Arrow](https://arrow.readthedocs.io), so you can use any keyword args supported by [Arrow's `shift()`](https://arrow.readthedocs.io/en/latest/index.html#replace-shift). 72 | 73 | If you set `UPDATE_STRATEGY` to `delta` but don't provide a value for `UPDATE_DELTA`, wagtail-meilisearch will default to `{'weeks': -1}`. 
74 | 75 | ## Skip models 76 | 77 | Sometimes you might have a site where a certain page model is guaranteed not to change, for instance an archive section. After creating your initial search index, you can add a `SKIP_MODELS` key to the config to tell wagtail-meilisearch to ignore specific models when running `update_index`. Behind the scenes wagtail-meilisearch returns a dummy model index to the `update_index` management command for every model listed in your `SKIP_MODELS` - this ensures that this setting only affects `update_index`, so if you manually edit one of the models listed it should get re-indexed with the update signal. 78 | 79 | ```python 80 | WAGTAILSEARCH_BACKENDS = { 81 | 'default': { 82 | 'BACKEND': 'wagtail_meilisearch.backend', 83 | 'HOST': os.environ.get('MEILISEARCH_HOST', 'http://127.0.0.1'), 84 | 'PORT': os.environ.get('MEILISEARCH_PORT', '7700'), 85 | 'MASTER_KEY': os.environ.get('MEILI_MASTER_KEY', ''), 86 | 'UPDATE_STRATEGY': 'delta', 87 | 'SKIP_MODELS': [ 88 | 'core.ArchivePage', 89 | ] 90 | } 91 | } 92 | ``` 93 | 94 | ## Stop Words 95 | 96 | Stop words are words for which we don't want to place significance on their frequency. For instance, the search query `tom and jerry` would return far less relevant results if the word `and` was given the same importance as `tom` and `jerry`. There's a fairly sane list of English language stop words supplied, but you can also supply your own. This is particularly useful if you have a lot of content in any other language. 97 | 98 | ```python 99 | MY_STOP_WORDS = ['a', 'list', 'of', 'words'] 100 | 101 | WAGTAILSEARCH_BACKENDS = { 102 | 'default': { 103 | 'BACKEND': 'wagtail_meilisearch.backend', 104 | [...] 105 | 'STOP_WORDS': MY_STOP_WORDS 106 | }, 107 | } 108 | ``` 109 | 110 | Or alternatively, you can extend the built in list. 
111 | 112 | ```python 113 | from wagtail_meilisearch.settings import STOP_WORDS 114 | 115 | MY_STOP_WORDS = STOP_WORDS + WELSH_STOP_WORDS + FRENCH_STOP_WORDS 116 | 117 | WAGTAILSEARCH_BACKENDS = { 118 | 'default': { 119 | 'BACKEND': 'wagtail_meilisearch.backend', 120 | [...] 121 | 'STOP_WORDS': MY_STOP_WORDS 122 | }, 123 | } 124 | ``` 125 | 126 | ## Ranking 127 | 128 | We now support Meilisearch's native ranking system, which is considerably faster than the rather hacky way we were having to do it before. Meilisearch takes a [list of fields ordered by precedence](https://www.meilisearch.com/docs/learn/relevancy/attribute_ranking_order) to affect the attribute ranking, so we build that list by inspecting the `index.SearchField`s and `index.AutocompleteField`s on each model and ordering by boost. As an example, if you want the page title to be the most important field to rank on... 129 | 130 | ```python 131 | search_fields = Page.search_fields + [ 132 | index.AutocompleteField("title", boost=10), 133 | index.SearchField("body"), 134 | index.SearchField("search_description", boost=5), 135 | ] 136 | 137 | ``` 138 | 139 | Any field that doesn't have a `boost` value will be given a default of 1 but will still be sent to Meilisearch's settings as part of the ordered list, so the above settings send an attribute ranking order to Meilisearch of... 140 | 141 | ```python 142 | ['title', 'search_description', 'body'] 143 | ``` 144 | 145 | In the backend, we automatically annotate the search results with their ranking, with a float between 0 and 1 as `search_rank`, so in your search view you can sort by that value. 
146 | 147 | ```python 148 | def search_view(request): 149 | search_query = request.GET.get('query', '') 150 | search_results = Page.objects.search(search_query) 151 | 152 | # Results are already sorted by search_rank 153 | # You can access the rank for each result 154 | for result in search_results: 155 | print(f"Result: {result.title}, Rank: {result.search_rank}") 156 | 157 | return render(request, 'search_results.html.j2', { 158 | 'search_query': search_query, 159 | 'search_results': search_results, 160 | }) 161 | ``` 162 | 163 | And you might even fancy using the search rank in your template... 164 | 165 | ```jinja2 166 | {% for result in search_results %} 167 |
Relevance: {{ result.search_rank }}
170 |