├── tests
    ├── __init__.py
    └── test_wagtail_meilisearch.py
├── src
    └── wagtail_meilisearch
    │   ├── py.typed
    │   ├── __init__.py
    │   ├── management
    │       ├── __init__.py
    │       └── commands
    │       │   ├── __init__.py
    │       │   ├── meilisearch_indexes.py
    │       │   └── meilisearch_status.py
    │   ├── rebuilder.py
    │   ├── query.py
    │   ├── defaults.py
    │   ├── utils.py
    │   ├── settings.py
    │   ├── backend.py
    │   ├── results.py
    │   └── index.py
├── .gitignore
├── pyproject.toml
├── LICENSE
└── README.md


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/py.typed:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/management/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/test_wagtail_meilisearch.py:
--------------------------------------------------------------------------------
1 | # One day.
2 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/management/commands/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.pyc
3 | ref/**
4 | */__pycache__/**/*
5 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "wagtail-meilisearch"
 3 | version = "1.0.0"
 4 | description = "A MeiliSearch backend for Wagtail"
 5 | readme = "README.md"
 6 | authors = [
 7 |     { name = "Hactar", email = "systems@hactar.is" }
 8 | ]
 9 | requires-python = ">=3.10"
10 | dependencies = [
11 |     "arrow>=1.2.3",
12 |     "wagtail>=6.0",
13 |     "meilisearch>=0.36.0",
14 | ]
15 | 
16 | [build-system]
17 | requires = ["uv_build>=0.7.19,<0.8.0"]
18 | build-backend = "uv_build"
19 | 
20 | 
21 | [tool.ruff]
22 | target-version = "py310"
23 | line-length = 100
24 | extend-exclude = ["tests"]
25 | 
26 | [tool.ruff.lint]
27 | select = [
28 |     "E", "F", "B", "DJ", "C90", "S", "COM", "DTZ", "EM",
29 |     "PT", "RET", "SIM", "TCH", "ARG", "PTH", "PERF"
30 | ]
31 | extend-ignore = ["E402", "RET504", "S101", "DJ012"]
32 | 
33 | [tool.ruff.lint.isort]
34 | combine-as-imports = true
35 | force-wrap-aliases = true
36 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Hactar
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/rebuilder.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from typing import TYPE_CHECKING, Optional, Type, Union
 3 | 
 4 | if TYPE_CHECKING:
 5 |     from django.db.models import Model
 6 | 
 7 | from .index import DummyModelIndex, MeiliSearchModelIndex
 8 | from .utils import get_index_label
 9 | 
10 | 
class MeiliSearchRebuilder:
    """Prepares the MeiliSearch index of one model for a rebuild run.

    Attributes:
        index: The model index this rebuilder was created for.
        uid: Index label derived from the index's model via ``get_index_label``.
        dummy_index: Stand-in index returned for models that are skipped.
        settings: Settings object taken from the model index; it is used to
            apply settings to the index when the rebuild starts.
    """

    def __init__(self, model_index: MeiliSearchModelIndex) -> None:
        self.index: MeiliSearchModelIndex = model_index
        self.uid: str = get_index_label(self.index.model)
        self.dummy_index: DummyModelIndex = DummyModelIndex()
        self.settings = model_index.settings

    def start(self) -> Union[MeiliSearchModelIndex, DummyModelIndex]:
        """Start the rebuild and return the index to write documents to.

        The backend's ``update_strategy`` decides what happens to existing
        documents:

        - 'soft' and 'delta': existing documents are left in place (this method
          treats the two identically).
        - anything else ('hard'): every document in the index is deleted first.

        In both cases the settings are (re)applied to the index before it is
        returned.

        Returns:
            The dummy index when the model's label is listed in the backend's
            ``skip_models``; otherwise the model's index.
        """
        model: Optional[Type[Model]] = self.index.model
        backend = self.index.backend

        if model and model._meta.label in backend.skip_models:
            sys.stdout.write(f"SKIPPING: {model._meta.label}\n")
            return self.dummy_index

        if backend.update_strategy not in ("soft", "delta"):
            # Hard update strategy: wipe the existing documents before re-adding.
            backend.get_index_for_model(model).delete_all_documents()

        # A single lookup serves every strategy; the soft/delta path used to
        # fetch the index twice and throw the first result away.
        index: MeiliSearchModelIndex = backend.get_index_for_model(model)
        self.settings.apply_settings(index=index)
        return index

    def finish(self) -> None:
        """Hook called when the rebuild is over; nothing to clean up."""
        pass
51 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/query.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Generator, List, Optional, Type
 2 | 
 3 | from django.db.models import Model, Q
 4 | from wagtail.search.backends.base import BaseSearchQueryCompiler
 5 | from wagtail.search.utils import OR
 6 | 
 7 | from .utils import get_field_mapping
 8 | 
 9 | 
class MeiliSearchQueryCompiler(BaseSearchQueryCompiler):
    """Query compiler for MeiliSearch.

    Extends Wagtail's BaseSearchQueryCompiler and expresses field lookups and
    filter combinations as Django ``Q`` objects.

    Methods:
        _process_lookup: Build the Q object for a single field lookup.
        _connect_filters: Combine several filters with AND / OR, optionally negated.
    """

    def _process_lookup(self, field: Any, lookup: str, value: Any) -> Q:
        """Build the Q object for a single field lookup.

        Args:
            field: The field the lookup applies to.
            lookup: The lookup type, appended to the attribute name after "__".
            value: The value to compare against.

        Returns:
            Q: A Q object keyed on ``<attname>__<lookup>``.
        """
        # Approach borrowed from wagtail-whoosh
        attname = field.get_attname(self.queryset.model)
        return Q(**{attname + "__" + lookup: value})

    def _connect_filters(self, filters: List[Any], connector: str, negated: bool) -> Optional[Q]:
        """Combine several filters into one Q object.

        Args:
            filters: The filters to combine.
            connector: Either 'AND' or 'OR'.
            negated: When true, the combined filter is inverted.

        Returns:
            Optional[Q]: The combined filter, or None for any other connector.
        """
        # Approach borrowed from wagtail-whoosh
        if connector not in ("AND", "OR"):
            return None

        combined = Q(*filters) if connector == "AND" else OR([Q(fil) for fil in filters])
        return ~combined if negated else combined
66 | 
67 | 
class MeiliSearchAutocompleteQueryCompiler(MeiliSearchQueryCompiler):
    """Query compiler for MeiliSearch autocomplete searches.

    Same as MeiliSearchQueryCompiler, except that the field names come from the
    model's autocomplete search fields.
    """

    def _get_fields_names(self) -> Generator[str, None, None]:
        """Yield the mapped name of every autocomplete search field.

        The fields are read from the model of the current queryset and each
        one is passed through ``get_field_mapping``.

        Yields:
            str: The mapped field name of each autocomplete search field.
        """
        queryset_model: Type[Model] = self.queryset.model
        yield from map(get_field_mapping, queryset_model.get_autocomplete_search_fields())
87 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/management/commands/meilisearch_indexes.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, List, Union
 2 | 
 3 | import arrow
 4 | from django.core.management.base import BaseCommand
 5 | from wagtail.search.backends import get_search_backend
 6 | 
 7 | SIZE_UNITS: List[str] = ["B", "KB", "MB", "GB", "TB", "PB"]
 8 | 
 9 | 
10 | def human_readable_file_size(size_in_bytes: float) -> str:
11 |     """Convert a size in bytes to a human-readable string.
12 | 
13 |     Args:
14 |         size_in_bytes: The size in bytes to convert.
15 | 
16 |     Returns:
17 |         str: A human-readable representation of the size with appropriate unit suffix.
18 |             Returns 'Index too large' if the size exceeds the available units.
19 |     """
20 |     index = 0
21 |     while size_in_bytes >= 1024:
22 |         size_in_bytes /= 1024
23 |         index += 1
24 |     try:
25 |         rounded = "{0:.3f}".format(size_in_bytes)
26 |         return f"{rounded} {SIZE_UNITS[index]}"
27 |     except IndexError:
28 |         return "Index too large"
29 | 
30 | 
class Command(BaseCommand):
    """Command to display detailed information about each MeiliSearch index.

    Prints the overall database size and last update time, then a fixed
    selection of settings for every index reported by the backend's client.
    """

    help = "Display info about each Meilisearch index"

    def handle(self, *_args, **_kwargs) -> None:
        """Execute the command to display index information.

        Django passes arguments to this method, but we don't use them.
        The underscore prefix indicates these arguments are intentionally unused.
        """
        # Settings printed for each index, in display order.
        reported_settings = (
            "displayedAttributes",
            "searchableAttributes",
            "filterableAttributes",
            "sortableAttributes",
            "rankingRules",
            "synonyms",
            "distinctAttribute",
            "typoTolerance",
            "faceting",
            "pagination",
        )

        b = get_search_backend()
        stats: Dict[str, Union[float, str, Dict]] = b.client.get_all_stats()
        # The raw stats payload is echoed before the formatted report.
        print(stats)
        indexes: Dict[str, Dict] = stats["indexes"]
        print("*" * 80)
        print(f"Total DB size: {human_readable_file_size(stats['databaseSize'])}")
        print(f"Last updated: {arrow.get(stats['lastUpdate']).format('YYYY-MM-DD HH:mm:ss')}")
        if not indexes:
            print("No indexes created yet")
            return

        print("Indexes:")
        for uid, index_stats in indexes.items():
            is_indexing = index_stats["isIndexing"]
            settings = b.client.get_index(uid).get_settings()
            # Stop words are never reported; tolerate the key being absent
            # instead of failing with KeyError.
            settings.pop("stopWords", None)
            print(f"{uid} - indexing: {is_indexing}")
            for setting_name in reported_settings:
                print(f"\t {setting_name}: {settings.get(setting_name)}")

            print("\n")
            print("*" * 80)
76 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/management/commands/meilisearch_status.py:
--------------------------------------------------------------------------------
  1 | from typing import Dict, List, Optional, Union
  2 | 
  3 | import arrow
  4 | from django.core.management.base import BaseCommand
  5 | from wagtail.search.backends import get_search_backend
  6 | 
  7 | SIZE_UNITS: List[str] = ["B", "KB", "MB", "GB", "TB", "PB"]
  8 | 
  9 | 
 10 | def human_readable_file_size(size_in_bytes: float) -> str:
 11 |     """Convert a size in bytes to a human-readable string.
 12 | 
 13 |     Args:
 14 |         size_in_bytes: The size in bytes to convert.
 15 | 
 16 |     Returns:
 17 |         str: A human-readable representation of the size with appropriate unit suffix.
 18 |             Returns 'Index too large' if the size exceeds the available units.
 19 |     """
 20 |     index = 0
 21 |     while size_in_bytes >= 1024:
 22 |         size_in_bytes /= 1024
 23 |         index += 1
 24 |     try:
 25 |         rounded = "{0:.3f}".format(size_in_bytes)
 26 |         return f"{rounded} {SIZE_UNITS[index]}"
 27 |     except IndexError:
 28 |         return "Index too large"
 29 | 
 30 | 
 31 | class Command(BaseCommand):
 32 |     """Command to display status information about MeiliSearch indexes.
 33 | 
 34 |     This command provides statistics about the MeiliSearch backend,
 35 |     including database size, last update time, and details about each index.
 36 |     """
 37 | 
 38 |     help = "Print some stats about the meilisearch backend"
 39 | 
 40 |     def add_arguments(self, parser) -> None:
 41 |         """Add command line arguments.
 42 | 
 43 |         Args:
 44 |             parser: The argument parser to which arguments should be added.
 45 |         """
 46 |         # Named (optional) arguments
 47 |         parser.add_argument(
 48 |             "--indexing",
 49 |             action="store_true",
 50 |             help="Show only models that MeiliSearch is currently indexing",
 51 |         )
 52 |         parser.add_argument(
 53 |             "--models",
 54 |             type=str,
 55 |             help="Show only models in this comma separated list of model labels",
 56 |         )
 57 | 
 58 |     def handle(self, **options) -> None:
 59 |         """Execute the command.
 60 | 
 61 |         Args:
 62 |             **options: Command options including 'models' and 'indexing'.
 63 |         """
 64 |         models: List[str] = []
 65 |         models_string: Optional[str] = options.get("models", "")
 66 |         if models_string:
 67 |             models = models_string.split(",")
 68 |         indexing: bool = options.get("indexing", False)
 69 | 
 70 |         # Get MeiliSearch backend and stats
 71 |         b = get_search_backend()
 72 |         stats: Dict[str, Union[float, str, Dict]] = b.client.get_all_stats()
 73 |         indexes: Dict[str, Dict] = stats["indexes"]
 74 | 
 75 |         print("*" * 80)
 76 |         print(f"Index DB size: {human_readable_file_size(stats['databaseSize'])}")
 77 |         print(f"Last updated: {arrow.get(stats['lastUpdate']).format('YYYY-MM-DD HH:mm:ss')}")
 78 | 
 79 |         if not len(indexes):
 80 |             print("No indexes created yet")
 81 |         else:
 82 |             print("Indexes:")
 83 |             for k, v in indexes.items():
 84 |                 model = k.replace("-", ".")
 85 |                 is_indexing = v["isIndexing"]
 86 | 
 87 |                 # Filter by model name if models list is provided
 88 |                 if len(models):
 89 |                     if model in models:
 90 |                         if indexing:
 91 |                             if is_indexing:
 92 |                                 self._print_index_stats(model, v)
 93 |                         else:
 94 |                             self._print_index_stats(model, v)
 95 |                 else:
 96 |                     if indexing:
 97 |                         if is_indexing:
 98 |                             self._print_index_stats(model, v)
 99 |                     else:
100 |                         self._print_index_stats(model, v)
101 | 
102 |         print("*" * 80)
103 | 
104 |     def _print_index_stats(self, model: str, v: Dict[str, Union[int, bool]]) -> None:
105 |         """Print statistics for a specific index.
106 | 
107 |         Args:
108 |             model: The model name (index label with dots instead of hyphens).
109 |             v: Dictionary containing index statistics.
110 |         """
111 |         print(f"{model}")
112 |         print(f"  Documents: {v['numberOfDocuments']}")
113 |         if v["isIndexing"] is True:
114 |             print("  INDEXING")
115 |         print("")
116 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/defaults.py:
--------------------------------------------------------------------------------
  1 | # Suffixes used for field mapping
  2 | AUTOCOMPLETE_SUFFIX: str = "_ngrams"
  3 | FILTER_SUFFIX: str = "_filter"
  4 | 
  5 | 
  6 | DEFAULT_RANKING_RULES: list[str] = [
  7 |     "words",
  8 |     "typo",
  9 |     "proximity",
 10 |     "attribute",
 11 |     "sort",
 12 |     "exactness",
 13 | ]
 14 | 
 15 | STOP_WORDS: list[str] = [
 16 |     "a",
 17 |     "about",
 18 |     "after",
 19 |     "again",
 20 |     "against",
 21 |     "all",
 22 |     "almost",
 23 |     "also",
 24 |     "although",
 25 |     "always",
 26 |     "am",
 27 |     "amount",
 28 |     "an",
 29 |     "and",
 30 |     "another",
 31 |     "any",
 32 |     "anyhow",
 33 |     "anyone",
 34 |     "anything",
 35 |     "anyway",
 36 |     "anywhere",
 37 |     "are",
 38 |     "around",
 39 |     "as",
 40 |     "at",
 41 |     "back",
 42 |     "be",
 43 |     "became",
 44 |     "because",
 45 |     "become",
 46 |     "becomes",
 47 |     "becoming",
 48 |     "been",
 49 |     "before",
 50 |     "beforehand",
 51 |     "being",
 52 |     "besides",
 53 |     "between",
 54 |     "beyond",
 55 |     "both",
 56 |     "but",
 57 |     "by",
 58 |     "can",
 59 |     "cannot",
 60 |     "cant",
 61 |     "could",
 62 |     "couldnt",
 63 |     "de",
 64 |     "describe",
 65 |     "detail",
 66 |     "do",
 67 |     "done",
 68 |     "down",
 69 |     "due",
 70 |     "during",
 71 |     "each",
 72 |     "eg",
 73 |     "eight",
 74 |     "either",
 75 |     "eleven",
 76 |     "else",
 77 |     "elsewhere",
 78 |     "empty",
 79 |     "enough",
 80 |     "etc",
 81 |     "even",
 82 |     "ever",
 83 |     "every",
 84 |     "everyone",
 85 |     "everything",
 86 |     "everywhere",
 87 |     "except",
 88 |     "few",
 89 |     "find",
 90 |     "first",
 91 |     "for",
 92 |     "former",
 93 |     "formerly",
 94 |     "found",
 95 |     "from",
 96 |     "front",
 97 |     "full",
 98 |     "further",
 99 |     "get",
100 |     "give",
101 |     "go",
102 |     "had",
103 |     "has",
104 |     "hasnt",
105 |     "have",
106 |     "he",
107 |     "hence",
108 |     "her",
109 |     "here",
110 |     "hereafter",
111 |     "hereby",
112 |     "herein",
113 |     "hereupon",
114 |     "hers",
115 |     "him",
116 |     "his",
117 |     "how",
118 |     "however",
119 |     "i",
120 |     "ie",
121 |     "if",
122 |     "in",
123 |     "inc",
124 |     "indeed",
125 |     "interest",
126 |     "into",
127 |     "is",
128 |     "it",
129 |     "its",
130 |     "keep",
131 |     "last",
132 |     "latter",
133 |     "latterly",
134 |     "least",
135 |     "less",
136 |     "ltd",
137 |     "made",
138 |     "many",
139 |     "may",
140 |     "me",
141 |     "meanwhile",
142 |     "might",
143 |     "mine",
144 |     "more",
145 |     "moreover",
146 |     "most",
147 |     "mostly",
148 |     "move",
149 |     "much",
150 |     "must",
151 |     "my",
152 |     "name",
153 |     "namely",
154 |     "neither",
155 |     "never",
156 |     "nevertheless",
157 |     "next",
158 |     "no",
159 |     "nobody",
160 |     "none",
161 |     "noone",
162 |     "nor",
163 |     "not",
164 |     "nothing",
165 |     "now",
166 |     "nowhere",
167 |     "of",
168 |     "off",
169 |     "often",
170 |     "on",
171 |     "once",
172 |     "one",
173 |     "only",
174 |     "onto",
175 |     "or",
176 |     "other",
177 |     "others",
178 |     "otherwise",
179 |     "our",
180 |     "ours",
181 |     "ourselves",
182 |     "out",
183 |     "over",
184 |     "own",
185 |     "part",
186 |     "per",
187 |     "perhaps",
188 |     "put",
189 |     "rather",
190 |     "re",
191 |     "same",
192 |     "see",
193 |     "seem",
194 |     "seemed",
195 |     "seeming",
196 |     "seems",
197 |     "serious",
198 |     "several",
199 |     "she",
200 |     "should",
201 |     "show",
202 |     "side",
203 |     "since",
204 |     "so",
205 |     "some",
206 |     "somehow",
207 |     "someone",
208 |     "something",
209 |     "sometime",
210 |     "sometimes",
211 |     "somewhere",
212 |     "still",
213 |     "such",
214 |     "take",
215 |     "than",
216 |     "that",
217 |     "the",
218 |     "their",
219 |     "them",
220 |     "themselves",
221 |     "then",
222 |     "there",
223 |     "thereafter",
224 |     "thereby",
225 |     "therefore",
226 |     "therein",
227 |     "thereupon",
228 |     "these",
229 |     "they",
230 |     "thick",
231 |     "thin",
232 |     "this",
233 |     "those",
234 |     "though",
235 |     "through",
236 |     "throughout",
237 |     "thru",
238 |     "thus",
239 |     "to",
240 |     "together",
241 |     "too",
242 |     "top",
243 |     "toward",
244 |     "towards",
245 |     "un",
246 |     "under",
247 |     "until",
248 |     "up",
249 |     "upon",
250 |     "us",
251 |     "very",
252 |     "via",
253 |     "was",
254 |     "we",
255 |     "well",
256 |     "were",
257 |     "what",
258 |     "whatever",
259 |     "when",
260 |     "whence",
261 |     "whenever",
262 |     "where",
263 |     "whereafter",
264 |     "whereas",
265 |     "whereby",
266 |     "wherein",
267 |     "whereupon",
268 |     "wherever",
269 |     "whether",
270 |     "which",
271 |     "while",
272 |     "who",
273 |     "whoever",
274 |     "whole",
275 |     "whom",
276 |     "whose",
277 |     "why",
278 |     "will",
279 |     "with",
280 |     "within",
281 |     "without",
282 |     "would",
283 |     "yet",
284 |     "you",
285 |     "your",
286 |     "yours",
287 |     "yourself",
288 |     "yourselves",
289 | ]
290 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/utils.py:
--------------------------------------------------------------------------------
  1 | import contextlib
  2 | import functools
  3 | import weakref
  4 | from functools import lru_cache
  5 | from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, Union, cast
  6 | 
  7 | from django.apps import apps
  8 | from django.db.models import Manager, Model, QuerySet
  9 | from wagtail.search.index import AutocompleteField, FilterField, RelatedFields, SearchField
 10 | 
 11 | from .settings import AUTOCOMPLETE_SUFFIX, FILTER_SUFFIX
 12 | 
 13 | # Type variables for generic functions
 14 | T = TypeVar("T")
 15 | F = TypeVar("F", bound=Callable[..., Any])
 16 | 
 17 | 
 18 | def weak_lru(maxsize: int = 128, typed: bool = False) -> Callable[[F], F]:
 19 |     """
 20 |     LRU Cache decorator that keeps a weak reference to "self" and
 21 |     can be safely used on class methods
 22 |     """
 23 | 
 24 |     def wrapper(func: F) -> F:
 25 |         @functools.lru_cache(maxsize, typed)
 26 |         def _func(_self: Callable[[], Any], *args: Any, **kwargs: Any) -> Any:
 27 |             return func(_self(), *args, **kwargs)
 28 | 
 29 |         @functools.wraps(func)
 30 |         def inner(self: Any, *args: Any, **kwargs: Any) -> Any:
 31 |             return _func(weakref.ref(self), *args, **kwargs)
 32 | 
 33 |         return cast("F", inner)
 34 | 
 35 |     return wrapper
 36 | 
 37 | 
 38 | @lru_cache(maxsize=None)
 39 | def get_index_label(model: Optional[Type[Model]]) -> str:
 40 |     """
 41 |     Returns a unique label for the model's index.
 42 |     """
 43 |     if model is None:
 44 |         return ""
 45 |     return model._meta.label.replace(".", "-")
 46 | 
 47 | 
 48 | @lru_cache(maxsize=None)
 49 | def get_field_mapping(field: Union[SearchField, FilterField, AutocompleteField]) -> str:
 50 |     """
 51 |     Returns the appropriate field mapping based on the field type.
 52 |     """
 53 |     if isinstance(field, FilterField):
 54 |         return field.field_name + FILTER_SUFFIX
 55 |     if isinstance(field, AutocompleteField):
 56 |         return field.field_name + AUTOCOMPLETE_SUFFIX
 57 |     return field.field_name
 58 | 
 59 | 
 60 | @lru_cache(maxsize=None)
 61 | def get_descendant_models(model: Type[Model]) -> List[Type[Model]]:
 62 |     """
 63 |     Returns all descendants of a model.
 64 |     e.g. for a search on Page, return [HomePage, ContentPage, Page] etc.
 65 |     """
 66 |     descendant_models = [
 67 |         other_model for other_model in apps.get_models() if issubclass(other_model, model)
 68 |     ]
 69 |     return descendant_models
 70 | 
 71 | 
 72 | @lru_cache(maxsize=None)
 73 | def get_indexed_models() -> List[Type[Model]]:
 74 |     """
 75 |     Returns a list of all models that are registered for indexing.
 76 |     """
 77 |     from wagtail.search.index import get_indexed_models as wagtail_get_indexed_models
 78 | 
 79 |     return wagtail_get_indexed_models()
 80 | 
 81 | 
 82 | def class_is_indexed(model: Type[Model]) -> bool:
 83 |     """
 84 |     Returns True if the model is registered for indexing.
 85 |     """
 86 |     from wagtail.search.index import class_is_indexed as wagtail_class_is_indexed
 87 | 
 88 |     return wagtail_class_is_indexed(model)
 89 | 
 90 | 
 91 | def prepare_value(value: Any) -> str:
 92 |     """
 93 |     Prepares a value for indexing.
 94 |     """
 95 |     if not value:
 96 |         return ""
 97 |     if isinstance(value, str):
 98 |         return value
 99 |     if isinstance(value, list):
100 |         return ", ".join(prepare_value(item) for item in value)
101 |     if isinstance(value, dict):
102 |         return ", ".join(prepare_value(item) for item in value.values())
103 |     if callable(value):
104 |         return str(value())
105 |     return str(value)
106 | 
107 | 
def get_document_fields(model: Type[Model], item: Model) -> Dict[str, str]:
    """
    Walks through the model's search fields and returns a dictionary of fields to be indexed.

    Plain search/filter/autocomplete fields are stored under their mapped name.
    Related fields are stored under ``<related field name>__<mapped sub-field name>``;
    for a manager or queryset the values of all related rows are collected.

    The result is deliberately NOT cached. Django model instances hash and
    compare by primary key, so a cache keyed on the instance returns the
    document built before the instance was edited, keeps every indexed
    instance alive, and fails on unsaved instances.

    Args:
        model: The model class whose search fields are walked.
        item: The instance the values are read from.

    Returns:
        Dict[str, str]: Index key -> prepared string value.
    """
    doc_fields: Dict[str, str] = {}
    for field in model.get_search_fields():
        if isinstance(field, (SearchField, FilterField, AutocompleteField)):
            # Best effort: a field whose value cannot be read is left out of the document.
            with contextlib.suppress(Exception):
                doc_fields[get_field_mapping(field)] = prepare_value(field.get_value(item))
        elif isinstance(field, RelatedFields):
            value = field.get_value(item)
            if isinstance(value, (Manager, QuerySet)):
                # To-many relation: gather each sub-field across all related rows.
                qs = value.all()
                for sub_field in field.fields:
                    sub_values = qs.values_list(sub_field.field_name, flat=True)
                    with contextlib.suppress(Exception):
                        doc_fields[f"{field.field_name}__{get_field_mapping(sub_field)}"] = (
                            prepare_value(list(sub_values))
                        )
            elif isinstance(value, Model):
                # Single related instance: read each sub-field from it directly.
                for sub_field in field.fields:
                    with contextlib.suppress(Exception):
                        doc_fields[f"{field.field_name}__{get_field_mapping(sub_field)}"] = (
                            prepare_value(sub_field.get_value(value))
                        )
    return doc_fields
135 | 
136 | 
def ranked_ids_from_search_results(results: Dict[str, Any]) -> List[Tuple[int, float]]:
    """
    Extract all IDs and ranking scores from the hits in each index of the search results,
    sorted by ranking score in descending order.

    Both response shapes are sorted the same way; a single-index response used
    to be returned in hit order without sorting.

    Args:
        results (Dict[str, Any]): The search results dictionary from MeiliSearch.
            Either a single index result with a top-level 'hits' list, or a dict
            with a 'results' key containing a list of index results, each with a
            'hits' list. Hits are objects with 'id' and '_rankingScore' keys.

    Returns:
        List[Tuple[int, float]]: A list of tuples containing (id, ranking_score) for each hit,
                                 sorted by ranking score in descending order.
                                 Hits without an 'id' are skipped. If a hit doesn't have a
                                 ranking score, it defaults to 0.0. Hits with equal scores
                                 keep their original relative order.
    """
    if "hits" in results:
        # Single index result: the hits sit at the top level.
        index_results = [results]
    else:
        # Multiple index results (or neither key present, giving no hits at all).
        index_results = results.get("results", [])

    items: List[Tuple[int, float]] = []
    for index_result in index_results:
        items.extend(
            (hit["id"], hit.get("_rankingScore", 0.0))
            for hit in index_result.get("hits", [])
            if "id" in hit
        )

    # sorted() is stable, so ties keep the order MeiliSearch returned them in.
    return sorted(items, key=lambda item: item[1], reverse=True)
173 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/settings.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | from typing import Any, Dict, List, Optional, Type, Union
  3 | 
  4 | from django.db.models import Model
  5 | from wagtail.search.index import AutocompleteField, FilterField, SearchField
  6 | 
  7 | from .defaults import AUTOCOMPLETE_SUFFIX, DEFAULT_RANKING_RULES, FILTER_SUFFIX, STOP_WORDS
  8 | 
  9 | 
 10 | def _get_field_mapping(field: Union[SearchField, FilterField, AutocompleteField]) -> str:
 11 |     """Returns the appropriate field mapping based on the field type.
 12 | 
 13 |     Args:
 14 |         field: The field to get the mapping for. Can be a SearchField, FilterField,
 15 |             or AutocompleteField.
 16 | 
 17 |     Returns:
 18 |         str: The field name with an appropriate suffix if needed.
 19 |     """
 20 |     if isinstance(field, FilterField):
 21 |         return field.field_name + FILTER_SUFFIX
 22 |     if isinstance(field, AutocompleteField):
 23 |         return field.field_name + AUTOCOMPLETE_SUFFIX
 24 |     return field.field_name
 25 | 
 26 | 
 27 | class MeiliSettings:
 28 |     """One class to hold all the settings to apply to the various indexes.
 29 | 
 30 |     This class centralizes all settings that need to be applied to MeiliSearch indexes
 31 |     and provides methods to apply these settings to specific indexes.
 32 | 
 33 |     Attributes:
 34 |         query_limit (int): Maximum number of results to return
 35 |         ranking_rules (List[str]): Rules for ranking search results
 36 |         skip_models (List[Type[Model]]): Models to skip indexing
 37 |         stop_words (List[str]): Words to exclude from search
 38 |         update_delta (Optional[Dict[str, int]]): Time delta for updates in delta strategy
 39 |         update_strategy (str): Strategy for updating indexes (soft, hard, delta)
 40 |     """
 41 | 
 42 |     def __init__(self, params: Dict[str, Any]) -> None:
 43 |         """Initialize MeiliSettings with configuration parameters.
 44 | 
 45 |         Args:
 46 |             params: Dictionary containing configuration parameters for MeiliSearch.
 47 |                 Accepted keys include:
 48 |                 - STOP_WORDS: List of words to exclude from search
 49 |                 - SKIP_MODELS: List of models to exclude from indexing
 50 |                 - UPDATE_STRATEGY: Strategy for updating indexes ("soft", "hard", or "delta")
 51 |                 - QUERY_LIMIT: Maximum number of results to return
 52 |                 - RANKING_RULES: Rules for ranking search results
 53 |                 - UPDATE_DELTA: Time delta for updates when using "delta" strategy
 54 |         """
 55 |         self.stop_words: List[str] = params.get("STOP_WORDS", STOP_WORDS)
 56 |         self.skip_models: List[Type[Model]] = params.get("SKIP_MODELS", [])
 57 |         self.update_strategy: str = params.get("UPDATE_STRATEGY", "soft")
 58 |         self.query_limit: int = params.get("QUERY_LIMIT", 999999)
 59 |         self.ranking_rules: List[str] = params.get("RANKING_RULES", DEFAULT_RANKING_RULES)
 60 |         self.update_delta: Optional[Dict[str, int]] = None
 61 |         self.index: Any = None
 62 |         if self.update_strategy == "delta":
 63 |             self.update_delta = params.get("UPDATE_DELTA", {"weeks": -1})
 64 | 
 65 |     def apply_settings(self, index: Any) -> None:
 66 |         """Apply all settings to the specified index.
 67 | 
 68 |         This method applies pagination, searchable attributes, filterable attributes,
 69 |         ranking rules, and stop words settings to the given index.
 70 | 
 71 |         Args:
 72 |             index: The MeiliSearch index to apply settings to.
 73 |         """
 74 |         self.index = index
 75 |         model = self.index.model
 76 | 
 77 |         self._apply_paginator(model=model, index=index)
 78 |         self._apply_searchable_attributes(model=model, index=index)
 79 |         self._apply_filterable_attributes(model=model, index=index)
 80 |         self._apply_ranking_rules(model=model, index=index)
 81 |         self._apply_stop_words(model=model, index=index)
 82 |         sys.stdout.write(f"Settings applied for  {model}\n")
 83 | 
 84 |     def _apply_paginator(self, model: Optional[Type[Model]], index: Any) -> None:
 85 |         """Apply pagination settings to the index.
 86 | 
 87 |         Sets the maximum number of hits that can be returned by the index.
 88 | 
 89 |         Args:
 90 |             model: The model associated with the index.
 91 |             index: The MeiliSearch index to apply settings to.
 92 |         """
 93 |         try:
 94 |             index.index.update_settings(
 95 |                 {
 96 |                     "pagination": {
 97 |                         "maxTotalHits": self.query_limit,
 98 |                     },
 99 |                 },
100 |             )
101 |         except Exception as err:
102 |             sys.stdout.write(f"WARN: Failed to update paginator on {model}\n")
103 |             sys.stdout.write(f"{err}\n")
104 | 
105 |     def _apply_searchable_attributes(self, model: Optional[Type[Model]], index: Any) -> None:
106 |         """Apply searchable attributes settings to the index.
107 | 
108 |         Takes the searchable fields for a model, orders them by their boost score (descending)
109 |         and then sends that to the index settings as searchableAttributes - a list of field names.
110 | 
111 |         Example:
112 |             [
113 |                 'title',
114 |                 'blurb',
115 |                 'body',
116 |             ]
117 | 
118 |         Args:
119 |             model: The model to update searchable attributes for.
120 |             index: The MeiliSearch index to apply settings to.
121 |         """
122 |         if model is None:
123 |             return
124 | 
125 |         ordered_fields: List[str] = self._ordered_fields(model)
126 | 
127 |         if not ordered_fields:
128 |             return
129 | 
130 |         try:
131 |             index.index.update_settings(
132 |                 {
133 |                     "searchableAttributes": ordered_fields,
134 |                 },
135 |             )
136 |         except Exception as err:
137 |             sys.stdout.write(f"WARN: Failed to update searchable attributes on {model}: {err}\n")
138 | 
139 |     def _apply_filterable_attributes(self, model: Optional[Type[Model]], index: Any) -> None:
140 |         """Apply filterable attributes settings to the index.
141 | 
142 |         Collects all FilterField fields from the model and sets them as filterable
143 |         attributes in the MeiliSearch index.
144 | 
145 |         Args:
146 |             model: The model to update filterable attributes for.
147 |             index: The MeiliSearch index to apply settings to.
148 |         """
149 |         # Add filter / facet fields
150 |         filter_fields = ["content_type_id_filter"]
151 |         for field in model.get_search_fields():
152 |             if isinstance(field, FilterField):
153 |                 try:  # noqa: SIM105
154 |                     filter_fields.append(_get_field_mapping(field))
155 |                 except Exception:  # noqa: S110
156 |                     pass
157 | 
158 |         try:
159 |             index.index.update_filterable_attributes(filter_fields)
160 |         except Exception as err:
161 |             sys.stdout.write(f"WARN: Failed to update filterable_attributes on {model}\n")
162 |             sys.stdout.write(f"{err}\n")
163 | 
164 |     def _apply_ranking_rules(self, model: Optional[Type[Model]], index: Any) -> None:
165 |         """Apply ranking rules settings to the index.
166 | 
167 |         Sets the ranking rules that determine the order of search results.
168 | 
169 |         Args:
170 |             model: The model associated with the index.
171 |             index: The MeiliSearch index to apply settings to.
172 |         """
173 |         try:
174 |             index.index.update_settings(
175 |                 {
176 |                     "rankingRules": self.ranking_rules,
177 |                 },
178 |             )
179 |         except Exception as err:
180 |             sys.stdout.write(f"WARN: Failed to update ranking_rules on {model}\n")
181 |             sys.stdout.write(f"{err}\n")
182 | 
183 |     def _apply_stop_words(self, model: Optional[Type[Model]], index: Any) -> None:
184 |         """Apply stop words settings to the index.
185 | 
186 |         Sets the list of words that should be excluded from search indexing.
187 | 
188 |         Args:
189 |             model: The model associated with the index.
190 |             index: The MeiliSearch index to apply settings to.
191 |         """
192 |         try:
193 |             index.index.update_settings(
194 |                 {
195 |                     "stopWords": self.stop_words,
196 |                 },
197 |             )
198 |         except Exception as err:
199 |             sys.stdout.write(f"WARN: Failed to update stop words on {model}\n")
200 |             sys.stdout.write(f"{err}\n")
201 | 
202 |     def _ordered_fields(self, model: Type[Model]) -> List[str]:
203 |         """Create a list of fields ordered by their boost values.
204 | 
205 |         Extracts searchable fields from the model and sorts them by their
206 |         boost values in descending order (highest boost first).
207 | 
208 |         Args:
209 |             model: The model to get field boosts for.
210 | 
211 |         Returns:
212 |             List[str]: A list of field names ordered by their boost values in descending order.
213 |         """
214 |         if not model or not hasattr(model, "search_fields"):
215 |             return []
216 | 
217 |         fields = []
218 |         for field in model.search_fields:
219 |             if not isinstance(field, (SearchField, AutocompleteField)):
220 |                 continue
221 |             boost = 1
222 |             if hasattr(field, "boost"):
223 |                 # Ensure boost is a number, default to 1 if None or invalid
224 |                 try:
225 |                     boost = 1 if field.boost is None else field.boost
226 |                 except (TypeError, ValueError):
227 |                     boost = 1
228 |             fields.append((field.field_name, boost))  # noqa: PERF401
229 | 
230 |         # Sort safely with a key function that handles None values
231 |         def safe_sort_key(item):
232 |             """Safe sorting key function that handles None boost values.
233 | 
234 |             Args:
235 |                 item: A tuple of (field_name, boost_value)
236 | 
237 |             Returns:
238 |                 int or float: The boost value or 0 if the boost is None
239 |             """
240 |             _, boost = item
241 |             # Return a default value (0) if boost is None
242 |             return 0 if boost is None else boost
243 | 
244 |         sorted_fields = [field[0] for field in sorted(fields, key=safe_sort_key, reverse=True)]
245 |         return sorted_fields
246 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/backend.py:
--------------------------------------------------------------------------------
  1 | from typing import Any, Dict, List, Optional, Type, TypeVar, Union
  2 | 
  3 | import meilisearch
  4 | from django.db.models import Model, QuerySet
  5 | from django.utils.functional import cached_property
  6 | from wagtail.search.backends.base import BaseSearchBackend, EmptySearchResults
  7 | 
  8 | from .index import (
  9 |     MeiliIndexRegistry,
 10 |     MeiliSearchModelIndex,
 11 | )
 12 | from .query import MeiliSearchAutocompleteQueryCompiler, MeiliSearchQueryCompiler
 13 | from .rebuilder import MeiliSearchRebuilder
 14 | from .results import MeiliSearchResults
 15 | from .settings import MeiliSettings
 16 | from .utils import class_is_indexed, get_indexed_models
 17 | 
 18 | T = TypeVar("T", bound=Model)
 19 | 
 20 | 
 21 | class MeiliSearchBackend(BaseSearchBackend):
 22 |     """
 23 |     A search backend implementation for MeiliSearch.
 24 | 
 25 |     This class provides methods to interact with MeiliSearch for indexing and searching content.
 26 |     """
 27 | 
 28 |     query_compiler_class: MeiliSearchQueryCompiler = MeiliSearchQueryCompiler
 29 |     autocomplete_query_compiler_class: MeiliSearchAutocompleteQueryCompiler = (
 30 |         MeiliSearchAutocompleteQueryCompiler
 31 |     )
 32 |     results_class: MeiliSearchResults = MeiliSearchResults
 33 |     rebuilder_class: MeiliSearchRebuilder = MeiliSearchRebuilder
 34 | 
 35 |     def __init__(self, params: Dict[str, Any]) -> None:
 36 |         """
 37 |         Initialize the MeiliSearchBackend.
 38 | 
 39 |         Args:
 40 |             params (dict): Configuration parameters for the backend.
 41 |         """
 42 |         super().__init__(params)
 43 |         self.params = params
 44 |         self.client = self._init_client()
 45 |         self.settings = MeiliSettings(params)
 46 |         self.index_registry = MeiliIndexRegistry(
 47 |             backend=self,
 48 |             settings=self.settings,
 49 |         )
 50 |         self.params: Dict[str, Any] = params
 51 |         self.skip_models: List[Type[Model]] = params.get("SKIP_MODELS", [])
 52 |         self.update_strategy: str = params.get("UPDATE_STRATEGY", "soft")
 53 |         self.query_limit: int = params.get("QUERY_LIMIT", 999999)
 54 |         self.search_params: Dict[str, Any] = self._init_search_params()
 55 |         self.update_delta: Optional[Dict[str, int]] = self._init_update_delta()
 56 | 
 57 |     def get_index_for_model(self, model):
 58 |         """This gets called by the update_index management command and needs to exist
 59 |         as a method on the backend.
 60 | 
 61 |         Args:
 62 |             model (Model): The model we're looking for the index for
 63 | 
 64 |         Returns:
 65 |             MeiliSearchModelIndex: the index for the model
 66 |         """
 67 |         return self.index_registry.get_index_for_model(model)
 68 | 
 69 |     @cached_property
 70 |     def client(self) -> meilisearch.Client:
 71 |         """
 72 |         Lazily initialize and return the MeiliSearch client.
 73 | 
 74 |         Returns:
 75 |             meilisearch.Client: The initialized MeiliSearch client.
 76 |         """
 77 |         if self._client is None:
 78 |             self._client = self._init_client()
 79 |         return self._client
 80 | 
 81 |     def _init_client(self) -> meilisearch.Client:
 82 |         """
 83 |         Initialize the MeiliSearch client.
 84 | 
 85 |         Returns:
 86 |             meilisearch.Client: The initialized MeiliSearch client.
 87 | 
 88 |         Raises:
 89 |             Exception: If the client initialization fails.
 90 |         """
 91 |         try:
 92 |             return meilisearch.Client(
 93 |                 "{}:{}".format(self.params["HOST"], self.params["PORT"]),
 94 |                 self.params["MASTER_KEY"],
 95 |             )
 96 |         except Exception as err:
 97 |             msg = f"Failed to initialize MeiliSearch client: {err}"
 98 |             raise Exception(msg) from err
 99 | 
100 |     def _init_search_params(self) -> Dict[str, Any]:
101 |         """
102 |         Initialize the search parameters.
103 | 
104 |         Returns:
105 |             dict: The initialized search parameters.
106 |         """
107 |         return {
108 |             "limit": self.query_limit,
109 |             "attributesToRetrieve": ["id"],
110 |             "showMatchesPosition": True,
111 |             "showRankingScore": True,
112 |         }
113 | 
114 |     def _init_update_delta(self) -> Optional[Dict[str, int]]:
115 |         """
116 |         Initialize the update delta for the delta update strategy.
117 | 
118 |         Returns:
119 |             dict or None: The update delta configuration or None if not using delta strategy.
120 |         """
121 |         if self.update_strategy == "delta":
122 |             return self.params.get("UPDATE_DELTA", {"weeks": -1})
123 |         return None
124 | 
125 |     def get_rebuilder(self) -> MeiliSearchRebuilder:
126 |         """
127 |         Get the index rebuilder.
128 | 
129 |         Returns:
130 |             MeiliSearchRebuilder: The index rebuilder.
131 |         """
132 |         return self.rebuilder_class(self.get_index_for_model(None))
133 | 
134 |     def reset_index(self) -> None:
135 |         """Reset all indexes for indexed models."""
136 |         for model in get_indexed_models():
137 |             index = self.get_index_for_model(model)
138 |             index._rebuild()
139 | 
140 |     def add_type(self, model: Type[Model]) -> None:
141 |         """
142 |         Add a new model type to the index.
143 | 
144 |         Args:
145 |             model: The model to add to the index.
146 |         """
147 |         self.get_index_for_model(model).add_model(model)
148 | 
149 |     def refresh_index(self) -> None:
150 |         """Refresh all indexes for indexed models."""
151 |         refreshed_indexes: List[MeiliSearchModelIndex] = []
152 |         for model in get_indexed_models():
153 |             index = self.get_index_for_model(model)
154 |             if index not in refreshed_indexes:
155 |                 index.refresh()
156 |                 refreshed_indexes.append(index)
157 | 
158 |     def add(self, obj: Model) -> None:
159 |         """
160 |         Add a single object to the index.
161 | 
162 |         Args:
163 |             obj: The object to add to the index.
164 |         """
165 |         self.get_index_for_model(type(obj)).add_item(obj)
166 | 
167 |     def add_bulk(self, model: Type[T], obj_list: List[T]) -> None:
168 |         """
169 |         Add multiple objects to the index.
170 | 
171 |         Args:
172 |             model: The model of the objects being added.
173 |             obj_list (list): The list of objects to add to the index.
174 |         """
175 |         index = self.get_index_for_model(model)
176 |         index.add_items(model, obj_list)
177 | 
178 |     def delete(self, obj: Model) -> None:
179 |         """
180 |         Delete an object from the index.
181 | 
182 |         Args:
183 |             obj: The object to delete from the index.
184 |         """
185 |         self.get_index_for_model(type(obj)).delete_item(obj)
186 | 
187 |     def _search(
188 |         self,
189 |         query_compiler_class: Union[
190 |             Type[MeiliSearchQueryCompiler],
191 |             Type[MeiliSearchAutocompleteQueryCompiler],
192 |         ],
193 |         query: str,
194 |         model_or_queryset: Union[Type[Model], QuerySet],
195 |         **kwargs: Any,
196 |     ) -> Union[MeiliSearchResults, EmptySearchResults]:
197 |         """
198 |         Perform a search using the specified query compiler.
199 | 
200 |         Args:
201 |             query_compiler_class: The query compiler class to use.
202 |             query (str): The search query.
203 |             model_or_queryset: The model or queryset to search within.
204 |             **kwargs: Additional search parameters.
205 | 
206 |         Returns:
207 |             SearchResults: The search results.
208 |         """
209 |         if isinstance(model_or_queryset, QuerySet):
210 |             model = model_or_queryset.model
211 |             queryset = model_or_queryset
212 |         else:
213 |             model = model_or_queryset
214 |             queryset = model_or_queryset.objects.all()
215 | 
216 |         if not class_is_indexed(model):
217 |             return EmptySearchResults()
218 | 
219 |         if query == "":
220 |             return EmptySearchResults()
221 | 
222 |         search_query = query_compiler_class(queryset, query, **kwargs)
223 |         search_query.check()
224 | 
225 |         return self.results_class(self, search_query)
226 | 
227 |     def search(
228 |         self,
229 |         query: str,
230 |         model_or_queryset: Union[Type[Model], QuerySet],
231 |         fields: Optional[List[str]] = None,
232 |         operator: Optional[str] = None,
233 |         order_by_relevance: bool = True,
234 |     ) -> Union[MeiliSearchResults, EmptySearchResults]:
235 |         """
236 |         Perform a search.
237 | 
238 |         Args:
239 |             query (str): The search query.
240 |             model_or_queryset: The model or queryset to search within.
241 |             fields (list, optional): The fields to search in.
242 |             operator (str, optional): The operator to use for multiple search terms.
243 |             order_by_relevance (bool, optional): Whether to order results by relevance.
244 | 
245 |         Returns:
246 |             SearchResults: The search results.
247 |         """
248 |         return self._search(
249 |             self.query_compiler_class,
250 |             query,
251 |             model_or_queryset,
252 |             fields=fields,
253 |             operator=operator,
254 |             order_by_relevance=order_by_relevance,
255 |         )
256 | 
257 |     def autocomplete(
258 |         self,
259 |         query: str,
260 |         model_or_queryset: Union[Type[Model], QuerySet],
261 |         fields: Optional[List[str]] = None,
262 |         operator: Optional[str] = None,
263 |         order_by_relevance: bool = True,
264 |     ) -> Union[MeiliSearchResults, EmptySearchResults]:
265 |         """
266 |         Perform an autocomplete search.
267 | 
268 |         Args:
269 |             query (str): The autocomplete query.
270 |             model_or_queryset: The model or queryset to search within.
271 |             fields (list, optional): The fields to search in.
272 |             operator (str, optional): The operator to use for multiple search terms.
273 |             order_by_relevance (bool, optional): Whether to order results by relevance.
274 | 
275 |         Returns:
276 |             SearchResults: The autocomplete search results.
277 |         """
278 |         return self._search(
279 |             self.autocomplete_query_compiler_class,
280 |             query,
281 |             model_or_queryset,
282 |             fields=fields,
283 |             operator=operator,
284 |             order_by_relevance=order_by_relevance,
285 |         )
286 | 
287 | 
288 | SearchBackend = MeiliSearchBackend
289 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/results.py:
--------------------------------------------------------------------------------
  1 | from collections import OrderedDict
  2 | from typing import Any, Dict, List, Optional, Tuple, Type
  3 | 
  4 | from django.db.models import Case, Model, QuerySet, When
  5 | from wagtail.search.backends.base import BaseSearchResults
  6 | from wagtail.search.query import Fuzzy, Phrase, PlainText
  7 | 
  8 | from .utils import get_descendant_models, get_index_label, ranked_ids_from_search_results, weak_lru
  9 | 
 10 | 
 11 | class MeiliSearchResults(BaseSearchResults):
 12 |     """A class to handle search results from MeiliSearch.
 13 | 
 14 |     This class extends BaseSearchResults and provides methods to process
 15 |     and retrieve search results from MeiliSearch, including faceting and filtering
 16 |     capabilities.
 17 | 
 18 |     Attributes:
 19 |         _last_count: Cache for the last count result.
 20 |         supports_facet: Whether faceting is supported by this backend.
 21 |     """
 22 | 
 23 |     _last_count: Optional[int] = None
 24 |     supports_facet: bool = True
 25 | 
 26 |     def facet(self, field_name: str) -> OrderedDict:
 27 |         """
 28 |         Retrieve facet data for a given field from MeiliSearch. To use this, you'd do something
 29 |         like this:
 30 | 
 31 |         ```python
 32 |         Page.objects.search('query').facet('content_type_id')
 33 |         ```
 34 |         and this returns an ordered dictionary containing the facet data, ordered by the count
 35 |         of each facet value, like this...
 36 | 
 37 |         ```
 38 |         OrderedDict([('58', 197), ('75', 2), ('52', 1), ('54', 1), ('61', 1)])
 39 |         ```
 40 | 
 41 |         In this example, pages with the content type ID of 58 return 197 results, and so on.
 42 | 
 43 |         Args:
 44 |             field_name (str): The name of the field for which to retrieve facet data.
 45 | 
 46 |         Returns:
 47 |             OrderedDict: An ordered dictionary containing the facet data.
 48 |         """
 49 |         qc = self.query_compiler
 50 |         model = qc.queryset.model
 51 |         models = get_descendant_models(model)
 52 |         try:
 53 |             terms = qc.query.query_string
 54 |         except AttributeError:
 55 |             return None
 56 |         filter_field = f"{field_name}_filter"
 57 | 
 58 |         results = OrderedDict()
 59 |         for m in models:
 60 |             index = self.backend.get_index_for_model(m)
 61 |             filterable_fields = index.client.index(index.label).get_filterable_attributes()
 62 |             if filter_field in filterable_fields:
 63 |                 result = index.search(
 64 |                     terms,
 65 |                     {
 66 |                         "facets": [filter_field],
 67 |                     },
 68 |                 )
 69 |                 try:
 70 |                     res = result["facetDistribution"][filter_field]
 71 |                 except KeyError:
 72 |                     pass
 73 |                 else:
 74 |                     results.update(res)
 75 | 
 76 |         # Sort the results
 77 |         sorted_dict = OrderedDict(sorted(results.items(), key=lambda x: x[1], reverse=True))
 78 | 
 79 |         return sorted_dict
 80 | 
 81 |     def filter(self, filters: List[Tuple[str, str]], operator: str = "AND") -> QuerySet:
 82 |         """Filter search results based on field-value pairs.
 83 | 
 84 |         Takes a list of tuples containing filter fields and values as strings,
 85 |         and checks they're valid before passing them on to _do_search.
 86 | 
 87 |         Args:
 88 |             filters: A list of (field_name, value) tuples to filter by.
 89 |                 Example: [('category', 'news'), ('author', 'john')]
 90 | 
 91 |         Returns:
 92 |             QuerySet: Filtered search results.
 93 | 
 94 |         Raises:
 95 |             ValueError: If no filters are provided or if filters are invalid.
 96 |         """
 97 |         if not len(filters):
 98 |             msg = "No filters provided"
 99 |             raise ValueError(msg)
100 | 
101 |         for item in filters:
102 |             if not isinstance(item, tuple) or len(item) != 2:
103 |                 msg = f"Invalid filter item: {item}"
104 |                 raise ValueError(msg)
105 | 
106 |         res = self._do_search(filters=filters, operator=operator)
107 |         return res
108 | 
109 |     @weak_lru()
110 |     def _get_field_boosts(self, model: Type[Model]) -> Dict[str, float]:
111 |         """Get the boost values for fields in a given model.
112 | 
113 |         Args:
114 |             model: The model to get field boosts for.
115 | 
116 |         Returns:
117 |             Dict[str, float]: A dictionary mapping field names to their boost values.
118 |         """
119 |         boosts = {}
120 |         for field in model.search_fields:
121 |             if hasattr(field, "boost"):
122 |                 boosts[field.field_name] = field.boost
123 |         return boosts
124 | 
125 |     @property
126 |     def models(self) -> List[Type[Model]]:
127 |         """Get all descendant models of the queried model.
128 | 
129 |         Returns:
130 |             List[Type[Model]]: A list of descendant models.
131 |         """
132 |         return get_descendant_models(self.query_compiler.queryset.model)
133 | 
134 |     @property
135 |     def query_string(self) -> str:
136 |         """Get the query string from the query compiler.
137 | 
138 |         Returns:
139 |             str: The query string if it's a PlainText, Phrase, or Fuzzy query,
140 |                 otherwise an empty string.
141 |         """
142 |         query = self.query_compiler.query
143 |         if isinstance(query, (PlainText, Phrase, Fuzzy)):
144 |             return query.query_string
145 |         return ""
146 | 
147 |     def _build_queries(
148 |         self,
149 |         models: List[Type[Model]],
150 |         terms: str,
151 |         filters: Optional[List[Tuple[str, str]]] = None,
152 |         operator: str = "AND",
153 |     ) -> List[Dict[str, Any]]:
154 |         """Build a list of queries for MeiliSearch's multi-search API.
155 | 
156 |         Creates query dictionaries for each model and applies any filters,
157 |         suitable for passing to MeiliSearch's multi-search API.
158 | 
159 |         Args:
160 |             models: The models to search.
161 |             terms: The search terms.
162 |             filters: The filters to apply, as (field, value) tuples.
163 |                 Defaults to None.
164 | 
165 |         Returns:
166 |             List[Dict[str, Any]]: A list of query dictionaries ready for the API.
167 |         """
168 |         if filters is None:
169 |             filters = []
170 | 
171 |         # This block was actually part of the old boosts used before Meilisearch had
172 |         # native ranking. However, if I remove this, somehow we end up searching
173 |         # across all indexes instead of only those covered by the queryset we
174 |         # want to search in. Eventually I'll work out why and remove this.
175 |         models_boosts = {}
176 |         for model in models:
177 |             label = get_index_label(model)
178 |             models_boosts[label] = self._get_field_boosts(model)
179 | 
180 |         # Get active indexes
181 |         # For model types that don't have any documents, meilisearch won't
182 |         # create an index, so we have to check before running multi_search
183 |         # if an index exists, otherwise the entire multi_search call will fail.
184 |         limit = self.backend.settings.query_limit
185 |         active_index_dict = self.backend.client.get_indexes({"limit": limit})
186 |         active_indexes = [index for index in active_index_dict["results"]]
187 | 
188 |         queries = []
189 |         for index in active_indexes:
190 |             filterable_fields = index.get_filterable_attributes()
191 |             q = {  # noqa: PERF401
192 |                 "indexUid": index.uid,
193 |                 "q": terms,
194 |                 **self.backend.search_params,
195 |             }
196 |             if len(filters):
197 |                 filter_list = []
198 |                 for item in filters:
199 |                     filter_field = f"{item[0]}_filter"
200 |                     filter_value = item[1]
201 |                     if filter_field in filterable_fields:
202 |                         filter_list.append(f"{filter_field} = '{filter_value}'")
203 |                 q["filter"] = f" {operator} ".join(filter_list)
204 |             queries.append(q)
205 | 
206 |         return queries
207 | 
208 |     def _do_search(
209 |         self,
210 |         filters: Optional[List[Tuple[str, str]]] = None,
211 |         operator: str = "AND",
212 |     ) -> QuerySet:
213 |         """Perform the search operation.
214 | 
215 |         Executes the search query against MeiliSearch, processes the results,
216 |         calculates scores, and returns the results in the order specified by the query compiler.
217 | 
218 |         Args:
219 |             filters: Optional list of (field, value) tuples to filter the search results.
220 |                 Defaults to None.
221 | 
222 |         Returns:
223 |             QuerySet: A queryset of search results, ordered by relevance if specified.
224 |         """
225 |         models = self.models
226 |         terms = self.query_string
227 | 
228 |         queries = self._build_queries(models, terms, filters, operator)
229 |         multi_search_results = self.backend.client.multi_search(queries)
230 | 
231 |         # Get search results sorted by relevance score in descending order (highest scores first)
232 |         # We do this here so that we can pre-sort the ID list by rank so that if we're searching
233 |         # within a window of results, that window will only be searching within the top ranked
234 |         # results.
235 |         sorted_id_score_pairs = ranked_ids_from_search_results(multi_search_results)
236 |         id_to_score = {id: score for id, score in sorted_id_score_pairs}
237 |         sorted_ids = [id for id, _ in sorted_id_score_pairs]
238 | 
239 |         # Retrieve results from the database
240 |         qc = self.query_compiler
241 |         window_sorted_ids = sorted_ids[self.start : self.stop]
242 |         results = qc.queryset.filter(pk__in=window_sorted_ids)
243 | 
244 |         # Preserve the order by relevance score by annotating with actual scores
245 |         if qc.order_by_relevance and sorted_ids:
246 |             # Create a mapping from ID to its actual ranking score
247 |             # This directly uses the score values from MeiliSearch
248 |             # Higher scores will be ordered first when we use descending order
249 |             score_cases = [When(pk=pk, then=id_to_score.get(pk, 0.0)) for pk in sorted_ids]
250 | 
251 |             # Annotate the queryset with the actual scores
252 |             preserved_score = Case(*score_cases, default=0.0)
253 |             results = results.annotate(search_rank=preserved_score)
254 | 
255 |             # Order by the actual score in descending order (highest first)
256 |             results = results.order_by("-search_rank")
257 |         # Enable this for debugging
258 |         # for result in results:
259 |         #     print(f"{result.search_rank}: {result.id} - {result.title}")
260 | 
261 |         res = results.distinct()
262 | 
263 |         return res
264 | 
265 |     def _do_count(self) -> int:
266 |         """Count the total number of search results.
267 | 
268 |         This method gets called before _do_search when using Django's paginator.
269 |         It ensures that _results_cache and _count_cache are properly populated.
270 | 
271 |         Note:
272 |             This method gets called before _do_search when using Django pagination,
273 |             which means _results_cache and _count_cache may be empty on first run.
274 | 
275 |         Returns:
276 |             int: The total number of search results.
277 |         """
278 |         if self._count_cache:
279 |             return self._count_cache
280 |         if self._results_cache:
281 |             return len(self._results_cache)
282 | 
283 |         res = self._do_search()
284 |         self._count_cache = res.count()
285 |         self._results_cache = list(res)
286 |         return self._count_cache
287 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Wagtail MeiliSearch
  2 | 
  3 | This is a Wagtail search backend for the [MeiliSearch](https://github.com/meilisearch/MeiliSearch) search engine.
  4 | 
  5 | 
  6 | ## Installation
  7 | 
  8 | `uv add wagtail_meilisearch` or `pip install wagtail_meilisearch`
  9 | 
 10 | ## Upgrading
 11 | 
 12 | If you're upgrading MeiliSearch from 0.9.x to anything higher, you will need to destroy and re-create MeiliSearch's data.ms directory.
 13 | 
 14 | ## Requirements
 15 | 
 16 | - Python >=3.10
 17 | - wagtail >=6.0
 18 | - meilisearch-python >= 0.36.0
 19 | 
 20 | Tested against Meilisearch server v1.15.2 - latest at the time of writing.
 21 | 
 22 | ## Configuration
 23 | 
 24 | See the [MeiliSearch docs](https://docs.meilisearch.com/guides/advanced_guides/installation.html#environment-variables-and-flags) for info on the values you want to add here.
 25 | 
 26 | ```python
 27 | WAGTAILSEARCH_BACKENDS = {
 28 |     'default': {
 29 |         'BACKEND': 'wagtail_meilisearch.backend',
 30 |         'HOST': os.environ.get('MEILISEARCH_HOST', 'http://127.0.0.1'),
 31 |         'PORT': os.environ.get('MEILISEARCH_PORT', '7700'),
 32 |         'MASTER_KEY': os.environ.get('MEILI_MASTER_KEY', '')
 33 |     },
 34 | }
 35 | ```
 36 | 
 37 | ## Update strategies
 38 | 
 39 | Indexing a very large site with `python manage.py update_index` can be pretty taxing on the CPU, take quite a long time, and reduce the responsiveness of the MeiliSearch server. Wagtail-MeiliSearch offers two update strategies, `soft` and `hard`. The default, `soft` strategy will do an "add or update" call for each document sent to it, while the `hard` strategy will delete every document in the index and then replace them.
 40 | 
 41 | There are tradeoffs with either strategy - `hard` will guarantee that your search data matches your model data, but be hard work on the CPU for longer. `soft` will be faster and less CPU intensive, but if a field is removed from your model between indexings, that field data will remain in the search index.
 42 | 
 43 | ### Delta strategy
 44 | 
 45 | The `delta` strategy is useful if you habitually add created_at and updated_at timestamps to your models. This strategy will check the fields...
 46 | 
 47 | * `first_published_at`
 48 | * `last_published_at`
 49 | * `created_at`
 50 | * `updated_at`
 51 | 
 52 | And only update the records for objects where one or more of these fields has a date more recent than the time delta specified in the settings.
 53 | 
 54 | ```python
 55 | WAGTAILSEARCH_BACKENDS = {
 56 |     'default': {
 57 |         'BACKEND': 'wagtail_meilisearch.backend',
 58 |         'HOST': os.environ.get('MEILISEARCH_HOST', 'http://127.0.0.1'),
 59 |         'PORT': os.environ.get('MEILISEARCH_PORT', '7700'),
 60 |         'MASTER_KEY': os.environ.get('MEILI_MASTER_KEY', ''),
 61 |         'UPDATE_STRATEGY': 'delta',
 62 |         'UPDATE_DELTA': {
 63 |             'weeks': -1
 64 |         }
 65 |     }
 66 | }
 67 | ```
 68 | 
 69 | If the delta is set to `{'weeks': -1}`, wagtail-meilisearch will only update indexes for documents where one of the timestamp fields has a date within the last week. Your time delta _must_ be negative.
 70 | 
 71 | Under the hood we use [Arrow](https://arrow.readthedocs.io), so you can use any keyword args supported by [Arrow's `shift()`](https://arrow.readthedocs.io/en/latest/index.html#replace-shift).
 72 | 
 73 | If you set `UPDATE_STRATEGY` to `delta` but don't provide a value for `UPDATE_DELTA` wagtail-meilisearch will default to `{'weeks': -1}`.
 74 | 
 75 | ## Skip models
 76 | 
 77 | Sometimes you might have a site where a certain page model is guaranteed not to change, for instance an archive section. After creating your initial search index, you can add a `SKIP_MODELS` key to the config to tell wagtail-meilisearch to ignore specific models when running `update_index`. Behind the scenes wagtail-meilisearch returns a dummy model index to the `update_index` management command for every model listed in your `SKIP_MODELS` - this ensures that this setting only affects `update_index`, so if you manually edit one of the models listed it should get re-indexed with the update signal.
 78 | 
 79 | ```python
 80 | WAGTAILSEARCH_BACKENDS = {
 81 |     'default': {
 82 |         'BACKEND': 'wagtail_meilisearch.backend',
 83 |         'HOST': os.environ.get('MEILISEARCH_HOST', 'http://127.0.0.1'),
 84 |         'PORT': os.environ.get('MEILISEARCH_PORT', '7700'),
 85 |         'MASTER_KEY': os.environ.get('MEILI_MASTER_KEY', ''),
 86 |         'UPDATE_STRATEGY': 'delta',
 87 |         'SKIP_MODELS': [
 88 |             'core.ArchivePage',
 89 |         ]
 90 |     }
 91 | }
 92 | ```
 93 | 
 94 | ## Stop Words
 95 | 
 96 | Stop words are words for which we don't want to place significance on their frequency. For instance, the search query `tom and jerry` would return far less relevant results if the word `and` was given the same importance as `tom` and `jerry`. There's a fairly sane list of English language stop words supplied, but you can also supply your own. This is particularly useful if you have a lot of content in any other language.
 97 | 
 98 | ```python
 99 | MY_STOP_WORDS = ['a', 'list', 'of', 'words']
100 | 
101 | WAGTAILSEARCH_BACKENDS = {
102 |     'default': {
103 |         'BACKEND': 'wagtail_meilisearch.backend',
104 |         [...]
105 |         'STOP_WORDS': MY_STOP_WORDS
106 |     },
107 | }
108 | ```
109 | 
110 | Or alternatively, you can extend the built in list.
111 | 
112 | ```python
113 | from wagtail_meilisearch.settings import STOP_WORDS
114 | 
115 | MY_STOP_WORDS = STOP_WORDS + WELSH_STOP_WORDS + FRENCH_STOP_WORDS
116 | 
117 | WAGTAILSEARCH_BACKENDS = {
118 |     'default': {
119 |         'BACKEND': 'wagtail_meilisearch.backend',
120 |         [...]
121 |         'STOP_WORDS': MY_STOP_WORDS
122 |     },
123 | }
124 | ```
125 | 
126 | ## Ranking
127 | 
128 | We now support Meilisearch's native ranking system which is considerably faster than the rather hacky way we were having to do it before. Meilisearch takes a [list of fields ordered by precedence](https://www.meilisearch.com/docs/learn/relevancy/attribute_ranking_order) to affect the attribute ranking so we build that list by inspecting the `index.SearchField`s and `index.AutocompleteField`s on each model and ordering by boost. As an example, if you want the page title to be the most important field to rank on...
129 | 
130 | ```python
131 | search_fields = Page.search_fields + [
132 |     index.AutocompleteField("title", boost=10),
133 |     index.SearchField("body"),
134 |     index.SearchField("search_description", boost=5),
135 | ]
136 | 
137 | ```
138 | 
139 | Any field that doesn't have a `boost` value will be given a default of 0 but will still be sent to Meilisearch's settings as part of the ordered list, so the above settings send an attribute ranking order to Meilisearch of...
140 | 
141 | ```python
142 | ['title', 'search_description', 'body']
143 | ```
144 | 
145 | In the backend, we automatically annotate the search results with their ranking, with a float between 0 and 1 as `search_rank` so in your search view you can sort by that value.
146 | 
147 | ```python
148 | def search_view(request):
149 |     search_query = request.GET.get('query', '')
150 |     search_results = Page.objects.search(search_query)
151 | 
152 |     # Results are already sorted by search_rank
153 |     # You can access the rank for each result
154 |     for result in search_results:
155 |         print(f"Result: {result.title}, Rank: {result.search_rank}")
156 | 
157 |     return render(request, 'search_results.html.j2', {
158 |         'search_query': search_query,
159 |         'search_results': search_results,
160 |     })
161 | ```
162 | 
163 | And you might even fancy using the search rank in your template...
164 | 
165 | ```jinja2
166 | {% for result in search_results %}
167 |     <div class="result {% if result.search_rank > 0.8 %}high-relevance{% endif %}">
168 |         <h3>{{ result.title }}</h3>
169 |         <p>Relevance: {{ result.search_rank }}</p>
170 |     </div>
171 | {% endfor %}
172 | ```
173 | 
174 | ## Faceting
175 | 
176 | We now support faceting. In order to use it, you need to add `FilterField`s to your model on any field that you might want to facet on...
177 | 
178 | ```python
179 | search_fields = Page.search_fields + [
180 |     index.AutocompleteField("title", boost=10),
181 |     index.SearchField("body"),
182 |     index.SearchField("search_description", boost=5),
183 |     index.FilterField("category"),
184 | ]
185 | ```
186 | 
187 | With that in place, you can call `facet` on a search to get an OrderedDict of the facet values and their counts. By default, Wagtail adds several `FilterField`s to the Page model too, so for instance you can get the facet results of `content_type_id` with...
188 | 
189 | ```python
190 | Page.objects.search("query").facet("content_type")
191 | 
192 | # OrderedDict([('58', 197), ('75', 2), ('52', 1), ('54', 1), ('61', 1)])
193 | ```
194 | 
195 | The ordered dict contains tuples of the form `(value, count)` where `value: str` is the value of the field (typically its pk) and `count` is the number of documents that have that value.
196 | 
197 | ### Filtering
198 | 
199 | Armed with your facet counts, you can filter your search results by passing `filters` to the `filter` method. For example, to filter by `content_type_id`...
200 | 
201 | ```python
202 | Page.objects.search("query").filter(filters=[("content_type", "58")])
203 | 
204 | # <PageQuerySet [<Page: Page 1>, <Page: Page 2>, ...]
205 | ```
206 | 
207 | The `filters` param should be a list of tuples, where each tuple is of the form `(field, value)`. Being a list, you can pass multiple tuples to filter by multiple fields. For example, to filter by `content_type` and `category`...
208 | 
209 | ```python
210 | Page.objects.search("query").filter(filters=[("content_type", "58"), ("category", "1")])
211 | 
212 | # <PageQuerySet [<Page: Page 1>, <Page: Page 2>, ...]
213 | ```
214 | 
215 | And finally, you can choose the operator for the filter. By default, the operator is `AND`, but you can also use `OR`...
216 | 
217 | ```python
218 | Page.objects.search("query").filter(filters=[("content_type", "58"), ("category", "1")], operator="OR")
219 | 
220 | # <PageQuerySet [<Page: Page 1>, <Page: Page 2>, ...]
221 | ```
222 | 
223 | ## Query limits
224 | 
225 | If you have a lot of DB documents, the final query to the database can be quite a heavy load. Meilisearch's relevance means that it's usually pretty safe to restrict the number of documents Meilisearch returns, and therefore the number of documents your app needs to get from the database. The limit is **per model**, so if your project has 10 page types and you set a limit of 1000, there's a possible 10000 results.
226 | 
227 | ```python
228 | WAGTAILSEARCH_BACKENDS = {
229 |     'default': {
230 |         'BACKEND': 'wagtail_meilisearch.backend',
231 |         [...]
232 |         'QUERY_LIMIT': 1000
233 |     },
234 | }
235 | ```
236 | 
237 | ## Contributing
238 | 
239 | If you want to help with the development I'd be more than happy. The vast majority of the heavy lifting is done by MeiliSearch itself, but there is a TODO list...
240 | 
241 | 
242 | ### TODO
243 | 
244 | * Write tests
245 | * Performance improvements
246 | * Make use of the async in meilisearch-python
247 | * ~~Faceting~~
248 | * ~~Implement boosting in the sort algorithm~~
249 | * ~~Implement stop words~~
250 | * ~~Search results~~
251 | * ~~Add support for the autocomplete api~~
252 | * ~~Ensure we're getting results by relevance~~
253 | 
254 | ## Change Log
255 | 
256 | #### 1.0.0
257 | * Big speed improvements thanks to using Meilisearch's native ranking system
258 | * Adds faceting
259 | * Adds filtering
260 | * Adds typing throughout
261 | 
262 | #### 0.17.3
263 | * Fixes a bug where the meilisearch indexes could end up with a wrong maxTotalHits
264 | 
265 | #### 0.17.2
266 | * Fixes a bug where the backend could report the wrong counts for results. This turned out to be down to the fact that _do_count can sometimes get called before _do_search, possibly due to Django's paginator. This finally explains why sometimes search queries ran twice.
267 | 
268 | #### 0.17.1
269 | * Fixes a bug where multi_search can fail when a model index doesn't exist. For models that have no documents, Meilisearch doesn't create an index, so we need to check the active indexes before calling multi_search; otherwise the entire call fails.
270 | 
271 | #### 0.17.0
272 | * A few small performance and reliability improvements, and a lot of refactoring of the code into multiple files to make future development a bit simpler.
273 | 
274 | #### 0.16.0
275 | * Thanks to @BertrandBordage, a massive speed improvement through using the /multi-search endpoint introduced in Meilisearch 1.1.0
276 | 
277 | #### 0.14.0
278 | * Adds Django 4 support and compatibility with the latest meilisearch server (0.30.2) and meilisearch python (0.23.0)
279 | 
280 | #### 0.14.0
281 | * Updates to work with the latest versions of Meilisearch (v0.28.1) and meilisearch-python (^0.19.1)
282 | 
283 | #### 0.13.0
284 | * Yanked, sorry
285 | 
286 | #### 0.12.0
287 | * Adds QUERY_LIMIT option to settings
288 | 
289 | #### 0.11.0
290 | * Compatibility changes to keep up with MeiliSearch and [meilisearch-python](https://github.com/meilisearch/meilisearch-python)
291 | * we've also switched to more closely tracking the major and minor version numbers of meilisearch-python so that it's easier to see compatibility at a glance.
292 | * Note: if you're upgrading from an old version of MeiliSearch you may need to destroy MeiliSearch's data directory and start with a clean index.
293 | 
294 | #### 0.1.5
295 | * Adds the delta update strategy
296 | * Adds the SKIP_MODELS setting
297 | * Adds support for using boost on your search fields
298 | 
299 | 
300 | ### Thanks
301 | 
302 | Thank you to the devs of [Wagtail-Whoosh](https://github.com/wagtail/wagtail-whoosh). Reading the code over there was the only way I could work out how Wagtail Search backends are supposed to work.
303 | 


--------------------------------------------------------------------------------
/src/wagtail_meilisearch/index.py:
--------------------------------------------------------------------------------
  1 | import contextlib
  2 | 
  3 | # Import for type checking only
  4 | from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Type, cast
  5 | 
  6 | import arrow
  7 | from django.core.cache import cache
  8 | from django.db.models import Model
  9 | from django.utils.functional import cached_property
 10 | from meilisearch.index import Index
 11 | from requests.exceptions import HTTPError
 12 | 
 13 | from .utils import get_document_fields
 14 | 
 15 | if TYPE_CHECKING:
 16 |     from .backend import MeiliSearchBackend
 17 |     from .settings import MeiliSettings
 18 | 
 19 | try:
 20 |     from cacheops import invalidate_model
 21 | 
 22 |     USING_CACHEOPS = True
 23 | except ImportError:
 24 |     USING_CACHEOPS = False
 25 | 
 26 | 
 27 | class MeiliIndexError(Exception):
 28 |     pass
 29 | 
 30 | 
 31 | class MeiliIndexRegistry:
 32 |     """A registry of all the indexes we're using.
 33 | 
 34 |     This class maintains a registry of all MeiliSearch indexes and provides methods
 35 |     to retrieve and manage them.
 36 | 
 37 |     Attributes:
 38 |         indexes (Dict[str, MeiliSearchModelIndex]): Dictionary mapping labels to index objects.
 39 |     """
 40 | 
 41 |     indexes: Dict[str, "MeiliSearchModelIndex"] = {}
 42 | 
 43 |     def __init__(self, backend: Any, settings: Any) -> None:
 44 |         """Initialize the MeiliIndexRegistry.
 45 | 
 46 |         Args:
 47 |             backend: The search backend instance.
 48 |             settings: The settings for the search backend.
 49 |         """
 50 |         self.backend = backend
 51 |         self.client = backend.client
 52 |         self.settings = settings
 53 | 
 54 |     def _get_label(self, model: Type[Model]) -> str:
 55 |         """Get a unique label for the model's index.
 56 | 
 57 |         Args:
 58 |             model: The model to get the label for.
 59 | 
 60 |         Returns:
 61 |             str: A unique label for the model's index.
 62 |         """
 63 |         label = model._meta.label.replace(".", "-")
 64 |         return label
 65 | 
 66 |     def get_index_for_model(self, model: Type[Model]) -> "MeiliSearchModelIndex":
 67 |         """Get the index for a specific model.
 68 | 
 69 |         This gets called by the get_index_for_model in the backend which in turn is called by
 70 |         update_index management command so needs to exist as a method on the backend.
 71 | 
 72 |         Args:
 73 |             model: The model we're looking for the index for.
 74 | 
 75 |         Returns:
 76 |             MeiliSearchModelIndex: The index for the model.
 77 |         """
 78 |         label = self._get_label(model)
 79 | 
 80 |         # See if it's in our registry
 81 |         if label in self.indexes:
 82 |             return self.indexes.get(label)
 83 | 
 84 |         # See if it's in the cache
 85 |         cache_key = f"meili_index_{label}"
 86 |         index = cache.get(cache_key)
 87 |         if index is None:
 88 |             index = MeiliSearchModelIndex(
 89 |                 backend=self.backend,
 90 |                 model=model,
 91 |             )
 92 |             cache.set(cache_key, index)
 93 | 
 94 |         self.register(label, index)
 95 |         return index
 96 | 
 97 |     def register(self, label: str, index: "MeiliSearchModelIndex") -> None:
 98 |         """Register an index with a label.
 99 | 
100 |         Args:
101 |             label: The label to register the index under.
102 |             index: The index to register.
103 |         """
104 |         self.indexes[label] = index
105 | 
106 |     def _refresh(self, uid: str, model: Type[Model]) -> "MeiliSearchModelIndex":
107 |         """Refresh an index by deleting and recreating it.
108 | 
109 |         Args:
110 |             uid: The unique ID of the index to refresh.
111 |             model: The model associated with the index.
112 | 
113 |         Returns:
114 |             MeiliSearchModelIndex: The newly created index.
115 |         """
116 |         index = self.client.get_index(uid)
117 |         index.delete()
118 |         new_index = self.get_index_for_model(model)
119 |         return new_index
120 | 
121 | 
122 | class MeiliSearchModelIndex:
123 |     """Creates a working index for each model sent to it."""
124 | 
125 |     def __init__(self, backend: Any, model: Optional[Type[Model]]) -> None:
126 |         """Initialize the MeiliSearchModelIndex.
127 | 
128 |         Creates a working index for the specified model and sets up all the necessary
129 |         properties for interacting with MeiliSearch.
130 | 
131 |         Args:
132 |             backend: The backend instance.
133 |             model: The Django model to be indexed.
134 |         """
135 |         self.backend: "MeiliSearchBackend" = backend
136 |         self.settings: "MeiliSettings" = backend.settings
137 |         settings: "MeiliSettings" = self.settings
138 |         self.model: Optional[Type[Model]] = model
139 | 
140 |         self.client: Any = backend.client
141 |         self.query_limit: int = settings.query_limit
142 |         self.name: str = "" if model is None else model._meta.label
143 |         self.model_fields: Set[str] = set()
144 |         if model is not None:
145 |             self.model_fields = set(_.name for _ in model._meta.fields)
146 | 
147 |         self.index: Index = self._set_index(model)
148 |         self.search_params: Dict[str, Any] = {
149 |             "limit": self.query_limit,
150 |             "attributesToRetrieve": ["id", "first_published_at"],
151 |             "showMatchesPosition": True,
152 |         }
153 |         self.update_strategy: str = settings.update_strategy
154 |         self.update_delta: Optional[Dict[str, int]] = settings.update_delta
155 |         self.delta_fields: List[str] = [
156 |             "created_at",
157 |             "updated_at",
158 |             "first_published_at",
159 |             "last_published_at",
160 |         ]
161 |         self.label: str = "" if model is None else self._get_label(model)
162 | 
163 |     def _get_index_settings(self, label: str) -> Dict[str, Any]:
164 |         """Get the settings for the index.
165 | 
166 |         Retrieves the current settings for the specified MeiliSearch index.
167 | 
168 |         Args:
169 |             label: The label of the index.
170 | 
171 |         Returns:
172 |             Dict[str, Any]: The settings for the index.
173 | 
174 |         Raises:
175 |             MeiliIndexError: If unable to get the index settings.
176 |         """
177 |         try:
178 |             return self.client.get_index(label).get_settings()
179 |         except Exception as err:
180 |             msg = f"Failed to get settings for {label}: {err}"
181 |             raise MeiliIndexError(msg) from err
182 | 
183 |     def _set_index(self, model: Optional[Type[Model]]) -> Index:
184 |         """Set up the index for the given model.
185 | 
186 |         Creates or retrieves the MeiliSearch index for the specified model.
187 | 
188 |         Args:
189 |             model: The Django model to create an index for.
190 | 
191 |         Returns:
192 |             Index: The MeiliSearch index object.
193 |         """
194 |         if hasattr(self, "index") and self.index:
195 |             return self.index
196 | 
197 |         if model is None:
198 |             return cast("Index", None)  # This should never be reached in practice
199 | 
200 |         label = self._get_label(model)
201 |         # if index doesn't exist, create
202 |         try:
203 |             index = self.client.index(label)
204 |         except HTTPError:
205 |             # Create the index with primary key setting
206 |             Index.create(self.client.http.config, label, {"primaryKey": "id"})
207 |             index = self.client.index(label)
208 | 
209 |         self.index = index
210 | 
211 |         return index
212 | 
213 |     def _get_label(self, model: Type[Model]) -> str:
214 |         """Get a unique label for the model's index.
215 | 
216 |         Args:
217 |             model: The model to get the label for.
218 | 
219 |         Returns:
220 |             str: A unique label for the model's index.
221 |         """
222 |         if hasattr(self, "label") and self.label:
223 |             return self.label
224 | 
225 |         self.label = label = model._meta.label.replace(".", "-")
226 |         return label
227 | 
228 |     def _rebuild(self) -> None:
229 |         """Rebuild the index by deleting and recreating it.
230 | 
231 |         This method completely recreates the index, which will remove all
232 |         documents and reset all settings.
233 |         """
234 |         self.index.delete()
235 |         self._set_index(self.model)
236 | 
237 |     def add_model(self, model: Type[Model]) -> None:
238 |         """
239 |         Add a model to the index. This method is a no-op as adding is done on initialization.
240 | 
241 |         Args:
242 |             model (Model): The Django model to add to the index.
243 |         """
244 |         pass
245 | 
246 |     def get_index_for_model(self, model: Type[Model]) -> "MeiliSearchModelIndex":
247 |         """
248 |         Get the index for the given model.
249 | 
250 |         Args:
251 |             model (Model): The Django model to get the index for.
252 | 
253 |         Returns:
254 |             MeiliSearchModelIndex: The index for the given model.
255 |         """
256 |         self._set_index(model)
257 |         return self
258 | 
259 |     def _get_document_fields(self, model: Type[Model], item: Model) -> Dict[str, Any]:
260 |         """Get the fields for a document to be indexed.
261 | 
262 |         Extracts all indexable fields from the item using the model's search field definitions.
263 | 
264 |         Args:
265 |             model: The Django model of the item.
266 |             item: The item to be indexed.
267 | 
268 |         Returns:
269 |             Dict[str, Any]: The fields of the document to be indexed.
270 |         """
271 |         return get_document_fields(model, item)
272 | 
273 |     def _create_document(self, model: Type[Model], item: Model) -> Dict[str, Any]:
274 |         """Create a document to be indexed.
275 | 
276 |         Builds a complete document dictionary with all fields and the ID for indexing.
277 | 
278 |         Args:
279 |             model: The Django model of the item.
280 |             item: The item to be indexed.
281 | 
282 |         Returns:
283 |             Dict[str, Any]: The document to be indexed.
284 |         """
285 |         doc_fields = dict(self._get_document_fields(model, item))
286 |         doc_fields.update(id=item.id)
287 |         return doc_fields
288 | 
289 |     def refresh(self) -> None:
290 |         """Refresh the index.
291 | 
292 |         This method is a no-op in the current implementation.
293 |         It exists to maintain compatibility with the Wagtail search API.
294 |         """
295 |         pass
296 | 
297 |     def add_item(self, item: Model) -> None:
298 |         """Add a single item to the index.
299 | 
300 |         Indexes a single model instance according to the current update strategy.
301 |         If using the delta update strategy, only adds the item if it was modified
302 |         within the delta time period.
303 | 
304 |         Args:
305 |             item: The item to be added to the index.
306 |         """
307 |         if self.update_strategy == "delta":
308 |             checked = self._check_deltas([item])
309 |             if len(checked):
310 |                 item = checked[0]
311 | 
312 |         if self.model is None:
313 |             return
314 | 
315 |         doc = self._create_document(self.model, item)
316 |         if self.update_strategy == "soft":
317 |             self.index.update_documents([doc])
318 |         else:
319 |             self.index.add_documents([doc])
320 | 
321 |     def add_items(self, item_model: Type[Model], items: List[Model]) -> bool:
322 |         """Add multiple items to the index.
323 | 
324 |         Indexes multiple model instances according to the current update strategy.
325 |         Processes items in chunks of 100 to avoid overwhelming the MeiliSearch instance.
326 |         If using the delta update strategy, only adds items that were modified
327 |         within the delta time period.
328 | 
329 |         Args:
330 |             item_model: The Django model of the items.
331 |             items: The items to be added to the index.
332 | 
333 |         Returns:
334 |             bool: True if the operation was successful.
335 |         """
336 |         if USING_CACHEOPS:
337 |             with contextlib.suppress(Exception):
338 |                 invalidate_model(item_model)
339 | 
340 |         chunks: List[List[Model]] = [items[x : x + 100] for x in range(0, len(items), 100)]
341 | 
342 |         for chunk in chunks:
343 |             if self.update_strategy == "delta":
344 |                 chunk = self._check_deltas(chunk)
345 |             if self.model is None:
346 |                 continue
347 |             prepared = [self._create_document(self.model, item) for item in chunk]
348 |             with contextlib.suppress(Exception):
349 |                 if prepared:
350 |                     if self.update_strategy in ["soft", "delta"]:
351 |                         self.index.update_documents(prepared)
352 |                     else:
353 |                         self.index.add_documents(prepared)
354 |         return True
355 | 
356 |     @cached_property
357 |     def _has_date_fields(self) -> bool:
358 |         """Check if the model has any of the delta fields.
359 | 
360 |         Determines if the model has any fields that can be used for delta updates
361 |         (created_at, updated_at, first_published_at, last_published_at).
362 | 
363 |         Returns:
364 |             bool: True if the model has any of the delta fields, False otherwise.
365 |         """
366 |         return bool(self.model_fields.intersection(self.delta_fields))
367 | 
368 |     def _check_deltas(self, objects: List[Model]) -> List[Model]:
369 |         """Filter objects based on the delta update strategy.
370 | 
371 |         When using the delta update strategy, this method filters the objects list
372 |         to only include items that have been created or modified within the
373 |         specified time period.
374 | 
375 |         Args:
376 |             objects: The objects to be filtered.
377 | 
378 |         Returns:
379 |             List[Model]: The filtered list of objects.
380 |         """
381 |         filtered: List[Model] = []
382 |         if not self.update_delta:
383 |             return filtered
384 | 
385 |         since = arrow.now().shift(**self.update_delta).datetime
386 |         for obj in objects:
387 |             if self._has_date_fields:
388 |                 for field in self.delta_fields:
389 |                     if hasattr(obj, field):
390 |                         val = getattr(obj, field)
391 |                         try:
392 |                             if val and val > since:
393 |                                 filtered.append(obj)
394 |                                 break
395 |                         except TypeError:
396 |                             pass
397 |         return filtered
398 | 
399 |     def delete_item(self, obj: Model) -> None:
400 |         """Delete an item from the index.
401 | 
402 |         Removes a single document from the index based on its ID.
403 | 
404 |         Args:
405 |             obj: The object to be deleted from the index.
406 |         """
407 |         self.index.delete_document(obj.id)
408 | 
409 |     def delete_all_documents(self) -> None:
410 |         """Delete all documents from the index.
411 | 
412 |         Removes all documents from the index while preserving the index settings.
413 |         This is faster than deleting and recreating the index.
414 |         """
415 |         self.index.delete_all_documents()
416 | 
417 |     def search(self, query: str, extras: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
418 |         """Perform a search on the index.
419 | 
420 |         Executes a search query against the MeiliSearch index with the specified
421 |         search parameters.
422 | 
423 |         Args:
424 |             query: The search query string.
425 |             extras: Optional additional search parameters to include in the request.
426 |                 These will be merged with the default search parameters.
427 | 
428 |         Returns:
429 |             Dict[str, Any]: The search results from MeiliSearch.
430 |         """
431 |         if extras is None:
432 |             extras = {}
433 |         params = self.backend.search_params
434 |         if len(extras):
435 |             params.update(**extras)
436 | 
437 |         return self.index.search(query, params)
438 | 
439 |     def __str__(self) -> str:
440 |         """Get a string representation of the index.
441 | 
442 |         Returns the name of the index for easy identification.
443 | 
444 |         Returns:
445 |             str: The name of the index.
446 |         """
447 |         return self.name
448 | 
449 | 
class DummyModelIndex:
    """Stand-in model index whose operations do nothing.

    Supports the SKIP_MODELS feature: it offers the ``add_model`` and
    ``add_items`` calls of MeiliSearchModelIndex so that skipped models can
    pass through the indexing workflow without anything being indexed.
    """

    def add_model(self, model: Type[Model]) -> None:
        """Accept a model and ignore it.

        Args:
            model: The model that would have been added (unused).
        """
        return None

    def add_items(self, model: Type[Model], chunk: List[Model]) -> None:
        """Accept a batch of items and ignore it.

        Args:
            model: The model of the items (unused).
            chunk: The items that would have been indexed (unused).
        """
        return None
477 | 


--------------------------------------------------------------------------------