├── .dockerignore ├── .github ├── release-drafter-config.yml └── workflows │ ├── check-pypi.yml │ ├── codeql-analysis.yml │ ├── publish-pypi.yml │ └── release-drafter.yml ├── .gitignore ├── API.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── gendoc.py ├── pyproject.toml ├── redisearch ├── __init__.py ├── _util.py ├── aggregation.py ├── auto_complete.py ├── client.py ├── document.py ├── query.py ├── querystring.py ├── reducers.py └── result.py ├── test ├── docker │ ├── Dockerfile │ ├── Makefile │ └── test.sh ├── test-setup.sh ├── test.py ├── test_builder.py ├── titles.csv └── will_play_text.csv.bz2 └── tox.ini /.dockerignore: -------------------------------------------------------------------------------- 1 | /venv*/ 2 | -------------------------------------------------------------------------------- /.github/release-drafter-config.yml: -------------------------------------------------------------------------------- 1 | name-template: 'Version $NEXT_PATCH_VERSION' 2 | tag-template: 'v$NEXT_PATCH_VERSION' 3 | categories: 4 | - title: '🚀Features' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: 'Bug Fixes' 9 | labels: 10 | - 'fix' 11 | - 'bugfix' 12 | - 'bug' 13 | - title: 'Maintenance' 14 | label: 'chore' 15 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 16 | exclude-labels: 17 | - 'skip-changelog' 18 | template: | 19 | ## Changes 20 | 21 | $CHANGES -------------------------------------------------------------------------------- /.github/workflows/check-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Check if required secrets are set to publish to Pypi 2 | 3 | on: push 4 | 5 | jobs: 6 | checksecret: 7 | name: check if PYPI_TOKEN and TESTPYPI_TOKEN are set in github secrets 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Check PYPI_TOKEN 11 | env: 12 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 13 | run: | 14 | if ${{ env.PYPI_TOKEN == '' }} ; then 15 | echo "PYPI_TOKEN secret is not set" 16 | exit 1 17 | fi 18 | - name: Check TESTPYPI_TOKEN 19 | env: 20 | TESTPYPI_TOKEN: ${{ secrets.TESTPYPI_TOKEN }} 21 | run: | 22 | if ${{ env.TESTPYPI_TOKEN == '' }} ; then 23 | echo "TESTPYPI_TOKEN secret is not set" 24 | exit 1 25 | fi 26 | 27 | 28 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '26 20 * * 3' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | language: [ 'python' ] 32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 33 | # Learn more: 34 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 35 | 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v2 39 | 40 | # Initializes the CodeQL tools for scanning. 41 | - name: Initialize CodeQL 42 | uses: github/codeql-action/init@v1 43 | with: 44 | languages: ${{ matrix.language }} 45 | # If you wish to specify custom queries, you can do so here or in a config file. 46 | # By default, queries listed here will override any specified in a config file. 47 | # Prefix the list here with "+" to use these queries and those in the config file. 48 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 49 | 50 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 51 | # If this step fails, then you should remove it and run the build manually (see below) 52 | - name: Autobuild 53 | uses: github/codeql-action/autobuild@v1 54 | 55 | # ℹ️ Command-line programs to run using the OS shell. 56 | # 📚 https://git.io/JvXDl 57 | 58 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 59 | # and modify them (or add more) to build your code if your project 60 | # uses a compiled language 61 | 62 | #- run: | 63 | # make bootstrap 64 | # make release 65 | 66 | - name: Perform CodeQL Analysis 67 | uses: github/codeql-action/analyze@v1 68 | -------------------------------------------------------------------------------- /.github/workflows/publish-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish Pypi 2 | on: 3 | release: 4 | types: [ published ] 5 | 6 | jobs: 7 | pytest: 8 | name: Publish to PyPi 9 | runs-on: ubuntu-latest 10 | env: 11 | ACTIONS_ALLOW_UNSECURE_COMMANDS: true 12 | steps: 13 | - uses: actions/checkout@master 14 | 15 | - name: get version from tag 16 | id: get_version 17 | run: | 18 | realversion="${GITHUB_REF/refs\/tags\//}" 19 | realversion="${realversion//v/}" 20 | echo "::set-output name=VERSION::$realversion" 21 | 22 | - name: Set the version for publishing 23 | uses: ciiiii/toml-editor@1.0.0 24 | with: 25 | file: "pyproject.toml" 26 | key: "tool.poetry.version" 27 | value: "${{ steps.get_version.outputs.VERSION }}" 28 | 29 | - name: Set up Python 3.7 30 | uses: actions/setup-python@v1 31 | with: 32 | python-version: 3.7 33 | 34 | - name: Install Poetry 35 | uses: dschep/install-poetry-action@v1.3 36 | 37 | - name: Cache Poetry virtualenv 38 | uses: actions/cache@v1 39 | id: cache 40 | with: 41 | path: ~/.virtualenvs 42 | key: poetry-${{ hashFiles('**/poetry.lock') }} 43 | restore-keys: | 44 | poetry-${{ hashFiles('**/poetry.lock') }} 45 | 46 | - name: Set Poetry config 47 | run: | 48 | poetry config virtualenvs.in-project false 49 | poetry config virtualenvs.path ~/.virtualenvs 50 | 51 | - name: Install Dependencies 52 | run: poetry install 53 | if: steps.cache.outputs.cache-hit != 'true' 54 | 55 | - name: Publish to PyPI 56 | if: 
github.event_name == 'release' 57 | run: | 58 | poetry publish -u __token__ -p ${{ secrets.PYPI_TOKEN }} --build 59 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | # branches to consider in the event; optional, defaults to all 6 | branches: 7 | - master 8 | 9 | jobs: 10 | update_release_draft: 11 | runs-on: ubuntu-latest 12 | steps: 13 | # Drafts your next Release notes as Pull Requests are merged into "master" 14 | - uses: release-drafter/release-drafter@v5 15 | with: 16 | # (Optional) specify config name to use, relative to .github/. Default: release-drafter.yml 17 | config-name: release-drafter-config.yml 18 | env: 19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # general datasets related inputs/outputs 2 | 3 | *.json 4 | *.tar.gz 5 | *.csv 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | # IPython 64 | profile_default/ 65 | ipython_config.py 66 | 67 | # pyenv 68 | .python-version 69 | 70 | # pipenv 71 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 72 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 73 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 74 | # install all needed dependencies. 75 | #Pipfile.lock 76 | 77 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 78 | __pypackages__/ 79 | 80 | # Celery stuff 81 | celerybeat-schedule 82 | celerybeat.pid 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Rope project settings 94 | .ropeproject 95 | 96 | # mkdocs documentation 97 | /site 98 | 99 | # mypy 100 | .mypy_cache/ 101 | .dmypy.json 102 | dmypy.json 103 | 104 | # Pyre type checker 105 | .pyre/ 106 | 107 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 108 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 109 | 110 | *.idea/ 111 | 112 | # User-specific stuff 113 | .idea/**/workspace.xml 114 | .idea/**/tasks.xml 115 | .idea/**/usage.statistics.xml 116 | .idea/**/dictionaries 117 | .idea/**/shelf 118 | 119 | # Generated files 120 | .idea/**/contentModel.xml 121 | 122 | # Sensitive or high-churn files 123 | .idea/**/dataSources/ 124 | .idea/**/dataSources.ids 125 | .idea/**/dataSources.local.xml 126 | .idea/**/sqlDataSources.xml 127 | .idea/**/dynamic.xml 128 | .idea/**/uiDesigner.xml 129 | .idea/**/dbnavigator.xml 130 | 131 | # IntelliJ 132 | out/ 133 | 134 | # mpeltonen/sbt-idea plugin 135 | .idea_modules/ 136 | 137 | # JIRA plugin 138 | atlassian-ide-plugin.xml 139 | 140 | # Cursive Clojure plugin 141 | .idea/replstate.xml 142 | 143 | # Crashlytics plugin (for Android Studio and IntelliJ) 144 | com_crashlytics_export_strings.xml 145 | crashlytics.properties 146 | crashlytics-build.properties 147 | fabric.properties 148 | 149 | # Editor-based Rest Client 150 | .idea/httpRequests 151 | 152 | # Android studio 3.1+ serialized cache file 153 | .idea/caches/build_file_checksums.ser 154 | 155 | # Dependencies 156 | deps/* 157 | -------------------------------------------------------------------------------- /API.md: -------------------------------------------------------------------------------- 1 | # Package redisearch Documentation 2 | 3 | 4 | 5 | ## Overview 6 | 7 | `redisearch-py` is a python search engine library that utilizes the RediSearch Redis Module API. 8 | 9 | It is the "official" client of redisearch, and should be regarded as its canonical client implementation. 
10 | 11 | The source code can be found at [http://github.com/RedisLabs/redisearch-py](http://github.com/RedisLabs/redisearch-py) 12 | 13 | ### Example: Using the Python Client 14 | 15 | ```py 16 | 17 | from redisearch import Client, TextField, NumericField, Query 18 | 19 | # Creating a client with a given index name 20 | client = Client('myIndex') 21 | 22 | # Creating the index definition and schema 23 | client.create_index([TextField('title', weight=5.0), TextField('body')]) 24 | 25 | # Indexing a document 26 | client.add_document('doc1', title = 'RediSearch', body = 'Redisearch impements a search engine on top of redis') 27 | 28 | # Simple search 29 | res = client.search("search engine") 30 | 31 | # the result has the total number of results, and a list of documents 32 | print res.total # "1" 33 | print res.docs[0].title 34 | 35 | # Searching with complext parameters: 36 | q = Query("search engine").verbatim().no_content().paging(0,5) 37 | res = client.search(q) 38 | 39 | ``` 40 | 41 | ### Example: Using the Auto Completer Client: 42 | 43 | ```py 44 | 45 | # Using the auto-completer 46 | ac = AutoCompleter('ac') 47 | 48 | # Adding some terms 49 | ac.add_suggestions(Suggestion('foo', 5.0), Suggestion('bar', 1.0)) 50 | 51 | # Getting suggestions 52 | suggs = ac.get_suggestions('goo') # returns nothing 53 | 54 | suggs = ac.get_suggestions('goo', fuzzy = True) # returns ['foo'] 55 | 56 | ``` 57 | 58 | ### Installing 59 | 60 | 1. Install redis 4.0 RC2 or above 61 | 62 | 2. [Install RediSearch](http://redisearch.io/Quick_Start/#building-and-running) 63 | 64 | 3. Install the python client 65 | 66 | ```sh 67 | $ pip install redisearch 68 | ``` 69 | 70 | ## Class AutoCompleter 71 | A client to RediSearch's AutoCompleter API 72 | 73 | It provides prefix searches with optionally fuzzy matching of prefixes 74 | ### \_\_init\_\_ 75 | ```py 76 | 77 | def __init__(self, key, host='localhost', port=6379, conn=None) 78 | 79 | ``` 80 | 81 | 82 | 83 | Create a new AutoCompleter client for the given key, and optional host and port 84 | 85 | If conn is not None, we employ an already existing redis connection 86 | 87 | 88 | ### add\_suggestions 89 | ```py 90 | 91 | def add_suggestions(self, *suggestions, **kwargs) 92 | 93 | ``` 94 | 95 | 96 | 97 | Add suggestion terms to the AutoCompleter engine. Each suggestion has a score and string. 98 | 99 | If kwargs['increment'] is true and the terms are already in the server's dictionary, we increment their scores 100 | 101 | 102 | ### delete 103 | ```py 104 | 105 | def delete(self, string) 106 | 107 | ``` 108 | 109 | 110 | 111 | Delete a string from the AutoCompleter index. 112 | Returns 1 if the string was found and deleted, 0 otherwise 113 | 114 | 115 | ### get\_suggestions 116 | ```py 117 | 118 | def get_suggestions(self, prefix, fuzzy=False, num=10, with_scores=False, with_payloads=False) 119 | 120 | ``` 121 | 122 | 123 | 124 | Get a list of suggestions from the AutoCompleter, for a given prefix 125 | 126 | ### Parameters: 127 | - **prefix**: the prefix we are searching. **Must be valid ascii or utf-8** 128 | - **fuzzy**: If set to true, the prefix search is done in fuzzy mode. 129 | **NOTE**: Running fuzzy searches on short (<3 letters) prefixes can be very slow, and even scan the entire index. 130 | - **with_scores**: if set to true, we also return the (refactored) score of each suggestion. 
131 | This is normally not needed, and is NOT the original score inserted into the index 132 | - **with_payloads**: Return suggestion payloads 133 | - **num**: The maximum number of results we return. Note that we might return less. The algorithm trims irrelevant suggestions. 134 | 135 | Returns a list of Suggestion objects. If with_scores was False, the score of all suggestions is 1. 136 | 137 | 138 | ### len 139 | ```py 140 | 141 | def len(self) 142 | 143 | ``` 144 | 145 | 146 | 147 | Return the number of entries in the AutoCompleter index 148 | 149 | 150 | 151 | 152 | ## Class Client 153 | A client for the RediSearch module. 154 | It abstracts the API of the module and lets you just use the engine 155 | ### \_\_init\_\_ 156 | ```py 157 | 158 | def __init__(self, index_name, host='localhost', port=6379, conn=None) 159 | 160 | ``` 161 | 162 | 163 | 164 | Create a new Client for the given index_name, and optional host and port 165 | 166 | If conn is not None, we employ an already existing redis connection 167 | 168 | 169 | ### add\_document 170 | ```py 171 | 172 | def add_document(self, doc_id, nosave=False, score=1.0, payload=None, replace=False, partial=False, language=None, **fields) 173 | 174 | ``` 175 | 176 | 177 | 178 | Add a single document to the index. 179 | 180 | ### Parameters 181 | 182 | - **doc_id**: the id of the saved document. 183 | - **nosave**: if set to true, we just index the document, and don't save a copy of it. This means that searches will just return ids. 184 | - **score**: the document ranking, between 0.0 and 1.0 185 | - **payload**: optional inner-index payload we can save for fast access in scoring functions 186 | - **replace**: if True, and the document already is in the index, we perform an update and reindex the document 187 | - **partial**: if True, the fields specified will be added to the existing document. 188 | This has the added benefit that any fields specified with `no_index` 189 | will not be reindexed again. Implies `replace` 190 | - **language**: Specify the language used for document tokenization. 191 | - **fields** kwargs dictionary of the document fields to be saved and/or indexed. 192 | NOTE: Geo points shoule be encoded as strings of "lon,lat" 193 | 194 | 195 | ### add\_document\_hash 196 | ```py 197 | 198 | def add_document_hash(self, doc_id, score=1.0, language=None, replace=False) 199 | 200 | ``` 201 | 202 | 203 | 204 | Add a hash document to the index. 205 | 206 | ### Parameters 207 | 208 | - **doc_id**: the document's id. This has to be an existing HASH key in Redis that will hold the fields the index needs. 209 | - **score**: the document ranking, between 0.0 and 1.0 210 | - **replace**: if True, and the document already is in the index, we perform an update and reindex the document 211 | - **language**: Specify the language used for document tokenization. 212 | 213 | 214 | ### aggregate 215 | ```py 216 | 217 | def aggregate(self, query) 218 | 219 | ``` 220 | 221 | 222 | 223 | Issue an aggregation query 224 | 225 | ### Parameters 226 | 227 | **query**: This can be either an `AggeregateRequest`, or a `Cursor` 228 | 229 | An `AggregateResult` object is returned. You can access the rows from its 230 | `rows` property, which will always yield the rows of the result 231 | 232 | 233 | ### alter\_schema\_add 234 | ```py 235 | 236 | def alter_schema_add(self, fields) 237 | 238 | ``` 239 | 240 | 241 | 242 | Alter the existing search index by adding new fields. The index must already exist. 
243 | 244 | ### Parameters: 245 | 246 | - **fields**: a list of Field objects to add for the index 247 | 248 | 249 | ### batch\_indexer 250 | ```py 251 | 252 | def batch_indexer(self, chunk_size=100) 253 | 254 | ``` 255 | 256 | 257 | 258 | Create a new batch indexer from the client with a given chunk size 259 | 260 | 261 | ### create\_index 262 | ```py 263 | 264 | def create_index(self, fields, no_term_offsets=False, no_field_flags=False, stopwords=None) 265 | 266 | ``` 267 | 268 | 269 | 270 | Create the search index. The index must not already exist. 271 | 272 | ### Parameters: 273 | 274 | - **fields**: a list of TextField or NumericField objects 275 | - **no_term_offsets**: If true, we will not save term offsets in the index 276 | - **no_field_flags**: If true, we will not save field flags that allow searching in specific fields 277 | - **stopwords**: If not None, we create the index with this custom stopword list. The list can be empty 278 | 279 | 280 | ### delete\_document 281 | ```py 282 | 283 | def delete_document(self, doc_id, conn=None, delete_actual_document=False) 284 | 285 | ``` 286 | 287 | 288 | 289 | Delete a document from index 290 | Returns 1 if the document was deleted, 0 if not 291 | 292 | ### Parameters 293 | 294 | - **delete_actual_document**: if set to True, RediSearch also delete the actual document if it is in the index 295 | 296 | ### drop\_index 297 | ```py 298 | 299 | def drop_index(self) 300 | 301 | ``` 302 | 303 | 304 | 305 | Drop the index if it exists 306 | 307 | 308 | ### explain 309 | ```py 310 | 311 | def explain(self, query) 312 | 313 | ``` 314 | 315 | 316 | 317 | ### info 318 | ```py 319 | 320 | def info(self) 321 | 322 | ``` 323 | 324 | 325 | 326 | Get info an stats about the the current index, including the number of documents, memory consumption, etc 327 | 328 | 329 | ### load\_document 330 | ```py 331 | 332 | def load_document(self, id) 333 | 334 | ``` 335 | 336 | 337 | 338 | Load a single document by id 339 | 340 | 341 | ### search 342 | ```py 343 | 344 | def search(self, query) 345 | 346 | ``` 347 | 348 | 349 | 350 | Search the index for a given query, and return a result of documents 351 | 352 | ### Parameters 353 | 354 | - **query**: the search query. Either a text for simple queries with default parameters, or a Query object for complex queries. 355 | See RediSearch's documentation on query format 356 | 357 | ## Class BatchIndexer 358 | A batch indexer allows you to automatically batch 359 | document indexeing in pipelines, flushing it every N documents. 
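For example, a minimal sketch of batch indexing (assuming a RediSearch server is running and an index named `myIndex` with `title` and `body` text fields already exists; the document ids and field values are illustrative):

```py
from redisearch import Client

client = Client('myIndex')

# Create a batch indexer that flushes its pipeline every 50 documents
indexer = client.batch_indexer(chunk_size=50)

for i in range(1000):
    # Documents are buffered and sent to Redis in chunks of `chunk_size`
    indexer.add_document('doc%d' % i, title='Document %d' % i, body='hello world')

# Flush any documents still buffered in the current chunk
indexer.commit()
```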
360 | ### \_\_init\_\_ 361 | ```py 362 | 363 | def __init__(self, client, chunk_size=1000) 364 | 365 | ``` 366 | 367 | 368 | 369 | ### add\_document 370 | ```py 371 | 372 | def add_document(self, doc_id, nosave=False, score=1.0, payload=None, replace=False, partial=False, **fields) 373 | 374 | ``` 375 | 376 | 377 | 378 | Add a document to the batch query 379 | 380 | 381 | ### add\_document\_hash 382 | ```py 383 | 384 | def add_document_hash(self, doc_id, score=1.0, language=None, replace=False) 385 | 386 | ``` 387 | 388 | 389 | 390 | Add a hash document to the batch query 391 | 392 | 393 | ### commit 394 | ```py 395 | 396 | def commit(self) 397 | 398 | ``` 399 | 400 | 401 | 402 | Manually commit and flush the batch indexing query 403 | 404 | 405 | 406 | 407 | 408 | 409 | ## Class Document 410 | Represents a single document in a result set 411 | ### \_\_init\_\_ 412 | ```py 413 | 414 | def __init__(self, id, payload=None, **fields) 415 | 416 | ``` 417 | 418 | 419 | 420 | 421 | 422 | ## Class GeoField 423 | GeoField is used to define a geo-indexing field in a schema defintion 424 | ### \_\_init\_\_ 425 | ```py 426 | 427 | def __init__(self, name) 428 | 429 | ``` 430 | 431 | 432 | 433 | ### redis\_args 434 | ```py 435 | 436 | def redis_args(self) 437 | 438 | ``` 439 | 440 | 441 | 442 | 443 | 444 | ## Class GeoFilter 445 | None 446 | ### \_\_init\_\_ 447 | ```py 448 | 449 | def __init__(self, field, lon, lat, radius, unit='km') 450 | 451 | ``` 452 | 453 | 454 | 455 | 456 | 457 | ## Class NumericField 458 | NumericField is used to define a numeric field in a schema defintion 459 | ### \_\_init\_\_ 460 | ```py 461 | 462 | def __init__(self, name, sortable=False, no_index=False) 463 | 464 | ``` 465 | 466 | 467 | 468 | ### redis\_args 469 | ```py 470 | 471 | def redis_args(self) 472 | 473 | ``` 474 | 475 | 476 | 477 | 478 | 479 | ## Class NumericFilter 480 | None 481 | ### \_\_init\_\_ 482 | ```py 483 | 484 | def __init__(self, field, minval, maxval, minExclusive=False, maxExclusive=False) 485 | 486 | ``` 487 | 488 | 489 | 490 | 491 | 492 | ## Class Query 493 | Query is used to build complex queries that have more parameters than just the query string. 494 | The query string is set in the constructor, and other options have setter functions. 495 | 496 | The setter functions return the query object, so they can be chained, 497 | i.e. `Query("foo").verbatim().filter(...)` etc. 498 | ### \_\_init\_\_ 499 | ```py 500 | 501 | def __init__(self, query_string) 502 | 503 | ``` 504 | 505 | 506 | 507 | Create a new query object. 508 | The query string is set in the constructor, and other options have setter functions. 509 | 510 | 511 | ### add\_filter 512 | ```py 513 | 514 | def add_filter(self, flt) 515 | 516 | ``` 517 | 518 | 519 | 520 | Add a numeric or geo filter to the query. 521 | **Currently only one of each filter is supported by the engine** 522 | 523 | - **flt**: A NumericFilter or GeoFilter object, used on a corresponding field 524 | 525 | 526 | ### get\_args 527 | ```py 528 | 529 | def get_args(self) 530 | 531 | ``` 532 | 533 | 534 | 535 | Format the redis arguments for this query and return them 536 | 537 | 538 | ### highlight 539 | ```py 540 | 541 | def highlight(self, fields=None, tags=None) 542 | 543 | ``` 544 | 545 | 546 | 547 | Apply specified markup to matched term(s) within the returned field(s) 548 | 549 | - **fields** If specified then only those mentioned fields are highlighted, otherwise all fields are highlighted 550 | - **tags** A list of two strings to surround the match. 
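For example, a minimal sketch that wraps matches in the `title` field with bold tags (the field name is illustrative):

```py
q = Query("wizards").highlight(fields=["title"], tags=["<b>", "</b>"])
res = client.search(q)
```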
551 | 552 | 553 | ### in\_order 554 | ```py 555 | 556 | def in_order(self) 557 | 558 | ``` 559 | 560 | 561 | 562 | Match only documents where the query terms appear in the same order in the document. 563 | i.e. for the query 'hello world', we do not match 'world hello' 564 | 565 | 566 | ### language 567 | ```py 568 | 569 | def language(self, language) 570 | 571 | ``` 572 | 573 | 574 | 575 | Analyze the query as being in the specified language 576 | :param language: The language (e.g. `chinese` or `english`) 577 | 578 | 579 | ### limit\_fields 580 | ```py 581 | 582 | def limit_fields(self, *fields) 583 | 584 | ``` 585 | 586 | 587 | 588 | Limit the search to specific TEXT fields only 589 | 590 | - **fields**: A list of strings, case sensitive field names from the defined schema 591 | 592 | 593 | ### limit\_ids 594 | ```py 595 | 596 | def limit_ids(self, *ids) 597 | 598 | ``` 599 | 600 | 601 | 602 | Limit the results to a specific set of pre-known document ids of any length 603 | 604 | 605 | ### no\_content 606 | ```py 607 | 608 | def no_content(self) 609 | 610 | ``` 611 | 612 | 613 | 614 | Set the query to only return ids and not the document content 615 | 616 | 617 | ### no\_stopwords 618 | ```py 619 | 620 | def no_stopwords(self) 621 | 622 | ``` 623 | 624 | 625 | 626 | Prevent the query from being filtered for stopwords. 627 | Only useful in very big queries that you are certain contain no stopwords. 628 | 629 | 630 | ### paging 631 | ```py 632 | 633 | def paging(self, offset, num) 634 | 635 | ``` 636 | 637 | 638 | 639 | Set the paging for the query (defaults to 0..10). 640 | 641 | - **offset**: Paging offset for the results. Defaults to 0 642 | - **num**: How many results do we want 643 | 644 | 645 | ### query\_string 646 | ```py 647 | 648 | def query_string(self) 649 | 650 | ``` 651 | 652 | 653 | 654 | Return the query string of this query only 655 | 656 | 657 | ### return\_fields 658 | ```py 659 | 660 | def return_fields(self, *fields) 661 | 662 | ``` 663 | 664 | 665 | 666 | Only return values from these fields 667 | 668 | 669 | ### slop 670 | ```py 671 | 672 | def slop(self, slop) 673 | 674 | ``` 675 | 676 | 677 | 678 | Allow a masimum of N intervening non matched terms between phrase terms (0 means exact phrase) 679 | 680 | 681 | ### sort\_by 682 | ```py 683 | 684 | def sort_by(self, field, asc=True) 685 | 686 | ``` 687 | 688 | 689 | 690 | Add a sortby field to the query 691 | 692 | - **field** - the name of the field to sort by 693 | - **asc** - when `True`, sorting will be done in asceding order 694 | 695 | 696 | ### summarize 697 | ```py 698 | 699 | def summarize(self, fields=None, context_len=None, num_frags=None, sep=None) 700 | 701 | ``` 702 | 703 | 704 | 705 | Return an abridged format of the field, containing only the segments of 706 | the field which contain the matching term(s). 707 | 708 | If `fields` is specified, then only the mentioned fields are 709 | summarized; otherwise all results are summarized. 710 | 711 | Server side defaults are used for each option (except `fields`) if not specified 712 | 713 | - **fields** List of fields to summarize. All fields are summarized if not specified 714 | - **context_len** Amount of context to include with each fragment 715 | - **num_frags** Number of fragments per document 716 | - **sep** Separator string to separate fragments 717 | 718 | 719 | ### verbatim 720 | ```py 721 | 722 | def verbatim(self) 723 | 724 | ``` 725 | 726 | 727 | 728 | Set the query to be verbatim, i.e. 
use no query expansion or stemming 729 | 730 | 731 | ### with\_payloads 732 | ```py 733 | 734 | def with_payloads(self) 735 | 736 | ``` 737 | 738 | 739 | 740 | Ask the engine to return document payloads 741 | 742 | 743 | 744 | 745 | ## Class Result 746 | Represents the result of a search query, and has an array of Document objects 747 | ### \_\_init\_\_ 748 | ```py 749 | 750 | def __init__(self, res, hascontent, duration=0, has_payload=False) 751 | 752 | ``` 753 | 754 | 755 | 756 | - **snippets**: An optional dictionary of the form {field: snippet_size} for snippet formatting 757 | 758 | 759 | 760 | 761 | ## Class SortbyField 762 | None 763 | ### \_\_init\_\_ 764 | ```py 765 | 766 | def __init__(self, field, asc=True) 767 | 768 | ``` 769 | 770 | 771 | 772 | 773 | 774 | ## Class Suggestion 775 | Represents a single suggestion being sent or returned from the auto complete server 776 | ### \_\_init\_\_ 777 | ```py 778 | 779 | def __init__(self, string, score=1.0, payload=None) 780 | 781 | ``` 782 | 783 | 784 | 785 | 786 | 787 | ## Class TagField 788 | TagField is a tag-indexing field with simpler compression and tokenization. 789 | See http://redisearch.io/Tags/ 790 | ### \_\_init\_\_ 791 | ```py 792 | 793 | def __init__(self, name, separator=',', no_index=False) 794 | 795 | ``` 796 | 797 | 798 | 799 | ### redis\_args 800 | ```py 801 | 802 | def redis_args(self) 803 | 804 | ``` 805 | 806 | 807 | 808 | 809 | 810 | ## Class TextField 811 | TextField is used to define a text field in a schema definition 812 | ### \_\_init\_\_ 813 | ```py 814 | 815 | def __init__(self, name, weight=1.0, sortable=False, no_stem=False, no_index=False) 816 | 817 | ``` 818 | 819 | 820 | 821 | ### redis\_args 822 | ```py 823 | 824 | def redis_args(self) 825 | 826 | ``` 827 | 828 | 829 | 830 | 831 | 832 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM redislabs/redisearch:edge as builder 2 | 3 | RUN apt update && apt install -y python3 python3-pip 4 | ADD . /build 5 | WORKDIR /build 6 | RUN pip3 install poetry 7 | RUN poetry config virtualenvs.create false 8 | RUN poetry build 9 | 10 | ### clean docker stage 11 | FROM redislabs/redisearch:edge as runner 12 | 13 | RUN apt update && apt install -y python3 python3-pip git 14 | RUN rm -rf /var/cache/apt/ 15 | 16 | COPY --from=builder /build/dist/redisearch*.tar.gz /tmp/ 17 | RUN pip3 install /tmp/redisearch*.tar.gz 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017, Redis Labs 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 
15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | include LICENSE 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![license](https://img.shields.io/github/license/RediSearch/redisearch-py.svg)](https://github.com/RediSearch/redisearch-py/blob/master/LICENSE) 2 | [![PyPI version](https://badge.fury.io/py/redisearch.svg)](https://badge.fury.io/py/redisearch) 3 | [![GitHub issues](https://img.shields.io/github/release/RediSearch/redisearch-py.svg)](https://github.com/RediSearch/redisearch-py/releases/latest) 4 | [![Codecov](https://codecov.io/gh/RediSearch/redisearch-py/branch/master/graph/badge.svg)](https://codecov.io/gh/RediSearch/redisearch-py) 5 | [![Known Vulnerabilities](https://snyk.io/test/github/RediSearch/redisearch-py/badge.svg?targetFile=pyproject.toml)](https://snyk.io/test/github/RediSearch/redisearch-py?targetFile=pyproject.toml) 6 | [![Total alerts](https://img.shields.io/lgtm/alerts/g/RediSearch/redisearch-py.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/RediSearch/redisearch-py/alerts/) 7 | 8 | # RediSearch Python Client 9 | [![Forum](https://img.shields.io/badge/Forum-RediSearch-blue)](https://forum.redislabs.com/c/modules/redisearch/) 10 | [![Discord](https://img.shields.io/discord/697882427875393627?style=flat-square)](https://discord.gg/xTbqgTB) 11 | 12 | ## Deprecation notice 13 | 14 | As of [redis-py 4.0.0](https://pypi.org/project/redis/4.0.0) this library is deprecated. It's features have been merged into redis-py. Please either install it [from pypy](https://pypi.org/project/redis) or [the repo](https://github.com/redis/redis-py). 15 | 16 | -------------------------------- 17 | 18 | This is a Python search engine library that utilizes the [RediSearch Redis Module](http://redisearch.io) API. 19 | 20 | It is the "official" client of RediSearch, and should be regarded as its canonical client implementation. 21 | 22 | ## Features 23 | 24 | RediSearch is a source avaliable ([RSAL](https://raw.githubusercontent.com/RediSearch/RediSearch/master/LICENSE)), high performance search engine implemented as a [Redis Module](https://redis.io/topics/modules-intro). 25 | It uses custom data types to allow fast, stable and feature rich full-text search inside Redis. 26 | 27 | This client is a wrapper around the RediSearch API protocol, that allows you to utilize its features easily. 
28 | 29 | ### RediSearch's features include: 30 | 31 | * Full-Text indexing of multiple fields in documents. 32 | * Incremental indexing without performance loss. 33 | * Document ranking (provided manually by the user at index time) and field weights. 34 | * Auto-complete suggestions (with fuzzy prefix suggestions). 35 | * Exact Phrase Search. 36 | * Stemming based query expansion in [many languages](http://redisearch.io/Stemming/) (using [Snowball](http://snowballstem.org/)). 37 | * Limiting searches to specific document fields (up to 8 fields supported). 38 | * Numeric filters and ranges. 39 | * Automatically index existing HASH keys as documents. 40 | 41 | For more details, visit [http://redisearch.io](http://redisearch.io) 42 | 43 | ## Examples 44 | 45 | ### Creating a client instance 46 | 47 | When you create a redisearch-py client instance, the only required argument 48 | is the name of the index. 49 | 50 | ```py 51 | from redisearch import Client 52 | 53 | client = Client("my-index") 54 | ``` 55 | 56 | To connect with a username and/or password, pass those options to the client 57 | initializer. 58 | 59 | ```py 60 | client = Client("my-index", password="my-password") 61 | ``` 62 | 63 | ### Using core Redis commands 64 | 65 | Every instance of `Client` contains an instance of the redis-py `Client` as 66 | well. Use this object to run core Redis commands. 67 | 68 | ```py 69 | import datetime 70 | 71 | from redisearch import Client 72 | 73 | START_TIME = datetime.datetime.now().strftime("%Y-%m-%d-%H:%M.%S") 74 | 75 | client = Client("my-index") 76 | 77 | client.redis.set("start-time", START_TIME) 78 | ``` 79 | 80 | ### Checking if a RediSearch index exists 81 | 82 | To check if a RediSearch index exists, use the `FT.INFO` command and catch 83 | the `ResponseError` raised if the index does not exist. 84 | 85 | ```py 86 | from redis import ResponseError 87 | from redisearch import Client 88 | 89 | client = Client("my-index") 90 | 91 | try: 92 | client.info() 93 | except ResponseError: 94 | # Index does not exist. We need to create it! 95 | ``` 96 | 97 | ### Defining a search index 98 | 99 | Use an instance of `IndexDefinition` to define a search index. You only need 100 | to do this when you create an index. 101 | 102 | RediSearch indexes follow Hashes in your Redis databases by watching *key 103 | prefixes*. If a Hash whose key starts with one of the search index's 104 | configured key prefixes is added, updated, or deleted from Redis, RediSearch 105 | will make those changes in the index. You configure a search index's key 106 | prefixes using the `prefix` parameter of the `IndexDefinition` initializer. 107 | 108 | **NOTE**: Once you create an index, RediSearch will continuously index these 109 | keys when their Hashes change. 110 | 111 | `IndexDefinition` also takes a *schema*. The schema specifies which fields to 112 | index from within the Hashes that the index follows. The field types are: 113 | 114 | * TextField 115 | * TagField 116 | * NumericField 117 | * GeoField 118 | 119 | For more information on what these field types mean, consult the [RediSearch 120 | documentation](https://oss.redislabs.com/redisearch/Commands/#ftcreate) on 121 | the `FT.CREATE` command. 122 | 123 | With redisearch-py, the schema is an iterable of `Field` instances. Once you 124 | have an `IndexDefinition` instance, you can create the instance by passing a 125 | schema iterable to the `create_index()` method. 
126 | 127 | ```py 128 | from redis import ResponseError 129 | from redisearch import Client, IndexDefinition, TextField 130 | 131 | SCHEMA = ( 132 | TextField("title", weight=5.0), 133 | TextField("body") 134 | ) 135 | 136 | client = Client("my-index") 137 | 138 | definition = IndexDefinition(prefix=['blog:']) 139 | 140 | try: 141 | client.info() 142 | except ResponseError: 143 | # Index does not exist. We need to create it! 144 | client.create_index(SCHEMA, definition=definition) 145 | ``` 146 | 147 | ### Indexing a document 148 | 149 | A RediSearch 2.0 index continually follows Hashes with the key prefixes you 150 | defined, so if you want to add a document to the index, you only need to 151 | create a Hash with one of those prefixes. 152 | 153 | ```py 154 | # Indexing a document with RediSearch 2.0. 155 | doc = { 156 | 'title': 'RediSearch', 157 | 'body': 'Redisearch adds querying, indexing, and full-text search to Redis' 158 | } 159 | client.redis.hset('doc:1', mapping=doc) 160 | ``` 161 | 162 | Past versions of RediSearch required that you call the `add_document()` 163 | method. This method is deprecated, but we include its usage here for 164 | reference. 165 | 166 | ```py 167 | # Indexing a document for RediSearch 1.x 168 | client.add_document( 169 | "doc:2", 170 | title="RediSearch", 171 | body="Redisearch implements a search engine on top of redis", 172 | ) 173 | ``` 174 | 175 | ### Querying 176 | 177 | #### Basic queries 178 | 179 | Use the `search()` method to perform basic full-text and field-specific 180 | searches. This method doesn't take many of the options available to the 181 | RediSearch `FT.SEARCH` command -- read the section on building complex 182 | queries later in this document for information on how to use those. 183 | 184 | ```py 185 | res = client.search("evil wizards") 186 | ``` 187 | #### Result objects 188 | 189 | Results are wrapped in a `Result` object that includes the number of results 190 | and a list of matching documents. 191 | 192 | ```py 193 | >>> print(res.total) 194 | 2 195 | >>> print(res.docs[0].title) 196 | "Wizard Story 2: Evil Wizards Strike Back" 197 | ``` 198 | 199 | #### Building complex queries 200 | 201 | You can use the `Query` object to build complex queries: 202 | 203 | ```py 204 | q = Query("evil wizards").verbatim().no_content().with_scores().paging(0, 5) 205 | res = client.search(q) 206 | ``` 207 | 208 | For an explanation of these options, see the [RediSearch 209 | documentation](https://oss.redislabs.com/redisearch/Commands/#ftsearch) for 210 | the `FT.SEARCH` command. 211 | 212 | #### Query syntax 213 | 214 | The default behavior of queries is to run a full-text search across all 215 | `TEXT` fields in the index for the intersection of all terms in the query. 216 | 217 | So the example given in the "Basic queries" section of this README, 218 | `client.search("evil wizards")`, run a full-text search for the intersection 219 | of "evil" and "wizard" in all `TEXT` fields. 220 | 221 | Many more types of queries are possible, however! The string you pass into 222 | the `search()` method or `Query()` initializer has the full range of query 223 | syntax available in RediSearch. 
224 | 225 | For example, a full-text search against a specific `TEXT` field in the index 226 | looks like this: 227 | 228 | ```py 229 | # Full-text search 230 | res = client.search("@title:evil wizards") 231 | ``` 232 | 233 | Finding books published in 2020 or 2021 looks like this: 234 | 235 | ```python 236 | client.search("@published_year:[2020 2021]") 237 | ``` 238 | 239 | To learn more, see the [RediSearch 240 | documentation](https://oss.redislabs.com/redisearch/Query_Syntax/) on query 241 | syntax. 242 | 243 | ### Aggregations 244 | 245 | This library contains a programmatic interface to run [aggregation 246 | queries](https://oss.redislabs.com/redisearch/Aggregations/) with RediSearch. 247 | 248 | #### Making an aggregation query 249 | 250 | To make an aggregation query, pass an instance of the `AggregateRequest` 251 | class to the `search()` method of an instance of `Client`. 252 | 253 | For example, here is what finding the most books published in a single year 254 | looks like: 255 | 256 | ```py 257 | from redisearch import Client 258 | from redisearch import reducers 259 | from redisearch.aggregation import AggregateRequest 260 | 261 | client = Client('books-idx') 262 | 263 | request = AggregateRequest('*').group_by( 264 | '@published_year', reducers.count().alias("num_published") 265 | ).group_by( 266 | [], reducers.max("@num_published").alias("max_books_published_per_year") 267 | ) 268 | 269 | result = client.aggregate(request) 270 | ``` 271 | 272 | #### A redis-cli equivalent query 273 | 274 | The aggregation query just given is equivalent to the following 275 | `FT.AGGREGATE` command entered directly into the redis-cli: 276 | 277 | ```sql 278 | FT.AGGREGATE books-idx * 279 | GROUPBY 1 @published_year 280 | REDUCE COUNT 0 AS num_published 281 | GROUPBY 0 282 | REDUCE MAX 1 @num_published AS max_books_published_per_year 283 | ``` 284 | 285 | #### The AggregateResult object 286 | 287 | Aggregation queries return an `AggregateResult` object that contains the rows 288 | returned for the query and a cursor if you're using the [cursor 289 | API](https://oss.redislabs.com/redisearch/Aggregations/#cursor_api). 290 | 291 | ```py 292 | from redisearch.aggregation import AggregateRequest, Asc 293 | 294 | request = AggregateRequest('*').group_by( 295 | ['@published_year'], reducers.avg('average_rating').alias('average_rating_for_year') 296 | ).sort_by( 297 | Asc('@average_rating_for_year') 298 | ).limit( 299 | 0, 10 300 | ).filter('@published_year > 0') 301 | 302 | ... 303 | 304 | 305 | In [53]: resp = c.aggregate(request) 306 | In [54]: resp.rows 307 | Out[54]: 308 | [['published_year', '1914', 'average_rating_for_year', '0'], 309 | ['published_year', '2009', 'average_rating_for_year', '1.39166666667'], 310 | ['published_year', '2011', 'average_rating_for_year', '2.046'], 311 | ['published_year', '2010', 'average_rating_for_year', '3.125'], 312 | ['published_year', '2012', 'average_rating_for_year', '3.41'], 313 | ['published_year', '1967', 'average_rating_for_year', '3.603'], 314 | ['published_year', '1970', 'average_rating_for_year', '3.71875'], 315 | ['published_year', '1966', 'average_rating_for_year', '3.72666666667'], 316 | ['published_year', '1927', 'average_rating_for_year', '3.77']] 317 | ``` 318 | 319 | #### Reducer functions 320 | 321 | Notice from the example that we used an object from the `reducers` module. 
322 | See the [RediSearch documentation](https://oss.redislabs.com/redisearch/Aggregations/#groupby_reducers) 323 | for more examples of reducer functions you can use when grouping results. 324 | 325 | Reducer functions include an `alias()` method that gives the result of the 326 | reducer a specific name. If you don't supply a name, RediSearch will generate 327 | one. 328 | 329 | #### Grouping by zero, one, or multiple fields 330 | 331 | The `group_by` statement can take a single field name as a string, or multiple 332 | field names as a list of strings. 333 | 334 | ```py 335 | AggregateRequest('*').group_by('@published_year', reducers.count()) 336 | 337 | AggregateRequest('*').group_by( 338 | ['@published_year', '@average_rating'], 339 | reducers.count()) 340 | ``` 341 | 342 | To run a reducer function on every result from an aggregation query, pass an 343 | empty list to `group_by()`, which is equivalent to passing the option 344 | `GROUPBY 0` when writing an aggregation in the redis-cli. 345 | 346 | ```py 347 | AggregateRequest('*').group_by([], reducers.max("@num_published")) 348 | ``` 349 | 350 | **NOTE**: Aggregation queries require at least one `group_by()` method call. 351 | 352 | #### Sorting and limiting 353 | 354 | Using an `AggregateRequest` instance, you can sort with the `sort_by()` method 355 | and limit with the `limit()` method. 356 | 357 | For example, finding the average rating of books published each year, sorting 358 | by the average rating for the year, and returning only the first ten results: 359 | 360 | ```py 361 | from redisearch import Client 362 | from redisearch.aggregation import AggregateRequest, Asc 363 | 364 | c = Client() 365 | 366 | request = AggregateRequest('*').group_by( 367 | ['@published_year'], reducers.avg('average_rating').alias('average_rating_for_year') 368 | ).sort_by( 369 | Asc('@average_rating_for_year') 370 | ).limit(0, 10) 371 | 372 | c.aggregate(request) 373 | ``` 374 | 375 | **NOTE**: The first option to `limit()` is a zero-based offset, and the second 376 | option is the number of results to return. 377 | 378 | #### Filtering 379 | 380 | Use filtering to reject results of an aggregation query after your reducer 381 | functions run. For example, calculating the average rating of books published 382 | each year and only returning years with an average rating higher than 3: 383 | 384 | ```py 385 | from redisearch.aggregation import AggregateRequest, Asc 386 | 387 | req = AggregateRequest('*').group_by( 388 | ['@published_year'], reducers.avg('average_rating').alias('average_rating_for_year') 389 | ).sort_by( 390 | Asc('@average_rating_for_year') 391 | ).filter('@average_rating_for_year > 3') 392 | ``` 393 | 394 | ## Installing 395 | 396 | 1. [Install RediSearch](http://redisearch.io/Quick_Start) 397 | 2. Install the Python client: 398 | 399 | ```sh 400 | $ pip install redisearch 401 | ``` 402 | 403 | ## Developing 404 | 405 | 1. Create a virtualenv to manage your python dependencies, and ensure it's active. 406 | ```virtualenv -v venv``` 407 | 2. Install [pypoetry](https://python-poetry.org/) to manage your dependencies. 408 | ```pip install --user poetry``` 409 | 3. Install dependencies. 410 | ```poetry install``` 411 | 412 | Note: Due to an [interaction between](https://github.com/python-poetry/poetry/issues/4210) and python 3.10, you *may* need to run the following, if you receive a JSONError while installing packages. 
413 | ``` 414 | poetry config experimental.new-installer false 415 | ``` 416 | 417 | ## Testing 418 | 419 | Testing can easily be performed using using Docker. 420 | Run the following: 421 | 422 | ``` 423 | make -C test/docker test PYTHON_VER=3 424 | ``` 425 | 426 | (Replace `PYTHON_VER=3` with `PYTHON_VER=2` to test with Python 2.7.) 427 | 428 | Alternatively, use the following procedure: 429 | 430 | First, run: 431 | 432 | ``` 433 | PYTHON_VER=3 ./test/test-setup.sh 434 | ``` 435 | 436 | This will set up a Python virtual environment in `venv3` (or in `venv2` if `PYTHON_VER=2` is used). 437 | 438 | Afterwards, run RediSearch in a container as a daemon: 439 | 440 | ``` 441 | docker run -d -p 6379:6379 redislabs/redisearch:2.0.0 442 | ``` 443 | 444 | Finally, invoke the virtual environment and run the tests: 445 | 446 | ``` 447 | . ./venv3/bin/activate 448 | REDIS_PORT=6379 python test/test.py 449 | REDIS_PORT=6379 python test/test_builder.py 450 | ``` 451 | -------------------------------------------------------------------------------- /gendoc.py: -------------------------------------------------------------------------------- 1 | 2 | import pydoc 3 | import os, sys 4 | 5 | module_header = "# Package {} Documentation\n" 6 | class_header = "## Class {}" 7 | function_header = "### {}" 8 | 9 | 10 | def getmarkdown(module): 11 | output = [ module_header.format(module.__name__) ] 12 | 13 | if module.__doc__: 14 | output.append(module.__doc__) 15 | 16 | output.extend(getclasses(module)) 17 | return "\n".join((str(x) for x in output)) 18 | 19 | def getclasses(item): 20 | output = list() 21 | for cl in pydoc.inspect.getmembers(item, pydoc.inspect.isclass): 22 | 23 | if cl[0] != "__class__" and not cl[0].startswith("_"): 24 | # Consider anything that starts with _ private 25 | # and don't document it 26 | output.append( class_header.format(cl[0])) 27 | # Get the docstring 28 | output.append(pydoc.inspect.getdoc(cl[1])) 29 | # Get the functions 30 | output.extend(getfunctions(cl[1])) 31 | # Recurse into any subclasses 32 | output.extend(getclasses(cl[1])) 33 | output.append('\n') 34 | return output 35 | 36 | 37 | def getfunctions(item): 38 | output = list() 39 | #print item 40 | for func in pydoc.inspect.getmembers(item, pydoc.inspect.ismethod): 41 | 42 | if func[0].startswith('_') and func[0] != '__init__': 43 | continue 44 | 45 | output.append(function_header.format(func[0].replace('_', '\\_'))) 46 | 47 | # Get the signature 48 | output.append ('```py\n') 49 | output.append('def %s%s\n' % (func[0], pydoc.inspect.formatargspec(*pydoc.inspect.getargspec(func[1])))) 50 | output.append ('```\n') 51 | 52 | # get the docstring 53 | if pydoc.inspect.getdoc(func[1]): 54 | output.append('\n') 55 | output.append(pydoc.inspect.getdoc(func[1])) 56 | 57 | output.append('\n') 58 | return output 59 | 60 | def generatedocs(module): 61 | try: 62 | sys.path.append(os.getcwd()) 63 | # Attempt import 64 | mod = pydoc.safeimport(module) 65 | if mod is None: 66 | print("Module not found") 67 | 68 | # Module imported correctly, let's create the docs 69 | return getmarkdown(mod) 70 | except pydoc.ErrorDuringImport as e: 71 | print("Error while trying to import " + module) 72 | 73 | if __name__ == '__main__': 74 | print(generatedocs(sys.argv[1])) -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "redisearch" 3 | version = "2.1.1" 4 | description = "RedisSearch 
Python Client" 5 | authors = ["RedisLabs "] 6 | license = "BSD-3-Clause" 7 | readme = "README.md" 8 | 9 | classifiers = [ 10 | 'Topic :: Database', 11 | 'Programming Language :: Python', 12 | 'Intended Audience :: Developers', 13 | 'Programming Language :: Python :: 3.6', 14 | 'Programming Language :: Python :: 3.7', 15 | 'Programming Language :: Python :: 3.8', 16 | 'Programming Language :: Python :: 3.9', 17 | 'Programming Language :: Python :: 3.10', 18 | 'License :: OSI Approved :: BSD License', 19 | 'Development Status :: 5 - Production/Stable' 20 | 21 | ] 22 | keywords = ["Redis Search Extension"] 23 | 24 | [tool.poetry.dependencies] 25 | python = "^3.6.0" 26 | redis = "3.5.3" 27 | six = "^1.16.0" 28 | rmtest = {git = "https://github.com/RedisLabs/rmtest"} 29 | rejson = "^0.5.4" 30 | hiredis = [ 31 | {version = "^2.0.0", python = "^3.6"}, 32 | ] 33 | 34 | 35 | 36 | [tool.poetry.urls] 37 | url = "https://redisearch.io" 38 | repository = "https://github.com/RedisSearch/redisearch-py" 39 | 40 | [tool.poetry.dev-dependencies] 41 | codecov = "^2.1.11" 42 | flake8 = "^4.0.0" 43 | tox = "^3.22.0 || <= 4.0.0" 44 | tox-poetry = "^0.4.0" 45 | bandit = "1.6.0" 46 | vulture = "^2.1" 47 | coverage = "^6.1.1" 48 | virtualenv = "20.9.0" 49 | 50 | [build-system] 51 | requires = ["poetry-core>=1.0.0"] 52 | build-backend = "poetry.core.masonry.api" 53 | -------------------------------------------------------------------------------- /redisearch/__init__.py: -------------------------------------------------------------------------------- 1 | from .result import Result 2 | from .document import Document 3 | from .client import Client, NumericField, TextField, GeoField, TagField, IndexDefinition 4 | from .query import Query, NumericFilter, GeoFilter, SortbyField 5 | from .aggregation import AggregateRequest, AggregateResult 6 | from .auto_complete import AutoCompleter, Suggestion 7 | 8 | 9 | -------------------------------------------------------------------------------- /redisearch/_util.py: -------------------------------------------------------------------------------- 1 | import six 2 | 3 | def to_string(s): 4 | if isinstance(s, six.string_types): 5 | return s 6 | elif isinstance(s, six.binary_type): 7 | return s.decode('utf-8','ignore') 8 | else: 9 | return s # Not a string we care about 10 | -------------------------------------------------------------------------------- /redisearch/aggregation.py: -------------------------------------------------------------------------------- 1 | from six import string_types 2 | 3 | FIELDNAME = object() 4 | 5 | 6 | class Limit(object): 7 | def __init__(self, offset=0, count=0): 8 | self.offset = offset 9 | self.count = count 10 | 11 | def build_args(self): 12 | if self.count: 13 | return ['LIMIT', str(self.offset), str(self.count)] 14 | else: 15 | return [] 16 | 17 | 18 | class Reducer(object): 19 | """ 20 | Base reducer object for all reducers. 21 | 22 | See the `redisearch.reducers` module for the actual reducers. 23 | """ 24 | NAME = None 25 | 26 | def __init__(self, *args): 27 | self._args = args 28 | self._field = None 29 | self._alias = None 30 | 31 | def alias(self, alias): 32 | """ 33 | Set the alias for this reducer. 34 | 35 | ### Parameters 36 | 37 | - **alias**: The value of the alias for this reducer. If this is the 38 | special value `aggregation.FIELDNAME` then this reducer will be 39 | aliased using the same name as the field upon which it operates. 
40 | Note that using `FIELDNAME` is only possible on reducers which 41 | operate on a single field value. 42 | 43 | This method returns the `Reducer` object making it suitable for 44 | chaining. 45 | """ 46 | if alias is FIELDNAME: 47 | if not self._field: 48 | raise ValueError("Cannot use FIELDNAME alias with no field") 49 | # Chop off initial '@' 50 | alias = self._field[1:] 51 | self._alias = alias 52 | return self 53 | 54 | @property 55 | def args(self): 56 | return self._args 57 | 58 | 59 | class SortDirection(object): 60 | """ 61 | This special class is used to indicate sort direction. 62 | """ 63 | DIRSTRING = None 64 | 65 | def __init__(self, field): 66 | self.field = field 67 | 68 | 69 | class Asc(SortDirection): 70 | """ 71 | Indicate that the given field should be sorted in ascending order 72 | """ 73 | DIRSTRING = 'ASC' 74 | 75 | 76 | class Desc(SortDirection): 77 | """ 78 | Indicate that the given field should be sorted in descending order 79 | """ 80 | DIRSTRING = 'DESC' 81 | 82 | 83 | class Group(object): 84 | """ 85 | This object automatically created in the `AggregateRequest.group_by()` 86 | """ 87 | def __init__(self, fields, reducers): 88 | if not reducers: 89 | raise ValueError('Need at least one reducer') 90 | 91 | fields = [fields] if isinstance(fields, string_types) else fields 92 | reducers = [reducers] if isinstance(reducers, Reducer) else reducers 93 | 94 | self.fields = fields 95 | self.reducers = reducers 96 | self.limit = Limit() 97 | 98 | def build_args(self): 99 | ret = ['GROUPBY', str(len(self.fields))] 100 | ret.extend(self.fields) 101 | for reducer in self.reducers: 102 | ret += ['REDUCE', reducer.NAME, str(len(reducer.args))] 103 | ret.extend(reducer.args) 104 | if reducer._alias is not None: 105 | ret += ['AS', reducer._alias] 106 | return ret 107 | 108 | class Projection(object): 109 | """ 110 | This object automatically created in the `AggregateRequest.apply()` 111 | """ 112 | 113 | def __init__(self, projector, alias=None ): 114 | 115 | self.alias = alias 116 | self.projector = projector 117 | 118 | def build_args(self): 119 | ret = ['APPLY', self.projector] 120 | if self.alias is not None: 121 | ret += ['AS', self.alias] 122 | 123 | return ret 124 | 125 | class SortBy(object): 126 | """ 127 | This object automatically created in the `AggregateRequest.sort_by()` 128 | """ 129 | 130 | def __init__(self, fields, max=0): 131 | self.fields = fields 132 | self.max = max 133 | 134 | 135 | 136 | def build_args(self): 137 | fields_args = [] 138 | for f in self.fields: 139 | if isinstance(f, SortDirection): 140 | fields_args += [f.field, f.DIRSTRING] 141 | else: 142 | fields_args += [f] 143 | 144 | ret = ['SORTBY', str(len(fields_args))] 145 | ret.extend(fields_args) 146 | if self.max > 0: 147 | ret += ['MAX', str(self.max)] 148 | 149 | return ret 150 | 151 | 152 | class AggregateRequest(object): 153 | """ 154 | Aggregation request which can be passed to `Client.aggregate`. 155 | """ 156 | def __init__(self, query='*'): 157 | """ 158 | Create an aggregation request. This request may then be passed to 159 | `client.aggregate()`. 160 | 161 | In order for the request to be usable, it must contain at least one 162 | group. 163 | 164 | - **query** Query string for filtering records. 165 | 166 | All member methods (except `build_args()`) 167 | return the object itself, making them useful for chaining. 
168 | """ 169 | self._query = query 170 | self._aggregateplan = [] 171 | self._loadfields = [] 172 | self._limit = Limit() 173 | self._max = 0 174 | self._with_schema = False 175 | self._verbatim = False 176 | self._cursor = [] 177 | 178 | def load(self, *fields): 179 | """ 180 | Indicate the fields to be returned in the response. These fields are 181 | returned in addition to any others implicitly specified. 182 | 183 | ### Parameters 184 | 185 | - **fields**: One or more fields in the format of `@field` 186 | """ 187 | self._loadfields.extend(fields) 188 | return self 189 | 190 | def group_by(self, fields, *reducers): 191 | """ 192 | Specify by which fields to group the aggregation. 193 | 194 | ### Parameters 195 | 196 | - **fields**: Fields to group by. This can either be a single string, 197 | or a list of strings. both cases, the field should be specified as 198 | `@field`. 199 | - **reducers**: One or more reducers. Reducers may be found in the 200 | `aggregation` module. 201 | """ 202 | group = Group(fields, reducers) 203 | self._aggregateplan.extend(group.build_args()) 204 | 205 | return self 206 | 207 | def apply(self, **kwexpr): 208 | """ 209 | Specify one or more projection expressions to add to each result 210 | 211 | ### Parameters 212 | 213 | - **kwexpr**: One or more key-value pairs for a projection. The key is 214 | the alias for the projection, and the value is the projection 215 | expression itself, for example `apply(square_root="sqrt(@foo)")` 216 | """ 217 | for alias, expr in kwexpr.items(): 218 | projection = Projection(expr, alias ) 219 | self._aggregateplan.extend(projection.build_args()) 220 | 221 | return self 222 | 223 | def limit(self, offset, num): 224 | """ 225 | Sets the limit for the most recent group or query. 226 | 227 | If no group has been defined yet (via `group_by()`) then this sets 228 | the limit for the initial pool of results from the query. Otherwise, 229 | this limits the number of items operated on from the previous group. 230 | 231 | Setting a limit on the initial search results may be useful when 232 | attempting to execute an aggregation on a sample of a large data set. 233 | 234 | ### Parameters 235 | 236 | - **offset**: Result offset from which to begin paging 237 | - **num**: Number of results to return 238 | 239 | 240 | Example of sorting the initial results: 241 | 242 | ``` 243 | AggregateRequest('@sale_amount:[10000, inf]')\ 244 | .limit(0, 10)\ 245 | .group_by('@state', r.count()) 246 | ``` 247 | 248 | Will only group by the states found in the first 10 results of the 249 | query `@sale_amount:[10000, inf]`. On the other hand, 250 | 251 | ``` 252 | AggregateRequest('@sale_amount:[10000, inf]')\ 253 | .limit(0, 1000)\ 254 | .group_by('@state', r.count()\ 255 | .limit(0, 10) 256 | ``` 257 | 258 | Will group all the results matching the query, but only return the 259 | first 10 groups. 260 | 261 | If you only wish to return a *top-N* style query, consider using 262 | `sort_by()` instead. 263 | 264 | """ 265 | limit = Limit(offset, num) 266 | self._limit = limit 267 | return self 268 | 269 | def sort_by(self, *fields, **kwargs): 270 | """ 271 | Indicate how the results should be sorted. This can also be used for 272 | *top-N* style queries 273 | 274 | ### Parameters 275 | 276 | - **fields**: The fields by which to sort. This can be either a single 277 | field or a list of fields. If you wish to specify order, you can 278 | use the `Asc` or `Desc` wrapper classes. 279 | - **max**: Maximum number of results to return. 
This can be used instead 280 | of `LIMIT` and is also faster. 281 | 282 | 283 | Example of sorting by `foo` ascending and `bar` descending: 284 | 285 | ``` 286 | sort_by(Asc('@foo'), Desc('@bar')) 287 | ``` 288 | 289 | Return the top 10 customers: 290 | 291 | ``` 292 | AggregateRequest()\ 293 | .group_by('@customer', r.sum('@paid').alias(FIELDNAME))\ 294 | .sort_by(Desc('@paid'), max=10) 295 | ``` 296 | """ 297 | if isinstance(fields, (string_types, SortDirection)): 298 | fields = [fields] 299 | 300 | max = kwargs.get('max', 0) 301 | sortby = SortBy(fields, max) 302 | 303 | self._aggregateplan.extend(sortby.build_args()) 304 | return self 305 | 306 | def filter(self, expressions): 307 | """ 308 | Specify filter for post-query results using predicates relating to values in the result set. 309 | 310 | ### Parameters 311 | 312 | - **fields**: Fields to group by. This can either be a single string, 313 | or a list of strings. 314 | """ 315 | if isinstance(expressions, (string_types)): 316 | expressions = [expressions] 317 | 318 | for expression in expressions: 319 | self._aggregateplan.extend(['FILTER', expression]) 320 | 321 | return self 322 | 323 | 324 | 325 | def with_schema(self): 326 | """ 327 | If set, the `schema` property will contain a list of `[field, type]` 328 | entries in the result object. 329 | """ 330 | self._with_schema = True 331 | return self 332 | 333 | def verbatim(self): 334 | self._verbatim = True 335 | return self 336 | 337 | def cursor(self, count=0, max_idle=0.0): 338 | args = ['WITHCURSOR'] 339 | if count: 340 | args += ['COUNT', str(count)] 341 | if max_idle: 342 | args += ['MAXIDLE', str(max_idle * 1000)] 343 | self._cursor = args 344 | return self 345 | 346 | def _limit_2_args(self, limit): 347 | if limit[1]: 348 | return ['LIMIT'] + [str(x) for x in limit] 349 | else: 350 | return [] 351 | 352 | def build_args(self): 353 | # @foo:bar ... 
354 | ret = [self._query] 355 | 356 | if self._with_schema: 357 | ret.append('WITHSCHEMA') 358 | 359 | if self._verbatim: 360 | ret.append('VERBATIM') 361 | 362 | if self._cursor: 363 | ret += self._cursor 364 | 365 | if self._loadfields: 366 | ret.append('LOAD') 367 | ret.append(str(len(self._loadfields))) 368 | ret.extend(self._loadfields) 369 | 370 | ret.extend(self._aggregateplan) 371 | 372 | ret += self._limit.build_args() 373 | 374 | return ret 375 | 376 | 377 | class Cursor(object): 378 | def __init__(self, cid): 379 | self.cid = cid 380 | self.max_idle = 0 381 | self.count = 0 382 | 383 | def build_args(self): 384 | args = [str(self.cid)] 385 | if self.max_idle: 386 | args += ['MAXIDLE', str(self.max_idle)] 387 | if self.count: 388 | args += ['COUNT', str(self.count)] 389 | return args 390 | 391 | 392 | class AggregateResult(object): 393 | def __init__(self, rows, cursor, schema): 394 | self.rows = rows 395 | self.cursor = cursor 396 | self.schema = schema 397 | 398 | def __repr__(self): 399 | return "<{} at 0x{:x} Rows={}, Cursor={}>".format( 400 | self.__class__.__name__, 401 | id(self), 402 | len(self.rows), 403 | self.cursor.cid if self.cursor else -1) -------------------------------------------------------------------------------- /redisearch/auto_complete.py: -------------------------------------------------------------------------------- 1 | from redis import Redis, ConnectionPool 2 | from six.moves import xrange 3 | 4 | from ._util import to_string 5 | 6 | class Suggestion(object): 7 | """ 8 | Represents a single suggestion being sent or returned from the auto complete server 9 | """ 10 | def __init__(self, string, score=1.0, payload=None): 11 | self.string = to_string(string) 12 | self.payload = to_string(payload) 13 | self.score = score 14 | 15 | def __repr__(self): 16 | return self.string 17 | 18 | 19 | class SuggestionParser(object): 20 | """ 21 | Internal class used to parse results from the `SUGGET` command. 
22 | This needs to consume either 1, 2, or 3 values at a time from 23 | the return value depending on what objects were requested 24 | """ 25 | def __init__(self, with_scores, with_payloads, ret): 26 | self.with_scores = with_scores 27 | self.with_payloads = with_payloads 28 | 29 | if with_scores and with_payloads: 30 | self.sugsize = 3 31 | self._scoreidx = 1 32 | self._payloadidx = 2 33 | elif with_scores: 34 | self.sugsize = 2 35 | self._scoreidx = 1 36 | elif with_payloads: 37 | self.sugsize = 2 38 | self._payloadidx = 1 39 | else: 40 | self.sugsize = 1 41 | self._scoreidx = -1 42 | 43 | self._sugs = ret 44 | 45 | def __iter__(self): 46 | for i in xrange(0, len(self._sugs), self.sugsize): 47 | ss = self._sugs[i] 48 | score = float(self._sugs[i + self._scoreidx]) if self.with_scores else 1.0 49 | payload = self._sugs[i + self._payloadidx] if self.with_payloads else None 50 | yield Suggestion(ss, score, payload) 51 | 52 | 53 | class AutoCompleter(object): 54 | """ 55 | A client to RediSearch's AutoCompleter API 56 | 57 | It provides prefix searches with optionally fuzzy matching of prefixes 58 | """ 59 | 60 | SUGADD_COMMAND = "FT.SUGADD" 61 | SUGDEL_COMMAND = "FT.SUGDEL" 62 | SUGLEN_COMMAND = "FT.SUGLEN" 63 | SUGGET_COMMAND = "FT.SUGGET" 64 | 65 | INCR = 'INCR' 66 | WITHSCORES = 'WITHSCORES' 67 | FUZZY = 'FUZZY' 68 | WITHPAYLOADS = 'WITHPAYLOADS' 69 | 70 | def __init__(self, key, host='localhost', port=6379, conn = None, password=None): 71 | """ 72 | Create a new AutoCompleter client for the given key, and optional host and port 73 | 74 | If conn is not None, we employ an already existing redis connection 75 | """ 76 | 77 | self.key = key 78 | self.redis = conn if conn is not None else Redis( 79 | connection_pool = ConnectionPool(host=host, port=port, password=password)) 80 | 81 | def add_suggestions(self, *suggestions, **kwargs): 82 | """ 83 | Add suggestion terms to the AutoCompleter engine. Each suggestion has a score and string. 84 | 85 | If kwargs['increment'] is true and the terms are already in the server's dictionary, we increment their scores 86 | """ 87 | # If Transaction is not set to false it will attempt a MULTI/EXEC which will error 88 | pipe = self.redis.pipeline(transaction=False) 89 | for sug in suggestions: 90 | args = [AutoCompleter.SUGADD_COMMAND, self.key, sug.string, sug.score] 91 | if kwargs.get('increment'): 92 | args.append(AutoCompleter.INCR) 93 | if sug.payload: 94 | args.append('PAYLOAD') 95 | args.append(sug.payload) 96 | 97 | pipe.execute_command(*args) 98 | 99 | return pipe.execute()[-1] 100 | 101 | 102 | 103 | def len(self): 104 | """ 105 | Return the number of entries in the AutoCompleter index 106 | """ 107 | return self.redis.execute_command(AutoCompleter.SUGLEN_COMMAND, self.key) 108 | 109 | def delete(self, string): 110 | """ 111 | Delete a string from the AutoCompleter index. 112 | Returns 1 if the string was found and deleted, 0 otherwise 113 | """ 114 | return self.redis.execute_command(AutoCompleter.SUGDEL_COMMAND, self.key, string) 115 | 116 | def get_suggestions(self, prefix, fuzzy = False, num = 10, with_scores = False, with_payloads=False): 117 | """ 118 | Get a list of suggestions from the AutoCompleter, for a given prefix 119 | 120 | ### Parameters: 121 | - **prefix**: the prefix we are searching. **Must be valid ascii or utf-8** 122 | - **fuzzy**: If set to true, the prefix search is done in fuzzy mode. 123 | **NOTE**: Running fuzzy searches on short (<3 letters) prefixes can be very slow, and even scan the entire index. 
124 | - **with_scores**: if set to true, we also return the (refactored) score of each suggestion. 125 | This is normally not needed, and is NOT the original score inserted into the index 126 | - **with_payloads**: Return suggestion payloads 127 | - **num**: The maximum number of results we return. Note that we might return less. The algorithm trims irrelevant suggestions. 128 | 129 | Returns a list of Suggestion objects. If with_scores was False, the score of all suggestions is 1. 130 | """ 131 | 132 | args = [AutoCompleter.SUGGET_COMMAND, self.key, prefix, 'MAX', num] 133 | if fuzzy: 134 | args.append(AutoCompleter.FUZZY) 135 | if with_scores: 136 | args.append(AutoCompleter.WITHSCORES) 137 | if with_payloads: 138 | args.append(AutoCompleter.WITHPAYLOADS) 139 | 140 | ret = self.redis.execute_command(*args) 141 | results = [] 142 | if not ret: 143 | return results 144 | 145 | parser = SuggestionParser(with_scores, with_payloads, ret) 146 | return [s for s in parser] 147 | -------------------------------------------------------------------------------- /redisearch/client.py: -------------------------------------------------------------------------------- 1 | from redis import Redis, ConnectionPool 2 | import itertools 3 | import time 4 | import six 5 | 6 | from .document import Document 7 | from .result import Result 8 | from .query import Query 9 | from ._util import to_string 10 | from .aggregation import AggregateRequest, AggregateResult, Cursor 11 | from enum import Enum 12 | 13 | 14 | class Field(object): 15 | 16 | NUMERIC = 'NUMERIC' 17 | TEXT = 'TEXT' 18 | WEIGHT = 'WEIGHT' 19 | GEO = 'GEO' 20 | TAG = 'TAG' 21 | SORTABLE = 'SORTABLE' 22 | NOINDEX = 'NOINDEX' 23 | AS = 'AS' 24 | 25 | def __init__(self, name, args=[], sortable=False, no_index=False, as_name=None): 26 | self.name = name 27 | self.args = args 28 | self.args_suffix = list() 29 | self.as_name = as_name 30 | 31 | if sortable: 32 | self.args_suffix.append(Field.SORTABLE) 33 | if no_index: 34 | self.args_suffix.append(Field.NOINDEX) 35 | 36 | if no_index and not sortable: 37 | raise ValueError('Non-Sortable non-Indexable fields are ignored') 38 | 39 | def append_arg(self, value): 40 | self.args.append(value) 41 | 42 | def redis_args(self): 43 | args = [self.name] 44 | if self.as_name: 45 | args += [self.AS, self.as_name] 46 | args += self.args 47 | args += self.args_suffix 48 | return args 49 | 50 | 51 | class TextField(Field): 52 | """ 53 | TextField is used to define a text field in a schema definition 54 | """ 55 | NOSTEM = 'NOSTEM' 56 | PHONETIC = 'PHONETIC' 57 | 58 | def __init__(self, name, weight=1.0, no_stem=False, phonetic_matcher=None, **kwargs): 59 | Field.__init__(self, name, args=[Field.TEXT, Field.WEIGHT, weight], **kwargs) 60 | 61 | if no_stem: 62 | Field.append_arg(self, self.NOSTEM) 63 | if phonetic_matcher and phonetic_matcher in ['dm:en', 'dm:fr', 'dm:pt', 'dm:es']: 64 | Field.append_arg(self, self.PHONETIC) 65 | Field.append_arg(self, phonetic_matcher) 66 | 67 | 68 | class NumericField(Field): 69 | """ 70 | NumericField is used to define a numeric field in a schema definition 71 | """ 72 | 73 | def __init__(self, name, **kwargs): 74 | Field.__init__(self, name, args=[Field.NUMERIC], **kwargs) 75 | 76 | 77 | class GeoField(Field): 78 | """ 79 | GeoField is used to define a geo-indexing field in a schema definition 80 | """ 81 | 82 | def __init__(self, name, **kwargs): 83 | Field.__init__(self, name, args=[Field.GEO], **kwargs) 84 | 85 | 86 | class TagField(Field): 87 | SEPARATOR = 'SEPARATOR' 88 | 89 | """ 
90 | TagField is a tag-indexing field with simpler compression and tokenization. 91 | See http://redisearch.io/Tags/ 92 | """ 93 | 94 | def __init__(self, name, separator=',', **kwargs): 95 | Field.__init__(self, name, args=[Field.TAG, self.SEPARATOR, separator], **kwargs) 96 | 97 | 98 | class IndexType(Enum): 99 | """ 100 | Enum of the currently supported index types. 101 | """ 102 | HASH = 1 103 | JSON = 2 104 | 105 | class IndexDefinition(object): 106 | """ 107 | IndexDefinition is used to define a index definition for automatic indexing on Hash or Json update. 108 | """ 109 | 110 | def __init__(self, prefix=[], filter=None, language_field=None, language=None, score_field=None, 111 | score=1.0, payload_field=None, index_type=None): 112 | args = [] 113 | 114 | if index_type is IndexType.HASH: 115 | args.extend(['ON', 'HASH']) 116 | elif index_type is IndexType.JSON: 117 | args.extend(['ON', 'JSON']) 118 | elif index_type is not None: 119 | raise RuntimeError("index_type must be one of {}".format(list(IndexType))) 120 | 121 | if len(prefix) > 0: 122 | args.append('PREFIX') 123 | args.append(len(prefix)) 124 | for p in prefix: 125 | args.append(p) 126 | 127 | if filter is not None: 128 | args.append('FILTER') 129 | args.append(filter) 130 | 131 | if language_field is not None: 132 | args.append('LANGUAGE_FIELD') 133 | args.append(language_field) 134 | 135 | if language is not None: 136 | args.append('LANGUAGE') 137 | args.append(language) 138 | 139 | if score_field is not None: 140 | args.append('SCORE_FIELD') 141 | args.append(score_field) 142 | 143 | if score is not None: 144 | args.append('SCORE') 145 | args.append(score) 146 | 147 | if payload_field is not None: 148 | args.append('PAYLOAD_FIELD') 149 | args.append(payload_field) 150 | 151 | self.args = args 152 | 153 | 154 | class Client(object): 155 | """ 156 | A client for the RediSearch module. 157 | It abstracts the API of the module and lets you just use the engine 158 | """ 159 | 160 | NUMERIC = 'NUMERIC' 161 | 162 | CREATE_CMD = 'FT.CREATE' 163 | ALTER_CMD = 'FT.ALTER' 164 | SEARCH_CMD = 'FT.SEARCH' 165 | ADD_CMD = 'FT.ADD' 166 | ADDHASH_CMD = "FT.ADDHASH" 167 | DROP_CMD = 'FT.DROP' 168 | EXPLAIN_CMD = 'FT.EXPLAIN' 169 | DEL_CMD = 'FT.DEL' 170 | AGGREGATE_CMD = 'FT.AGGREGATE' 171 | CURSOR_CMD = 'FT.CURSOR' 172 | SPELLCHECK_CMD = 'FT.SPELLCHECK' 173 | DICT_ADD_CMD = 'FT.DICTADD' 174 | DICT_DEL_CMD = 'FT.DICTDEL' 175 | DICT_DUMP_CMD = 'FT.DICTDUMP' 176 | GET_CMD = 'FT.GET' 177 | MGET_CMD = 'FT.MGET' 178 | CONFIG_CMD = 'FT.CONFIG' 179 | TAGVALS_CMD = 'FT.TAGVALS' 180 | ALIAS_ADD_CMD = 'FT.ALIASADD' 181 | ALIAS_UPDATE_CMD = 'FT.ALIASUPDATE' 182 | ALIAS_DEL_CMD = 'FT.ALIASDEL' 183 | 184 | NOOFFSETS = 'NOOFFSETS' 185 | NOFIELDS = 'NOFIELDS' 186 | NOHL = 'NOHL' 187 | NOFREQS = 'NOFREQS' 188 | MAXTEXTFIELDS = 'MAXTEXTFIELDS' 189 | TEMPORARY = 'TEMPORARY' 190 | STOPWORDS = 'STOPWORDS' 191 | SKIPINITIALSCAN = 'SKIPINITIALSCAN' 192 | 193 | class BatchIndexer(object): 194 | """ 195 | A batch indexer allows you to automatically batch 196 | document indexeing in pipelines, flushing it every N documents. 
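A minimal sketch (the index name, document ids and fields are illustrative;
this uses the FT.ADD-style document API):

```
client = Client('myIndex')
indexer = client.batch_indexer(chunk_size=500)
for i in range(10000):
    indexer.add_document('doc%d' % i, title='hello world %d' % i)
indexer.commit()  # flush whatever remains in the last partial chunk
```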
197 | """ 198 | 199 | def __init__(self, client, chunk_size=1000): 200 | 201 | self.client = client 202 | self.pipeline = client.redis.pipeline(False) 203 | self.total = 0 204 | self.chunk_size = chunk_size 205 | self.current_chunk = 0 206 | 207 | def __del__(self): 208 | if self.current_chunk: 209 | self.commit() 210 | 211 | def add_document(self, doc_id, nosave=False, score=1.0, payload=None, 212 | replace=False, partial=False, no_create=False, **fields): 213 | """ 214 | Add a document to the batch query 215 | """ 216 | self.client._add_document(doc_id, conn=self.pipeline, nosave=nosave, score=score, 217 | payload=payload, replace=replace, 218 | partial=partial, no_create=no_create, **fields) 219 | self.current_chunk += 1 220 | self.total += 1 221 | if self.current_chunk >= self.chunk_size: 222 | self.commit() 223 | 224 | def add_document_hash( 225 | self, doc_id, score=1.0, replace=False, 226 | ): 227 | """ 228 | Add a hash to the batch query 229 | """ 230 | self.client._add_document_hash( 231 | doc_id, conn=self.pipeline, score=score, replace=replace, 232 | ) 233 | self.current_chunk += 1 234 | self.total += 1 235 | if self.current_chunk >= self.chunk_size: 236 | self.commit() 237 | 238 | def commit(self): 239 | """ 240 | Manually commit and flush the batch indexing query 241 | """ 242 | self.pipeline.execute() 243 | self.current_chunk = 0 244 | 245 | def __init__(self, index_name, host='localhost', port=6379, conn=None, password=None, decode_responses=True): 246 | """ 247 | Create a new Client for the given index_name, and optional host and port 248 | 249 | If conn is not None, we employ an already existing redis connection 250 | """ 251 | 252 | self.index_name = index_name 253 | 254 | self.redis = conn if conn is not None else Redis( 255 | connection_pool=ConnectionPool(host=host, port=port, password=password, 256 | decode_responses=decode_responses)) 257 | 258 | def batch_indexer(self, chunk_size=100): 259 | """ 260 | Create a new batch indexer from the client with a given chunk size 261 | """ 262 | return Client.BatchIndexer(self, chunk_size=chunk_size) 263 | 264 | def create_index(self, fields, no_term_offsets=False, 265 | no_field_flags=False, stopwords=None, definition=None, 266 | max_text_fields=False, temporary=None, no_highlight=False, 267 | no_term_frequencies=False, skip_initial_scan=False): 268 | """ 269 | Create the search index. The index must not already exist. 270 | 271 | ### Parameters: 272 | 273 | - **fields**: a list of TextField or NumericField objects 274 | - **no_term_offsets**: If true, we will not save term offsets in the index 275 | - **no_field_flags**: If true, we will not save field flags that allow searching in specific fields 276 | - **stopwords**: If not None, we create the index with this custom stopword list. The list can be empty 277 | - **max_text_fields**: If true, we will encode indexes as if there were more than 32 text fields, 278 | which allows you to add additional fields (beyond 32). 279 | - **temporary**: Create a lightweight temporary index which will expire after the specified period of 280 | inactivity (in seconds). The internal idle timer is reset whenever the index is searched or added to. 281 | - **no_highlight**: If true, disabling highlighting support. Also implied by no_term_offsets. 282 | - **no_term_frequencies**: If true, we avoid saving the term frequencies in the index. 283 | - **skip_initial_scan**: If true, we do not scan and index. 
284 | """ 285 | 286 | args = [self.CREATE_CMD, self.index_name] 287 | if definition is not None: 288 | args += definition.args 289 | if max_text_fields: 290 | args.append(self.MAXTEXTFIELDS) 291 | if temporary is not None and isinstance(temporary, int): 292 | args.append(self.TEMPORARY) 293 | args.append(temporary) 294 | if no_term_offsets: 295 | args.append(self.NOOFFSETS) 296 | if no_highlight: 297 | args.append(self.NOHL) 298 | if no_field_flags: 299 | args.append(self.NOFIELDS) 300 | if no_term_frequencies: 301 | args.append(self.NOFREQS) 302 | if skip_initial_scan: 303 | args.append(self.SKIPINITIALSCAN) 304 | if stopwords is not None and isinstance(stopwords, (list, tuple, set)): 305 | args += [self.STOPWORDS, len(stopwords)] 306 | if len(stopwords) > 0: 307 | args += list(stopwords) 308 | 309 | args.append('SCHEMA') 310 | 311 | try: 312 | args += list(itertools.chain(*(f.redis_args() for f in fields))) 313 | except TypeError: 314 | args += fields.redis_args() 315 | 316 | return self.redis.execute_command(*args) 317 | 318 | def alter_schema_add(self, fields): 319 | """ 320 | Alter the existing search index by adding new fields. The index must already exist. 321 | 322 | ### Parameters: 323 | 324 | - **fields**: a list of Field objects to add for the index 325 | """ 326 | 327 | args = [self.ALTER_CMD, self.index_name, 'SCHEMA', 'ADD'] 328 | 329 | try: 330 | args += list(itertools.chain(*(f.redis_args() for f in fields))) 331 | except TypeError: 332 | args += fields.redis_args() 333 | 334 | return self.redis.execute_command(*args) 335 | 336 | def drop_index(self, delete_documents=True): 337 | """ 338 | Drop the index if it exists. Deprecated from RediSearch 2.0. 339 | 340 | ### Parameters: 341 | 342 | - **delete_documents**: If `True`, all documents will be deleted. 343 | """ 344 | keep_str = '' if delete_documents else 'KEEPDOCS' 345 | return self.redis.execute_command(self.DROP_CMD, self.index_name, keep_str) 346 | 347 | def dropindex(self, delete_documents=False): 348 | """ 349 | Drop the index if it exists. 350 | Replaced `drop_index` in RediSearch 2.0. 351 | Default behavior was changed to not delete the indexed documents. 352 | 353 | ### Parameters: 354 | 355 | - **delete_documents**: If `True`, all documents will be deleted. 
356 | """ 357 | keep_str = '' if delete_documents else 'KEEPDOCS' 358 | return self.redis.execute_command(self.DROP_CMD, self.index_name, keep_str) 359 | 360 | def _add_document(self, doc_id, conn=None, nosave=False, score=1.0, payload=None, 361 | replace=False, partial=False, language=None, no_create=False, **fields): 362 | """ 363 | Internal add_document used for both batch and single doc indexing 364 | """ 365 | if conn is None: 366 | conn = self.redis 367 | 368 | if partial or no_create: 369 | replace = True 370 | 371 | args = [self.ADD_CMD, self.index_name, doc_id, score] 372 | if nosave: 373 | args.append('NOSAVE') 374 | if payload is not None: 375 | args.append('PAYLOAD') 376 | args.append(payload) 377 | if replace: 378 | args.append('REPLACE') 379 | if partial: 380 | args.append('PARTIAL') 381 | if no_create: 382 | args.append('NOCREATE') 383 | if language: 384 | args += ['LANGUAGE', language] 385 | args.append('FIELDS') 386 | args += list(itertools.chain(*fields.items())) 387 | return conn.execute_command(*args) 388 | 389 | def _add_document_hash( 390 | self, doc_id, conn=None, score=1.0, language=None, replace=False, 391 | ): 392 | """ 393 | Internal add_document_hash used for both batch and single doc indexing 394 | """ 395 | if conn is None: 396 | conn = self.redis 397 | 398 | args = [self.ADDHASH_CMD, self.index_name, doc_id, score] 399 | 400 | if replace: 401 | args.append("REPLACE") 402 | 403 | if language: 404 | args += ["LANGUAGE", language] 405 | 406 | return conn.execute_command(*args) 407 | 408 | def add_document(self, doc_id, nosave=False, score=1.0, payload=None, 409 | replace=False, partial=False, language=None, no_create=False, **fields): 410 | """ 411 | Add a single document to the index. 412 | 413 | ### Parameters 414 | 415 | - **doc_id**: the id of the saved document. 416 | - **nosave**: if set to true, we just index the document, and don't save a copy of it. This means that searches will just return ids. 417 | - **score**: the document ranking, between 0.0 and 1.0 418 | - **payload**: optional inner-index payload we can save for fast access in scoring functions 419 | - **replace**: if True, and the document already is in the index, we perform an update and reindex the document 420 | - **partial**: if True, the fields specified will be added to the existing document. 421 | This has the added benefit that any fields specified with `no_index` 422 | will not be reindexed again. Implies `replace` 423 | - **language**: Specify the language used for document tokenization. 424 | - **no_create**: if True, the document is only updated and reindexed if it already exists. 425 | If the document does not exist, an error will be returned. Implies `replace` 426 | - **fields** kwargs dictionary of the document fields to be saved and/or indexed. 427 | NOTE: Geo points shoule be encoded as strings of "lon,lat" 428 | """ 429 | return self._add_document(doc_id, conn=None, nosave=nosave, score=score, 430 | payload=payload, replace=replace, 431 | partial=partial, language=language, 432 | no_create=no_create, **fields) 433 | 434 | def add_document_hash( 435 | self, doc_id, score=1.0, language=None, replace=False, 436 | ): 437 | """ 438 | Add a hash document to the index. 439 | 440 | ### Parameters 441 | 442 | - **doc_id**: the document's id. This has to be an existing HASH key in Redis that will hold the fields the index needs. 
443 | - **score**: the document ranking, between 0.0 and 1.0 444 | - **replace**: if True, and the document already is in the index, we perform an update and reindex the document 445 | - **language**: Specify the language used for document tokenization. 446 | """ 447 | return self._add_document_hash( 448 | doc_id, conn=None, score=score, language=language, replace=replace, 449 | ) 450 | 451 | def delete_document(self, doc_id, conn=None, delete_actual_document=False): 452 | """ 453 | Delete a document from index 454 | Returns 1 if the document was deleted, 0 if not 455 | 456 | ### Parameters 457 | 458 | - **delete_actual_document**: if set to True, RediSearch also delete the actual document if it is in the index 459 | """ 460 | args = [self.DEL_CMD, self.index_name, doc_id] 461 | if conn is None: 462 | conn = self.redis 463 | if delete_actual_document: 464 | args.append('DD') 465 | 466 | return conn.execute_command(*args) 467 | 468 | def load_document(self, id): 469 | """ 470 | Load a single document by id 471 | """ 472 | fields = self.redis.hgetall(id) 473 | if six.PY3: 474 | f2 = {to_string(k): to_string(v) for k, v in fields.items()} 475 | fields = f2 476 | 477 | try: 478 | del fields['id'] 479 | except KeyError: 480 | pass 481 | 482 | return Document(id=id, **fields) 483 | 484 | def get(self, *ids): 485 | """ 486 | Returns the full contents of multiple documents. 487 | 488 | ### Parameters 489 | 490 | - **ids**: the ids of the saved documents. 491 | """ 492 | 493 | return self.redis.execute_command('FT.MGET', self.index_name, *ids) 494 | 495 | def info(self): 496 | """ 497 | Get info an stats about the the current index, including the number of documents, memory consumption, etc 498 | """ 499 | 500 | res = self.redis.execute_command('FT.INFO', self.index_name) 501 | it = six.moves.map(to_string, res) 502 | return dict(six.moves.zip(it, it)) 503 | 504 | def _mk_query_args(self, query): 505 | args = [self.index_name] 506 | 507 | if isinstance(query, six.string_types): 508 | # convert the query from a text to a query object 509 | query = Query(query) 510 | if not isinstance(query, Query): 511 | raise ValueError("Bad query type %s" % type(query)) 512 | 513 | args += query.get_args() 514 | return args, query 515 | 516 | def search(self, query): 517 | """ 518 | Search the index for a given query, and return a result of documents 519 | 520 | ### Parameters 521 | 522 | - **query**: the search query. Either a text for simple queries with default parameters, or a Query object for complex queries. 523 | See RediSearch's documentation on query format 524 | """ 525 | args, query = self._mk_query_args(query) 526 | st = time.time() 527 | res = self.redis.execute_command(self.SEARCH_CMD, *args) 528 | 529 | return Result(res, 530 | not query._no_content, 531 | duration=(time.time() - st) * 1000.0, 532 | has_payload=query._with_payloads, 533 | with_scores=query._with_scores) 534 | 535 | def explain(self, query): 536 | args, query_text = self._mk_query_args(query) 537 | return self.redis.execute_command(self.EXPLAIN_CMD, *args) 538 | 539 | def aggregate(self, query): 540 | """ 541 | Issue an aggregation query 542 | 543 | ### Parameters 544 | 545 | **query**: This can be either an `AggeregateRequest`, or a `Cursor` 546 | 547 | An `AggregateResult` object is returned. 
You can access the rows from its 548 | `rows` property, which will always yield the rows of the result 549 | """ 550 | if isinstance(query, AggregateRequest): 551 | has_cursor = bool(query._cursor) 552 | cmd = [self.AGGREGATE_CMD, self.index_name] + query.build_args() 553 | elif isinstance(query, Cursor): 554 | has_cursor = True 555 | cmd = [self.CURSOR_CMD, 'READ', 556 | self.index_name] + query.build_args() 557 | else: 558 | raise ValueError('Bad query', query) 559 | 560 | raw = self.redis.execute_command(*cmd) 561 | if has_cursor: 562 | if isinstance(query, Cursor): 563 | query.cid = raw[1] 564 | cursor = query 565 | else: 566 | cursor = Cursor(raw[1]) 567 | raw = raw[0] 568 | else: 569 | cursor = None 570 | 571 | if isinstance(query, AggregateRequest) and query._with_schema: 572 | schema = raw[0] 573 | rows = raw[2:] 574 | else: 575 | schema = None 576 | rows = raw[1:] 577 | 578 | res = AggregateResult(rows, cursor, schema) 579 | return res 580 | 581 | def spellcheck(self, query, distance=None, include=None, exclude=None): 582 | """ 583 | Issue a spellcheck query 584 | 585 | ### Parameters 586 | 587 | **query**: search query. 588 | **distance***: the maximal Levenshtein distance for spelling suggestions (default: 1, max: 4). 589 | **include**: specifies an inclusion custom dictionary. 590 | **exclude**: specifies an exclusion custom dictionary. 591 | """ 592 | cmd = [self.SPELLCHECK_CMD, self.index_name, query] 593 | if distance: 594 | cmd.extend(['DISTANCE', distance]) 595 | 596 | if include: 597 | cmd.extend(['TERMS', 'INCLUDE', include]) 598 | 599 | if exclude: 600 | cmd.extend(['TERMS', 'EXCLUDE', exclude]) 601 | 602 | raw = self.redis.execute_command(*cmd) 603 | 604 | corrections = {} 605 | if raw == 0: 606 | return corrections 607 | 608 | for _correction in raw: 609 | if isinstance(_correction, six.integer_types) and _correction == 0: 610 | continue 611 | 612 | if len(_correction) != 3: 613 | continue 614 | if not _correction[2]: 615 | continue 616 | if not _correction[2][0]: 617 | continue 618 | 619 | # For spellcheck output 620 | # 1) 1) "TERM" 621 | # 2) "{term1}" 622 | # 3) 1) 1) "{score1}" 623 | # 2) "{suggestion1}" 624 | # 2) 1) "{score2}" 625 | # 2) "{suggestion2}" 626 | # 627 | # Following dictionary will be made 628 | # corrections = { 629 | # '{term1}': [ 630 | # {'score': '{score1}', 'suggestion': '{suggestion1}'}, 631 | # {'score': '{score2}', 'suggestion': '{suggestion2}'} 632 | # ] 633 | # } 634 | corrections[_correction[1]] = [ 635 | {'score': _item[0], 'suggestion':_item[1]} 636 | for _item in _correction[2] 637 | ] 638 | 639 | return corrections 640 | 641 | def dict_add(self, name, *terms): 642 | """Adds terms to a dictionary. 643 | 644 | ### Parameters 645 | 646 | - **name**: Dictionary name. 647 | - **terms**: List of items for adding to the dictionary. 648 | """ 649 | cmd = [self.DICT_ADD_CMD, name] 650 | cmd.extend(terms) 651 | raw = self.redis.execute_command(*cmd) 652 | return raw 653 | 654 | def dict_del(self, name, *terms): 655 | """Deletes terms from a dictionary. 656 | 657 | ### Parameters 658 | 659 | - **name**: Dictionary name. 660 | - **terms**: List of items for removing from the dictionary. 661 | """ 662 | cmd = [self.DICT_DEL_CMD, name] 663 | cmd.extend(terms) 664 | raw = self.redis.execute_command(*cmd) 665 | return raw 666 | 667 | def dict_dump(self, name): 668 | """Dumps all terms in the given dictionary. 669 | 670 | ### Parameters 671 | 672 | - **name**: Dictionary name. 
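Example combining the dictionary commands (a minimal sketch; the dictionary
name and terms are illustrative):

```
client.dict_add('custom_dict', 'redis', 'redisearch')
terms = client.dict_dump('custom_dict')   # terms currently in the dictionary
client.dict_del('custom_dict', 'redisearch')
```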
673 | """ 674 | cmd = [self.DICT_DUMP_CMD, name] 675 | raw = self.redis.execute_command(*cmd) 676 | return raw 677 | 678 | def config_set(self, option, value): 679 | """Set runtime configuration option. 680 | 681 | ### Parameters 682 | 683 | - **option**: the name of the configuration option. 684 | - **value**: a value for the configuration option. 685 | """ 686 | cmd = [self.CONFIG_CMD, 'SET', option, value] 687 | raw = self.redis.execute_command(*cmd) 688 | return raw == 'OK' 689 | 690 | def config_get(self, option): 691 | """Get runtime configuration option value. 692 | 693 | ### Parameters 694 | 695 | - **option**: the name of the configuration option. 696 | """ 697 | cmd = [self.CONFIG_CMD, 'GET', option] 698 | res = {} 699 | raw = self.redis.execute_command(*cmd) 700 | if raw: 701 | for kvs in raw: 702 | res[kvs[0]] = kvs[1] 703 | return res 704 | 705 | def tagvals(self, tagfield): 706 | """ 707 | Return a list of all possible tag values 708 | 709 | ### Parameters 710 | 711 | - **tagfield**: Tag field name 712 | """ 713 | 714 | cmd = self.redis.execute_command(self.TAGVALS_CMD, self.index_name, tagfield) 715 | return cmd 716 | 717 | def aliasadd(self, alias): 718 | """ 719 | Alias a search index - will fail if alias already exists 720 | 721 | ### Parameters 722 | 723 | - **alias**: Name of the alias to create 724 | """ 725 | 726 | cmd = self.redis.execute_command(self.ALIAS_ADD_CMD, alias, self.index_name) 727 | return cmd 728 | 729 | def aliasupdate(self, alias): 730 | """ 731 | Updates an alias - will fail if alias does not already exist 732 | 733 | ### Parameters 734 | 735 | - **alias**: Name of the alias to create 736 | """ 737 | 738 | cmd = self.redis.execute_command(self.ALIAS_UPDATE_CMD, alias, self.index_name) 739 | return cmd 740 | 741 | def aliasdel(self, alias): 742 | """ 743 | Removes an alias to a search index 744 | 745 | ### Parameters 746 | 747 | - **alias**: Name of the alias to delete 748 | """ 749 | 750 | cmd = self.redis.execute_command(self.ALIAS_DEL_CMD, alias) 751 | return cmd 752 | -------------------------------------------------------------------------------- /redisearch/document.py: -------------------------------------------------------------------------------- 1 | import six 2 | 3 | class Document(object): 4 | """ 5 | Represents a single document in a result set 6 | """ 7 | 8 | def __init__(self, id, payload = None, **fields): 9 | 10 | self.id = id 11 | self.payload = payload 12 | for k, v in six.iteritems(fields): 13 | setattr(self, k, v) 14 | 15 | def __repr__(self): 16 | 17 | return 'Document %s' % self.__dict__ -------------------------------------------------------------------------------- /redisearch/query.py: -------------------------------------------------------------------------------- 1 | import six 2 | 3 | class Query(object): 4 | """ 5 | Query is used to build complex queries that have more parameters than just the query string. 6 | The query string is set in the constructor, and other options have setter functions. 7 | 8 | The setter functions return the query object, so they can be chained, 9 | i.e. `Query("foo").verbatim().filter(...)` etc. 10 | """ 11 | 12 | def __init__(self, query_string): 13 | """ 14 | Create a new query object. 15 | The query string is set in the constructor, and other options have setter functions. 
16 | """ 17 | 18 | self._query_string = query_string 19 | self._offset = 0 20 | self._num = 10 21 | self._no_content = False 22 | self._no_stopwords = False 23 | self._fields = None 24 | self._verbatim = False 25 | self._with_payloads = False 26 | self._with_scores = False 27 | self._scorer = False 28 | self._filters = list() 29 | self._ids = None 30 | self._slop = -1 31 | self._in_order = False 32 | self._sortby = None 33 | self._return_fields = [] 34 | self._summarize_fields = [] 35 | self._highlight_fields = [] 36 | self._language = None 37 | 38 | def query_string(self): 39 | """ 40 | Return the query string of this query only 41 | """ 42 | return self._query_string 43 | 44 | def limit_ids(self, *ids): 45 | """ 46 | Limit the results to a specific set of pre-known document ids of any length 47 | """ 48 | self._ids = ids 49 | return self 50 | 51 | def return_fields(self, *fields): 52 | """ 53 | Add fields to return fields 54 | """ 55 | self._return_fields += fields 56 | return self 57 | 58 | def return_field(self, field, as_field=None): 59 | """ 60 | Add field to return fields (Optional: add 'AS' name to the field) 61 | """ 62 | self._return_fields.append(field) 63 | if as_field is not None: 64 | self._return_fields += ("AS", as_field) 65 | return self 66 | 67 | def _mk_field_list(self, fields): 68 | if not fields: 69 | return [] 70 | return [fields] if isinstance(fields, six.string_types) else list(fields) 71 | 72 | def summarize(self, fields=None, context_len=None, num_frags=None, sep=None): 73 | """ 74 | Return an abridged format of the field, containing only the segments of 75 | the field which contain the matching term(s). 76 | 77 | If `fields` is specified, then only the mentioned fields are 78 | summarized; otherwise all results are summarized. 79 | 80 | Server side defaults are used for each option (except `fields`) if not specified 81 | 82 | - **fields** List of fields to summarize. All fields are summarized if not specified 83 | - **context_len** Amount of context to include with each fragment 84 | - **num_frags** Number of fragments per document 85 | - **sep** Separator string to separate fragments 86 | """ 87 | args = ['SUMMARIZE'] 88 | fields = self._mk_field_list(fields) 89 | if fields: 90 | args += ['FIELDS', str(len(fields))] + fields 91 | 92 | if context_len is not None: 93 | args += ['LEN', str(context_len)] 94 | if num_frags is not None: 95 | args += ['FRAGS', str(num_frags)] 96 | if sep is not None: 97 | args += ['SEPARATOR', sep] 98 | 99 | self._summarize_fields = args 100 | return self 101 | 102 | def highlight(self, fields=None, tags=None): 103 | """ 104 | Apply specified markup to matched term(s) within the returned field(s) 105 | 106 | - **fields** If specified then only those mentioned fields are highlighted, otherwise all fields are highlighted 107 | - **tags** A list of two strings to surround the match. 108 | """ 109 | args = ['HIGHLIGHT'] 110 | fields = self._mk_field_list(fields) 111 | if fields: 112 | args += ['FIELDS', str(len(fields))] + fields 113 | if tags: 114 | args += ['TAGS'] + list(tags) 115 | 116 | self._highlight_fields = args 117 | return self 118 | 119 | def language(self, language): 120 | """ 121 | Analyze the query as being in the specified language 122 | :param language: The language (e.g. 
`chinese` or `english`) 123 | """ 124 | self._language = language 125 | return self 126 | 127 | def slop(self, slop): 128 | """ 129 | Allow a masimum of N intervening non matched terms between phrase terms (0 means exact phrase) 130 | """ 131 | self._slop = slop 132 | return self 133 | 134 | def in_order(self): 135 | """ 136 | Match only documents where the query terms appear in the same order in the document. 137 | i.e. for the query 'hello world', we do not match 'world hello' 138 | """ 139 | self._in_order = True 140 | return self 141 | 142 | def scorer(self, scorer): 143 | """ 144 | Use a different scoring function to evaluate document relevance. Default is `TFIDF` 145 | :param scorer: The scoring function to use (e.g. `TFIDF.DOCNORM` or `BM25`) 146 | """ 147 | self._scorer = scorer 148 | return self 149 | 150 | def get_args(self): 151 | """ 152 | Format the redis arguments for this query and return them 153 | """ 154 | 155 | args = [self._query_string] 156 | 157 | if self._no_content: 158 | args.append('NOCONTENT') 159 | 160 | if self._fields: 161 | 162 | args.append('INFIELDS') 163 | args.append(len(self._fields)) 164 | args += self._fields 165 | 166 | if self._verbatim: 167 | args.append('VERBATIM') 168 | 169 | if self._no_stopwords: 170 | args.append('NOSTOPWORDS') 171 | 172 | if self._filters: 173 | for flt in self._filters: 174 | assert isinstance(flt, Filter) 175 | args += flt.args 176 | 177 | if self._with_payloads: 178 | args.append('WITHPAYLOADS') 179 | 180 | if self._scorer: 181 | args += ['SCORER', self._scorer] 182 | 183 | if self._with_scores: 184 | args.append('WITHSCORES') 185 | 186 | if self._ids: 187 | args.append('INKEYS') 188 | args.append(len(self._ids)) 189 | args += self._ids 190 | 191 | if self._slop >= 0: 192 | args += ['SLOP', self._slop] 193 | 194 | if self._in_order: 195 | args.append('INORDER') 196 | 197 | if self._return_fields: 198 | args.append('RETURN') 199 | args.append(len(self._return_fields)) 200 | args += self._return_fields 201 | 202 | if self._sortby: 203 | assert isinstance(self._sortby, SortbyField) 204 | args.append('SORTBY') 205 | args += self._sortby.args 206 | 207 | if self._language: 208 | args += ['LANGUAGE', self._language] 209 | 210 | args += self._summarize_fields + self._highlight_fields 211 | args += ["LIMIT", self._offset, self._num] 212 | return args 213 | 214 | def paging(self, offset, num): 215 | """ 216 | Set the paging for the query (defaults to 0..10). 217 | 218 | - **offset**: Paging offset for the results. Defaults to 0 219 | - **num**: How many results do we want 220 | """ 221 | self._offset = offset 222 | self._num = num 223 | return self 224 | 225 | def verbatim(self): 226 | """ 227 | Set the query to be verbatim, i.e. use no query expansion or stemming 228 | """ 229 | self._verbatim = True 230 | return self 231 | 232 | def no_content(self): 233 | """ 234 | Set the query to only return ids and not the document content 235 | """ 236 | self._no_content = True 237 | return self 238 | 239 | def no_stopwords(self): 240 | """ 241 | Prevent the query from being filtered for stopwords. 242 | Only useful in very big queries that you are certain contain no stopwords. 
243 | """ 244 | self._no_stopwords = True 245 | return self 246 | 247 | def with_payloads(self): 248 | """ 249 | Ask the engine to return document payloads 250 | """ 251 | self._with_payloads = True 252 | return self 253 | 254 | def with_scores(self): 255 | """ 256 | Ask the engine to return document search scores 257 | """ 258 | self._with_scores = True 259 | return self 260 | 261 | def limit_fields(self, *fields): 262 | """ 263 | Limit the search to specific TEXT fields only 264 | 265 | - **fields**: A list of strings, case sensitive field names from the defined schema 266 | """ 267 | self._fields = fields 268 | return self 269 | 270 | def add_filter(self, flt): 271 | """ 272 | Add a numeric or geo filter to the query. 273 | **Currently only one of each filter is supported by the engine** 274 | 275 | - **flt**: A NumericFilter or GeoFilter object, used on a corresponding field 276 | """ 277 | 278 | self._filters.append(flt) 279 | return self 280 | 281 | def sort_by(self, field, asc=True): 282 | """ 283 | Add a sortby field to the query 284 | 285 | - **field** - the name of the field to sort by 286 | - **asc** - when `True`, sorting will be done in asceding order 287 | """ 288 | self._sortby = SortbyField(field, asc) 289 | return self 290 | 291 | 292 | class Filter(object): 293 | 294 | def __init__(self, keyword, field, *args): 295 | 296 | self.args = [keyword, field] + list(args) 297 | 298 | class NumericFilter(Filter): 299 | 300 | INF = '+inf' 301 | NEG_INF = '-inf' 302 | 303 | def __init__(self, field, minval, maxval, minExclusive = False, maxExclusive = False): 304 | 305 | args = [ 306 | minval if not minExclusive else '({}'.format(minval), 307 | maxval if not maxExclusive else '({}'.format(maxval), 308 | ] 309 | 310 | Filter.__init__(self, 'FILTER', field, *args) 311 | 312 | class GeoFilter(Filter): 313 | 314 | METERS = 'm' 315 | KILOMETERS = 'km' 316 | FEET = 'ft' 317 | MILES = 'mi' 318 | 319 | def __init__(self, field, lon, lat, radius, unit = KILOMETERS): 320 | 321 | Filter.__init__(self, 'GEOFILTER', field, lon, lat, radius, unit) 322 | 323 | class SortbyField(object): 324 | 325 | def __init__(self, field, asc=True): 326 | 327 | self.args = [field, 'ASC' if asc else 'DESC'] 328 | -------------------------------------------------------------------------------- /redisearch/querystring.py: -------------------------------------------------------------------------------- 1 | from six import string_types, integer_types 2 | 3 | def tags(*t): 4 | """ 5 | Indicate that the values should be matched to a tag field 6 | 7 | ### Parameters 8 | 9 | - **t**: Tags to search for 10 | """ 11 | if not t: 12 | raise ValueError('At least one tag must be specified') 13 | return TagValue(*t) 14 | 15 | 16 | def between(a, b, inclusive_min=True, inclusive_max=True): 17 | """ 18 | Indicate that value is a numeric range 19 | """ 20 | return RangeValue(a, b, 21 | inclusive_min=inclusive_min, inclusive_max=inclusive_max) 22 | 23 | def equal(n): 24 | """ 25 | Match a numeric value 26 | """ 27 | return between(n, n) 28 | 29 | 30 | def lt(n): 31 | """ 32 | Match any value less than n 33 | """ 34 | return between(None, n, inclusive_max=False) 35 | 36 | 37 | def le(n): 38 | """ 39 | Match any value less or equal to n 40 | """ 41 | return between(None, n, inclusive_max=True) 42 | 43 | 44 | def gt(n): 45 | """ 46 | Match any value greater than n 47 | """ 48 | return between(n, None, inclusive_min=False) 49 | 50 | 51 | def ge(n): 52 | """ 53 | Match any value greater or equal to n 54 | """ 55 | return between(n, 
None, inclusive_min=True) 56 | 57 | 58 | def geo(lat, lon, radius, unit='km'): 59 | """ 60 | Indicate that value is a geo region 61 | """ 62 | return GeoValue(lat, lon, radius, unit) 63 | 64 | 65 | class Value(object): 66 | @property 67 | def combinable(self): 68 | """ 69 | Whether this type of value may be combined with other values for the same 70 | field. This makes the filter potentially more efficient 71 | """ 72 | return False 73 | 74 | @staticmethod 75 | def make_value(v): 76 | """ 77 | Convert an object to a value, if it is not a value already 78 | """ 79 | if isinstance(v, Value): 80 | return v 81 | return ScalarValue(v) 82 | 83 | def to_string(self): 84 | raise NotImplementedError() 85 | 86 | def __str__(self): 87 | return self.to_string() 88 | 89 | 90 | class RangeValue(Value): 91 | combinable = False 92 | 93 | def __init__(self, a, b, inclusive_min=False, inclusive_max=False): 94 | if a is None: 95 | a = '-inf' 96 | if b is None: 97 | b = 'inf' 98 | self.range = [str(a), str(b)] 99 | self.inclusive_min = inclusive_min 100 | self.inclusive_max = inclusive_max 101 | 102 | def to_string(self): 103 | return '[{1}{0[0]} {2}{0[1]}]'.format(self.range, 104 | '(' if not self.inclusive_min else '', 105 | '(' if not self.inclusive_max else '',) 106 | 107 | 108 | class ScalarValue(Value): 109 | combinable = True 110 | 111 | def __init__(self, v): 112 | self.v = str(v) 113 | 114 | def to_string(self): 115 | return self.v 116 | 117 | 118 | class TagValue(Value): 119 | combinable = False 120 | 121 | def __init__(self, *tags): 122 | self.tags = tags 123 | 124 | def to_string(self): 125 | return '{' + ' | '.join(str(t) for t in self.tags) + '}' 126 | 127 | 128 | class GeoValue(Value): 129 | def __init__(self, lon, lat, radius, unit='km'): 130 | self.lon = lon 131 | self.lat = lat 132 | self.radius = radius 133 | self.unit = unit 134 | 135 | 136 | class Node(object): 137 | def __init__(self, *children, **kwparams): 138 | """ 139 | Create a node 140 | 141 | ### Parameters 142 | 143 | - **children**: One or more sub-conditions. These can be additional 144 | `intersect`, `disjunct`, `union`, `optional`, or any other `Node` 145 | type. 146 | 147 | The semantics of multiple conditions are dependent on the type of 148 | query. For an `intersection` node, this amounts to a logical AND, 149 | for a `union` node, this amounts to a logical `OR`. 150 | 151 | - **kwparams**: key-value parameters. Each key is the name of a field, 152 | and the value should be a field value. 
This can be one of the 153 | following: 154 | 155 | - Simple string (for text field matches) 156 | - value returned by one of the helper functions 157 | - list of either a string or a value 158 | 159 | 160 | ### Examples 161 | 162 | Field `num` should be between 1 and 10 163 | ``` 164 | intersect(num=between(1, 10) 165 | ``` 166 | 167 | Name can either be `bob` or `john` 168 | 169 | ``` 170 | union(name=('bob', 'john')) 171 | ``` 172 | 173 | Don't select countries in Israel, Japan, or US 174 | 175 | ``` 176 | disjunct_union(country=('il', 'jp', 'us')) 177 | ``` 178 | """ 179 | 180 | self.params = [] 181 | 182 | kvparams = {} 183 | for k, v in kwparams.items(): 184 | curvals = kvparams.setdefault(k, []) 185 | if isinstance(v, (string_types, integer_types, float)): 186 | curvals.append(Value.make_value(v)) 187 | elif isinstance(v, Value): 188 | curvals.append(v) 189 | else: 190 | curvals.extend(Value.make_value(subv) for subv in v) 191 | 192 | self.params += [Node.to_node(p) for p in children] 193 | 194 | for k, v in kvparams.items(): 195 | self.params.extend(self.join_fields(k, v)) 196 | 197 | def join_fields(self, key, vals): 198 | if len(vals) == 1: 199 | return [BaseNode('@{}:{}'.format(key, vals[0].to_string()))] 200 | if not vals[0].combinable: 201 | return [BaseNode('@{}:{}'.format(key, v.to_string())) for v in vals] 202 | s = BaseNode('@{}:({})'.format(key, self.JOINSTR.join(v.to_string() for v in vals))) 203 | return [s] 204 | 205 | @classmethod 206 | def to_node(cls, obj): # noqa 207 | if isinstance(obj, Node): 208 | return obj 209 | return BaseNode(obj) 210 | 211 | @property 212 | def JOINSTR(self): 213 | raise NotImplementedError() 214 | 215 | def to_string(self, with_parens=None): 216 | with_parens = self._should_use_paren(with_parens) 217 | pre, post = ('(', ')') if with_parens else ('', '') 218 | return "{}{}{}".format( 219 | pre, self.JOINSTR.join(n.to_string() for n in self.params), post) 220 | 221 | def _should_use_paren(self, optval): 222 | if optval is not None: 223 | return optval 224 | return len(self.params) > 1 225 | 226 | def __str__(self): 227 | return self.to_string() 228 | 229 | 230 | class BaseNode(Node): 231 | def __init__(self, s): 232 | super(BaseNode, self).__init__() 233 | self.s = str(s) 234 | 235 | def to_string(self, with_parens=None): 236 | return self.s 237 | 238 | 239 | class IntersectNode(Node): 240 | """ 241 | Create an intersection node. All children need to be satisfied in order for 242 | this node to evaluate as true 243 | """ 244 | JOINSTR = ' ' 245 | 246 | 247 | class UnionNode(Node): 248 | """ 249 | Create a union node. Any of the children need to be satisfied in order for 250 | this node to evaluate as true 251 | """ 252 | JOINSTR = '|' 253 | 254 | 255 | class DisjunctNode(IntersectNode): 256 | """ 257 | Create a disjunct node. In order for this node to be true, all of its 258 | children must evaluate to false 259 | """ 260 | def to_string(self, with_parens=None): 261 | with_parens = self._should_use_paren(with_parens) 262 | ret = super(DisjunctNode, self).to_string(with_parens=False) 263 | if with_parens: 264 | return '(-' + ret + ')' 265 | else: 266 | return '-' + ret 267 | 268 | 269 | class DistjunctUnion(DisjunctNode): 270 | """ 271 | This node is true if *all* of its children are false. This is equivalent to 272 | ``` 273 | disjunct(union(...)) 274 | ``` 275 | """ 276 | JOINSTR = '|' 277 | 278 | 279 | class OptionalNode(IntersectNode): 280 | """ 281 | Create an optional node. 
If this nodes evaluates to true, then the document 282 | will be rated higher in score/rank. 283 | """ 284 | def to_string(self, with_parens=None): 285 | with_parens = self._should_use_paren(with_parens) 286 | ret = super(OptionalNode, self).to_string(with_parens=False) 287 | if with_parens: 288 | return '(~' + ret + ')' 289 | else: 290 | return '~' + ret 291 | 292 | 293 | def intersect(*args, **kwargs): 294 | return IntersectNode(*args, **kwargs) 295 | 296 | 297 | def union(*args, **kwargs): 298 | return UnionNode(*args, **kwargs) 299 | 300 | 301 | def disjunct(*args, **kwargs): 302 | return DisjunctNode(*args, **kwargs) 303 | 304 | 305 | def disjunct_union(*args, **kwargs): 306 | return DistjunctUnion(*args, **kwargs) 307 | 308 | 309 | def querystring(*args, **kwargs): 310 | return intersect(*args, **kwargs).to_string() 311 | -------------------------------------------------------------------------------- /redisearch/reducers.py: -------------------------------------------------------------------------------- 1 | from .aggregation import Reducer, SortDirection 2 | 3 | 4 | class FieldOnlyReducer(Reducer): 5 | def __init__(self, field): 6 | super(FieldOnlyReducer, self).__init__(field) 7 | self._field = field 8 | 9 | 10 | class count(Reducer): 11 | """ 12 | Counts the number of results in the group 13 | """ 14 | NAME = 'COUNT' 15 | 16 | def __init__(self): 17 | super(count, self).__init__() 18 | 19 | 20 | class sum(FieldOnlyReducer): 21 | """ 22 | Calculates the sum of all the values in the given fields within the group 23 | """ 24 | NAME = 'SUM' 25 | 26 | def __init__(self, field): 27 | super(sum, self).__init__(field) 28 | 29 | 30 | class min(FieldOnlyReducer): 31 | """ 32 | Calculates the smallest value in the given field within the group 33 | """ 34 | NAME = 'MIN' 35 | 36 | def __init__(self, field): 37 | super(min, self).__init__(field) 38 | 39 | 40 | class max(FieldOnlyReducer): 41 | """ 42 | Calculates the largest value in the given field within the group 43 | """ 44 | NAME = 'MAX' 45 | 46 | def __init__(self, field): 47 | super(max, self).__init__(field) 48 | 49 | 50 | class avg(FieldOnlyReducer): 51 | """ 52 | Calculates the mean value in the given field within the group 53 | """ 54 | NAME = 'AVG' 55 | 56 | def __init__(self, field): 57 | super(avg, self).__init__(field) 58 | 59 | 60 | class tolist(FieldOnlyReducer): 61 | """ 62 | Returns all the matched properties in a list 63 | """ 64 | NAME = 'TOLIST' 65 | 66 | def __init__(self, field): 67 | super(tolist, self).__init__(field) 68 | 69 | 70 | class count_distinct(FieldOnlyReducer): 71 | """ 72 | Calculate the number of distinct values contained in all the results in 73 | the group for the given field 74 | """ 75 | NAME = 'COUNT_DISTINCT' 76 | 77 | def __init__(self, field): 78 | super(count_distinct, self).__init__(field) 79 | 80 | 81 | class count_distinctish(FieldOnlyReducer): 82 | """ 83 | Calculate the number of distinct values contained in all the results in the 84 | group for the given field. This uses a faster algorithm than 85 | `count_distinct` but is less accurate 86 | """ 87 | NAME = 'COUNT_DISTINCTISH' 88 | 89 | 90 | class quantile(Reducer): 91 | """ 92 | Return the value for the nth percentile within the range of values for the 93 | field within the group. 
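A minimal sketch of using it in an aggregation (the `@category` and
`@price` fields are illustrative):

```
from redisearch import AggregateRequest, reducers

req = AggregateRequest('*').group_by(
    '@category', reducers.quantile('@price', 0.5))  # median price per group
```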
94 | """ 95 | NAME = 'QUANTILE' 96 | 97 | def __init__(self, field, pct): 98 | super(quantile, self).__init__(field, str(pct)) 99 | self._field = field 100 | 101 | 102 | class stddev(FieldOnlyReducer): 103 | """ 104 | Return the standard deviation for the values within the group 105 | """ 106 | NAME = 'STDDEV' 107 | 108 | def __init__(self, field): 109 | super(stddev, self).__init__(field) 110 | 111 | 112 | class first_value(Reducer): 113 | """ 114 | Selects the first value within the group according to sorting parameters 115 | """ 116 | NAME = 'FIRST_VALUE' 117 | 118 | def __init__(self, field, *byfields): 119 | """ 120 | Selects the first value of the given field within the group. 121 | 122 | ### Parameter 123 | 124 | - **field**: Source field used for the value 125 | - **byfields**: How to sort the results. This can be either the 126 | *class* of `aggregation.Asc` or `aggregation.Desc` in which 127 | case the field `field` is also used as the sort input. 128 | 129 | `byfields` can also be one or more *instances* of `Asc` or `Desc` 130 | indicating the sort order for these fields 131 | """ 132 | fieldstrs = [] 133 | if len(byfields) == 1 and isinstance(byfields[0], type) and \ 134 | issubclass(byfields[0], SortDirection): 135 | byfields = [byfields[0](field)] 136 | 137 | for f in byfields: 138 | fieldstrs += [f.field, f.DIRSTRING] 139 | 140 | args = [field] 141 | if fieldstrs: 142 | args += ['BY'] + fieldstrs 143 | super(first_value, self).__init__(*args) 144 | self._field = field 145 | 146 | 147 | class random_sample(Reducer): 148 | """ 149 | Returns a random sample of items from the dataset, from the given property 150 | """ 151 | NAME = 'RANDOM_SAMPLE' 152 | 153 | def __init__(self, field, size): 154 | """ 155 | ### Parameter 156 | 157 | **field**: Field to sample from 158 | **size**: Return this many items (can be less) 159 | """ 160 | args = [field, str(size)] 161 | super(random_sample, self).__init__(*args) 162 | self._field = field -------------------------------------------------------------------------------- /redisearch/result.py: -------------------------------------------------------------------------------- 1 | from six.moves import xrange, zip as izip 2 | 3 | from .document import Document 4 | from ._util import to_string 5 | 6 | 7 | 8 | class Result(object): 9 | """ 10 | Represents the result of a search query, and has an array of Document objects 11 | """ 12 | 13 | def __init__(self, res, hascontent, duration=0, has_payload=False, with_scores=False): 14 | """ 15 | - **snippets**: An optional dictionary of the form {field: snippet_size} for snippet formatting 16 | """ 17 | 18 | self.total = res[0] 19 | self.duration = duration 20 | self.docs = [] 21 | 22 | step = 1 23 | if hascontent: 24 | step = step + 1 25 | if has_payload: 26 | step = step + 1 27 | if with_scores: 28 | step = step + 1 29 | 30 | offset = 2 if with_scores else 1 31 | 32 | for i in xrange(1, len(res), step): 33 | id = to_string(res[i]) 34 | payload = to_string(res[i+offset]) if has_payload else None 35 | #fields_offset = 2 if has_payload else 1 36 | fields_offset = offset+1 if has_payload else offset 37 | score = float(res[i+1]) if with_scores else None 38 | 39 | fields = {} 40 | if hascontent: 41 | fields = dict( 42 | dict(izip(map(to_string, res[i + fields_offset][::2]), 43 | map(to_string, res[i + fields_offset][1::2]))) 44 | ) if hascontent else {} 45 | try: 46 | del fields['id'] 47 | except KeyError: 48 | pass 49 | 50 | try: 51 | fields['json'] = fields['$'] 52 | del fields['$'] 53 | except KeyError: 
54 | pass 55 | 56 | doc = Document(id, score=score, payload=payload, **fields) if with_scores else Document(id, payload=payload, **fields) 57 | self.docs.append(doc) 58 | 59 | def __repr__(self): 60 | return 'Result{%d total, docs: %s}' % (self.total, self.docs) 61 | -------------------------------------------------------------------------------- /test/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | ARG PYTHON_VER 3 | 4 | FROM redislabs/redisearch:2.0.0 5 | 6 | RUN set -e ;\ 7 | apt-get -qq update ;\ 8 | apt-get install -y git 9 | 10 | WORKDIR /build 11 | 12 | RUN set -e ;\ 13 | mkdir -p deps ;\ 14 | cd deps ;\ 15 | git clone https://github.com/RedisLabsModules/readies.git 16 | 17 | RUN if [ "$PYTHON_VER" = 2 ]; then \ 18 | PIP=1 ./deps/readies/bin/getpy2 ;\ 19 | python2 --version ;\ 20 | else \ 21 | PIP=1 ./deps/readies/bin/getpy3 ;\ 22 | python3 --version ;\ 23 | fi 24 | 25 | ADD ./ /build 26 | 27 | RUN pip install -r requirements.txt 28 | RUN pip install --force-reinstall git+https://github.com/RedisLabs/rmtest.git 29 | 30 | ENV REDIS_PORT=6379 31 | 32 | ENTRYPOINT [ "/bin/bash", "-c", "/build/test/docker/test.sh" ] 33 | -------------------------------------------------------------------------------- /test/docker/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: all build test 3 | 4 | PYTHON_VER ?= 3 5 | 6 | all: test 7 | 8 | build: 9 | docker build --no-cache -t redisearch-py-test -f Dockerfile --build-arg PYTHON_VER=$(PYTHON_VER) ../.. 10 | 11 | test: build 12 | docker run --rm -it redisearch-py-test 13 | -------------------------------------------------------------------------------- /test/docker/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | if [[ $PYTHON_VER == 2 ]]; then 6 | PYTHON=python2 7 | else 8 | PYTHON=python3 9 | fi 10 | 11 | $PYTHON --version 12 | 13 | cd /build 14 | redis-server --loadmodule /usr/lib/redis/modules/redisearch.so & 15 | sleep 1 16 | $PYTHON test/test.py 17 | $PYTHON test/test_builder.py 18 | -------------------------------------------------------------------------------- /test/test-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)" 4 | ROOT=$(cd $HERE/..; pwd) 5 | 6 | cd $ROOT 7 | 8 | (mkdir -p deps; cd deps; git clone https://github.com/RedisLabsModules/readies.git) 9 | 10 | if [ "$PYTHON_VER" = 2 ]; then 11 | PIP=1 VENV=1 ./deps/readies/bin/getpy2 12 | python2 -m virtualenv venv2 13 | . ./venv2/bin/activate 14 | else 15 | PIP=1 VENV=1 ./deps/readies/bin/getpy3 16 | python3 -m virtualenv venv3 17 | . 
./venv3/bin/activate 18 | fi 19 | 20 | python -m pip install -r requirements.txt 21 | python -m pip install --force-reinstall git+https://github.com/RedisLabs/rmtest.git 22 | -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os, sys 3 | 4 | 5 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) 6 | 7 | from rmtest import ModuleTestCase 8 | import redis 9 | import unittest 10 | import bz2 11 | import csv 12 | import time 13 | from io import TextIOWrapper 14 | 15 | import six 16 | 17 | from redisearch import * 18 | from redisearch.client import IndexType 19 | import redisearch.aggregation as aggregations 20 | import redisearch.reducers as reducers 21 | import rejson 22 | 23 | WILL_PLAY_TEXT = os.path.abspath(os.path.dirname(__file__)) + '/will_play_text.csv.bz2' 24 | 25 | TITLES_CSV = os.path.abspath(os.path.dirname(__file__)) + '/titles.csv' 26 | 27 | v = 0 28 | 29 | def waitForIndex(env, idx, timeout=None): 30 | delay = 0.1 31 | while True: 32 | res = env.execute_command('ft.info', idx) 33 | try: 34 | res.index('indexing') 35 | except: 36 | break 37 | 38 | if int(res[res.index('indexing') + 1]) == 0: 39 | break 40 | 41 | time.sleep(delay) 42 | if timeout is not None: 43 | timeout -= delay 44 | if timeout <= 0: 45 | break 46 | 47 | def check_version(env, version): 48 | global v 49 | if v == 0: 50 | v = env.execute_command('MODULE LIST')[0][3] 51 | if int(v) >= version: 52 | return True 53 | return False 54 | 55 | class RedisSearchTestCase(ModuleTestCase('../module.so')): 56 | 57 | def createIndex(self, client, num_docs=100, definition=None): 58 | 59 | assert isinstance(client, Client) 60 | try: 61 | client.create_index((TextField('play', weight=5.0), 62 | TextField('txt'), 63 | NumericField('chapter')), definition=definition) 64 | except redis.ResponseError: 65 | client.dropindex(delete_documents=True) 66 | return self.createIndex(client, num_docs=num_docs, definition=definition) 67 | 68 | chapters = {} 69 | bzfp = bz2.BZ2File(WILL_PLAY_TEXT) 70 | if six.PY3: 71 | bzfp = TextIOWrapper(bz2.BZ2File(WILL_PLAY_TEXT), encoding='utf8') 72 | 73 | r = csv.reader(bzfp, delimiter=';') 74 | for n, line in enumerate(r): 75 | # ['62816', 'Merchant of Venice', '9', '3.2.74', 'PORTIA', "I'll begin it,--Ding, dong, bell."] 76 | 77 | play, chapter, character, text = line[1], line[2], line[4], line[5] 78 | 79 | key = '{}:{}'.format(play, chapter).lower() 80 | d = chapters.setdefault(key, {}) 81 | d['play'] = play 82 | d['txt'] = d.get('txt', '') + ' ' + text 83 | d['chapter'] = int(chapter or 0) 84 | if len(chapters) == num_docs: 85 | break 86 | 87 | indexer = client.batch_indexer(chunk_size=50) 88 | self.assertIsInstance(indexer, Client.BatchIndexer) 89 | self.assertEqual(50, indexer.chunk_size) 90 | 91 | for key, doc in six.iteritems(chapters): 92 | indexer.add_document(key, **doc) 93 | indexer.commit() 94 | 95 | def testClient(self): 96 | 97 | conn = self.redis() 98 | 99 | with conn as r: 100 | num_docs = 500 101 | r.flushdb() 102 | client = Client('test', port=conn.port) 103 | 104 | self.createIndex(client, num_docs=num_docs) 105 | 106 | for _ in r.retry_with_rdb_reload(): 107 | waitForIndex(r, 'test') 108 | # verify info 109 | info = client.info() 110 | for k in ['index_name', 'index_options', 'attributes', 'num_docs', 111 | 'max_doc_id', 'num_terms', 'num_records', 'inverted_sz_mb', 112 | 'offset_vectors_sz_mb', 'doc_table_size_mb', 
'key_table_size_mb', 113 | 'records_per_doc_avg', 'bytes_per_record_avg', 'offsets_per_term_avg', 114 | 'offset_bits_per_record_avg']: 115 | self.assertIn(k, info) 116 | 117 | self.assertEqual(client.index_name, info['index_name']) 118 | self.assertEqual(num_docs, int(info['num_docs'])) 119 | 120 | res = client.search("henry iv") 121 | self.assertIsInstance(res, Result) 122 | assert isinstance(res, Result) 123 | self.assertEqual(225, res.total) 124 | self.assertEqual(10, len(res.docs)) 125 | self.assertGreater(res.duration, 0) 126 | 127 | for doc in res.docs: 128 | self.assertTrue(doc.id) 129 | self.assertEqual(doc.play, 'Henry IV') 130 | self.assertTrue(len(doc.txt) > 0) 131 | 132 | # test no content 133 | res = client.search(Query('king').no_content()) 134 | self.assertEqual(194, res.total) 135 | self.assertEqual(10, len(res.docs)) 136 | for doc in res.docs: 137 | self.assertNotIn('txt', doc.__dict__) 138 | self.assertNotIn('play', doc.__dict__) 139 | 140 | # test verbatim vs no verbatim 141 | total = client.search(Query('kings').no_content()).total 142 | vtotal = client.search(Query('kings').no_content().verbatim()).total 143 | self.assertGreater(total, vtotal) 144 | 145 | # test in fields 146 | txt_total = client.search(Query('henry').no_content().limit_fields('txt')).total 147 | play_total = client.search(Query('henry').no_content().limit_fields('play')).total 148 | both_total = client.search(Query('henry').no_content().limit_fields('play', 'txt')).total 149 | self.assertEqual(129, txt_total) 150 | self.assertEqual(494, play_total) 151 | self.assertEqual(494, both_total) 152 | 153 | # test load_document 154 | doc = client.load_document('henry vi part 3:62') 155 | self.assertIsNotNone(doc) 156 | self.assertEqual('henry vi part 3:62', doc.id) 157 | self.assertEqual(doc.play, 'Henry VI Part 3') 158 | self.assertTrue(len(doc.txt) > 0) 159 | 160 | # test in-keys 161 | ids = [x.id for x in client.search(Query('henry')).docs] 162 | self.assertEqual(10, len(ids)) 163 | subset = ids[:5] 164 | docs = client.search(Query('henry').limit_ids(*subset)) 165 | self.assertEqual(len(subset), docs.total) 166 | ids = [x.id for x in docs.docs] 167 | self.assertEqual(set(ids), set(subset)) 168 | 169 | # self.assertRaises(redis.ResponseError, client.search, Query('henry king').return_fields('play', 'nonexist')) 170 | 171 | # test slop and in order 172 | self.assertEqual(193, client.search(Query('henry king')).total) 173 | self.assertEqual(3, client.search(Query('henry king').slop(0).in_order()).total) 174 | self.assertEqual(52, client.search(Query('king henry').slop(0).in_order()).total) 175 | self.assertEqual(53, client.search(Query('henry king').slop(0)).total) 176 | self.assertEqual(167, client.search(Query('henry king').slop(100)).total) 177 | 178 | # test delete document 179 | client.add_document('doc-5ghs2', play='Death of a Salesman') 180 | res = client.search(Query('death of a salesman')) 181 | self.assertEqual(1, res.total) 182 | 183 | self.assertEqual(1, client.delete_document('doc-5ghs2')) 184 | res = client.search(Query('death of a salesman')) 185 | self.assertEqual(0, res.total) 186 | self.assertEqual(0, client.delete_document('doc-5ghs2')) 187 | 188 | client.add_document('doc-5ghs2', play='Death of a Salesman') 189 | res = client.search(Query('death of a salesman')) 190 | self.assertEqual(1, res.total) 191 | client.delete_document('doc-5ghs2') 192 | 193 | def getCleanClient(self, name): 194 | """ 195 | Gets a client attached to an index name which is ready to be 196 | created 197 |
198 | client = Client(name, port=self.server.port) 199 | try: 200 | client.dropindex(delete_documents=True) 201 | except: 202 | pass 203 | 204 | return client 205 | 206 | def testAddHash(self): 207 | conn = self.redis() 208 | 209 | with conn as r: 210 | if check_version(r, 20000): 211 | return 212 | # Creating a client with a given index name 213 | client = Client('idx', port=conn.port) 214 | 215 | client.redis.flushdb() 216 | # Creating the index definition and schema 217 | client.create_index((TextField('title', weight=5.0), TextField('body'))) 218 | 219 | client.redis.hset( 220 | 'doc1', 221 | mapping={ 222 | 'title': 'RediSearch', 223 | 'body': 'Redisearch implements a search engine on top of redis' 224 | }) 225 | 226 | client.add_document_hash('doc1') 227 | 228 | # Searching with complex parameters: 229 | q = Query("search engine").verbatim().no_content().paging(0, 5) 230 | res = client.search(q) 231 | self.assertEqual('doc1', res.docs[0].id) 232 | 233 | def testPayloads(self): 234 | conn = self.redis() 235 | 236 | with conn as r: 237 | if not check_version(r, 20200): 238 | return 239 | # Creating a client with a given index name 240 | client = Client('idx', port=conn.port) 241 | client.redis.flushdb() 242 | client.create_index((TextField('txt'),)) 243 | 244 | client.add_document('doc1', payload='foo baz', txt='foo bar') 245 | client.add_document('doc2', txt='foo bar') 246 | 247 | q = Query("foo bar").with_payloads() 248 | res = client.search(q) 249 | self.assertEqual(2, res.total) 250 | self.assertEqual('doc1', res.docs[0].id) 251 | self.assertEqual('doc2', res.docs[1].id) 252 | self.assertEqual('foo baz', res.docs[0].payload) 253 | self.assertIsNone(res.docs[1].payload) 254 | 255 | def testScores(self): 256 | conn = self.redis() 257 | 258 | with conn as r: 259 | # Creating a client with a given index name 260 | client = Client('idx', port=conn.port) 261 | client.redis.flushdb() 262 | client.create_index((TextField('txt'),)) 263 | 264 | client.add_document('doc1', txt='foo baz') 265 | client.add_document('doc2', txt='foo bar') 266 | 267 | q = Query("foo ~bar").with_scores() 268 | res = client.search(q) 269 | self.assertEqual(2, res.total) 270 | 271 | self.assertEqual('doc2', res.docs[0].id) 272 | self.assertEqual(3.0, res.docs[0].score) 273 | 274 | self.assertEqual('doc1', res.docs[1].id) 275 | # todo: enable once new RS version is tagged 276 | # self.assertEqual(0.2, res.docs[1].score) 277 | 278 | def testReplace(self): 279 | 280 | conn = self.redis() 281 | 282 | with conn as r: 283 | # Creating a client with a given index name 284 | client = Client('idx', port=conn.port) 285 | client.redis.flushdb() 286 | client.create_index((TextField('txt'),)) 287 | 288 | client.add_document('doc1', txt='foo bar') 289 | client.add_document('doc2', txt='foo bar') 290 | 291 | res = client.search("foo bar") 292 | self.assertEqual(2, res.total) 293 | client.add_document('doc1', replace=True, txt='this is a replaced doc') 294 | 295 | res = client.search("foo bar") 296 | self.assertEqual(1, res.total) 297 | self.assertEqual('doc2', res.docs[0].id) 298 | 299 | res = client.search("replaced doc") 300 | self.assertEqual(1, res.total) 301 | self.assertEqual('doc1', res.docs[0].id) 302 | 303 | def testExpire(self): 304 | client = self.getCleanClient('idx') 305 | client.create_index((TextField('txt', sortable=True),), temporary=4) 306 | 307 | redis_client = redis.client.Redis() 308 | ttl = redis_client.execute_command('ft.debug', 'TTL', 'idx') 309 | self.assertTrue(ttl > 2) 310 | while ttl > 2: 311 | ttl =
redis_client.execute_command('ft.debug', 'TTL', 'idx') 312 | time.sleep(0.01) 313 | 314 | # add document - should reset the ttl 315 | client.add_document('doc', txt='foo bar', text='this is a simple test') 316 | ttl = redis_client.execute_command('ft.debug', 'TTL', 'idx') 317 | self.assertTrue(ttl > 2) 318 | try: 319 | while True: 320 | ttl = redis_client.execute_command('ft.debug', 'TTL', 'idx') 321 | time.sleep(0.5) 322 | except redis.exceptions.ResponseError: 323 | self.assertEqual(ttl, 0) 324 | 325 | def testStopwords(self): 326 | # Creating a client with a given index name 327 | client = self.getCleanClient('idx') 328 | 329 | client.create_index((TextField('txt'),), stopwords=['foo', 'bar', 'baz']) 330 | client.add_document('doc1', txt='foo bar') 331 | client.add_document('doc2', txt='hello world') 332 | 333 | q1 = Query("foo bar").no_content() 334 | q2 = Query("foo bar hello world").no_content() 335 | res1, res2 = client.search(q1), client.search(q2) 336 | self.assertEqual(0, res1.total) 337 | self.assertEqual(1, res2.total) 338 | 339 | def testSkipInitialScan(self): 340 | client = self.getCleanClient('idx') 341 | client.redis.hset("doc1", "foo", "bar") 342 | q = Query('@foo:bar') 343 | 344 | client1 = self.getCleanClient('idx1') 345 | client1.create_index((TextField('foo'),)) 346 | waitForIndex(client1.redis, 'idx1') 347 | self.assertEqual(1, client1.search(q).total) 348 | client2 = self.getCleanClient('idx2') 349 | client2.create_index((TextField('foo'),), skip_initial_scan=True) 350 | waitForIndex(client2.redis, 'idx2') 351 | self.assertEqual(0, client2.search(q).total) 352 | 353 | def testFilters(self): 354 | conn = self.redis() 355 | 356 | with conn as r: 357 | # Creating a client with a given index name 358 | client = Client('idx', port=conn.port) 359 | client.redis.flushdb() 360 | 361 | client.create_index((TextField('txt'), NumericField('num'), GeoField('loc'))) 362 | client.add_document('doc1', txt='foo bar', num=3.141, loc='-0.441,51.458') 363 | client.add_document('doc2', txt='foo baz', num=2, loc='-0.1,51.2') 364 | 365 | for i in r.retry_with_rdb_reload(): 366 | waitForIndex(r, 'idx') 367 | # Test numerical filter 368 | q1 = Query("foo").add_filter(NumericFilter('num', 0, 2)).no_content() 369 | q2 = Query("foo").add_filter(NumericFilter('num', 2, NumericFilter.INF, minExclusive=True)).no_content() 370 | res1, res2 = client.search(q1), client.search(q2) 371 | 372 | self.assertEqual(1, res1.total) 373 | self.assertEqual(1, res2.total) 374 | self.assertEqual('doc2', res1.docs[0].id) 375 | self.assertEqual('doc1', res2.docs[0].id) 376 | 377 | # Test geo filter 378 | q1 = Query("foo").add_filter(GeoFilter('loc', -0.44, 51.45, 10)).no_content() 379 | q2 = Query("foo").add_filter(GeoFilter('loc', -0.44, 51.45, 100)).no_content() 380 | res1, res2 = client.search(q1), client.search(q2) 381 | 382 | self.assertEqual(1, res1.total) 383 | self.assertEqual(2, res2.total) 384 | self.assertEqual('doc1', res1.docs[0].id) 385 | 386 | # Sort results, after RDB reload order may change 387 | res = [res2.docs[0].id, res2.docs[1].id] 388 | res.sort() 389 | self.assertEqual(['doc1', 'doc2'], res) 390 | 391 | def testPayloadsWithNoContent(self): 392 | conn = self.redis() 393 | 394 | with conn as r: 395 | # Creating a client with a given index name 396 | client = Client('idx', port=conn.port) 397 | client.redis.flushdb() 398 | client.create_index((TextField('txt'),)) 399 | 400 | client.add_document('doc1', payload='foo baz', txt='foo bar') 401 | client.add_document('doc2', payload='foo baz2', 
txt='foo bar') 402 | 403 | q = Query("foo bar").with_payloads().no_content() 404 | res = client.search(q) 405 | self.assertEqual(2, len(res.docs)) 406 | 407 | def testSortby(self): 408 | conn = self.redis() 409 | 410 | with conn as r: 411 | # Creating a client with a given index name 412 | client = Client('idx', port=conn.port) 413 | client.redis.flushdb() 414 | 415 | client.create_index((TextField('txt'), NumericField('num', sortable=True))) 416 | client.add_document('doc1', txt='foo bar', num=1) 417 | client.add_document('doc2', txt='foo baz', num=2) 418 | client.add_document('doc3', txt='foo qux', num=3) 419 | 420 | # Test sort 421 | q1 = Query("foo").sort_by('num', asc=True).no_content() 422 | q2 = Query("foo").sort_by('num', asc=False).no_content() 423 | res1, res2 = client.search(q1), client.search(q2) 424 | 425 | self.assertEqual(3, res1.total) 426 | self.assertEqual('doc1', res1.docs[0].id) 427 | self.assertEqual('doc2', res1.docs[1].id) 428 | self.assertEqual('doc3', res1.docs[2].id) 429 | self.assertEqual(3, res2.total) 430 | self.assertEqual('doc1', res2.docs[2].id) 431 | self.assertEqual('doc2', res2.docs[1].id) 432 | self.assertEqual('doc3', res2.docs[0].id) 433 | 434 | def testDropIndex(self): 435 | """ 436 | Ensure the index gets dropped but data remains by default 437 | """ 438 | for x in range(20): 439 | conn = self.redis() 440 | with conn as r: 441 | if check_version(r, 20000): 442 | for keep_docs in [[True , {}], [False , {'name': 'haveit'}]]: 443 | idx = "HaveIt" 444 | index = Client(idx, port=conn.port) 445 | index.redis.hset("index:haveit", mapping = {'name': 'haveit'}) 446 | idef = IndexDefinition(prefix=['index:']) 447 | index.create_index((TextField('name'),),definition=idef) 448 | waitForIndex(index.redis, idx) 449 | index.dropindex(delete_documents=keep_docs[0]) 450 | i = index.redis.hgetall("index:haveit") 451 | self.assertEqual(i, keep_docs[1]) 452 | 453 | def testExample(self): 454 | conn = self.redis() 455 | 456 | with conn as r: 457 | # Creating a client with a given index name 458 | client = Client('myIndex', port=conn.port) 459 | client.redis.flushdb() 460 | 461 | # Creating the index definition and schema 462 | client.create_index((TextField('title', weight=5.0), TextField('body'))) 463 | 464 | # Indexing a document 465 | client.add_document('doc1', title='RediSearch', body='Redisearch implements a search engine on top of redis') 466 | 467 | # Searching with complex parameters: 468 | q = Query("search engine").verbatim().no_content().paging(0, 5) 469 | 470 | res = client.search(q) 471 | self.assertTrue(True) 472 | 473 | def testAutoComplete(self): 474 | with self.redis() as r: 475 | self.assertTrue(True) 476 | 477 | ac = AutoCompleter('ac', conn=r) 478 | n = 0 479 | with open(TITLES_CSV) as f: 480 | cr = csv.reader(f) 481 | 482 | for row in cr: 483 | n += 1 484 | term, score = row[0], float(row[1]) 485 | # print term, score 486 | self.assertEqual(n, ac.add_suggestions(Suggestion(term, score=score))) 487 | 488 | self.assertEqual(n, ac.len()) 489 | strs = [] 490 | for _ in r.retry_with_rdb_reload(): 491 | ret = ac.get_suggestions('bad', with_scores=True) 492 | self.assertEqual(2, len(ret)) 493 | self.assertEqual('badger', ret[0].string) 494 | self.assertIsInstance(ret[0].score, float) 495 | self.assertNotEqual(1.0, ret[0].score) 496 | self.assertEqual('badalte rishtey', ret[1].string) 497 | self.assertIsInstance(ret[1].score, float) 498 | self.assertNotEqual(1.0, ret[1].score) 499 | 500 | ret = ac.get_suggestions('bad', fuzzy=True, num=10) 501 |
self.assertEqual(10, len(ret)) 502 | self.assertEqual(1.0, ret[0].score) 503 | strs = {x.string for x in ret} 504 | 505 | for sug in strs: 506 | self.assertEqual(1, ac.delete(sug)) 507 | # make sure a second delete returns 0 508 | for sug in strs: 509 | self.assertEqual(0, ac.delete(sug)) 510 | 511 | # make sure they were actually deleted 512 | ret2 = ac.get_suggestions('bad', fuzzy=True, num=10) 513 | for sug in ret2: 514 | self.assertNotIn(sug.string, strs) 515 | 516 | # Test with payload 517 | ac.add_suggestions(Suggestion('pay1', payload='pl1')) 518 | ac.add_suggestions(Suggestion('pay2', payload='pl2')) 519 | ac.add_suggestions(Suggestion('pay3', payload='pl3')) 520 | 521 | sugs = ac.get_suggestions('pay', with_payloads=True, with_scores=True) 522 | self.assertEqual(3, len(sugs)) 523 | for sug in sugs: 524 | self.assertTrue(sug.payload) 525 | self.assertTrue(sug.payload.startswith('pl')) 526 | 527 | def testNoIndex(self): 528 | # Creating a client with a given index name 529 | client = self.getCleanClient('idx') 530 | client.redis.flushdb() 531 | 532 | client.create_index( 533 | (TextField('field'), 534 | TextField('text', no_index=True, sortable=True), 535 | NumericField('numeric', no_index=True, sortable=True), 536 | GeoField('geo', no_index=True, sortable=True), 537 | TagField('tag', no_index=True, sortable=True))) 538 | 539 | client.add_document('doc1', field='aaa', text='1', numeric='1', geo='1,1', tag='1') 540 | client.add_document('doc2', field='aab', text='2', numeric='2', geo='2,2', tag='2') 541 | 542 | res = client.search(Query('@text:aa*')) 543 | self.assertEqual(0, res.total) 544 | 545 | res = client.search(Query('@field:aa*')) 546 | self.assertEqual(2, res.total) 547 | 548 | res = client.search(Query('*').sort_by('text', asc=False)) 549 | self.assertEqual(2, res.total) 550 | self.assertEqual('doc2', res.docs[0].id) 551 | 552 | res = client.search(Query('*').sort_by('text', asc=True)) 553 | self.assertEqual('doc1', res.docs[0].id) 554 | 555 | res = client.search(Query('*').sort_by('numeric', asc=True)) 556 | self.assertEqual('doc1', res.docs[0].id) 557 | 558 | res = client.search(Query('*').sort_by('geo', asc=True)) 559 | self.assertEqual('doc1', res.docs[0].id) 560 | 561 | res = client.search(Query('*').sort_by('tag', asc=True)) 562 | self.assertEqual('doc1', res.docs[0].id) 563 | 564 | # Ensure exception is raised for non-indexable, non-sortable fields 565 | self.assertRaises(Exception, TextField, 'name', no_index=True, sortable=False) 566 | self.assertRaises(Exception, NumericField, 'name', no_index=True, sortable=False) 567 | self.assertRaises(Exception, GeoField, 'name', no_index=True, sortable=False) 568 | self.assertRaises(Exception, TagField, 'name', no_index=True, sortable=False) 569 | 570 | def testPartial(self): 571 | client = self.getCleanClient('idx') 572 | client.create_index((TextField('f1'), TextField('f2'), TextField('f3'))) 573 | 574 | client.add_document('doc1', f1='f1_val', f2='f2_val') 575 | client.add_document('doc2', f1='f1_val', f2='f2_val') 576 | 577 | client.add_document('doc1', f3='f3_val', partial=True) 578 | client.add_document('doc2', f3='f3_val', replace=True) 579 | 580 | for i in self.retry_with_reload(): 581 | waitForIndex(client.redis, 'idx') 582 | # Search for f3 value. 
All documents should have it 583 | res = client.search('@f3:f3_val') 584 | self.assertEqual(2, res.total) 585 | 586 | # Only the document updated with PARTIAL should still have the f1 and f2 values 587 | res = client.search('@f3:f3_val @f2:f2_val @f1:f1_val') 588 | self.assertEqual(1, res.total) 589 | 590 | def testNoCreate(self): 591 | client = self.getCleanClient('idx') 592 | client.create_index((TextField('f1'), TextField('f2'), TextField('f3'))) 593 | 594 | client.add_document('doc1', f1='f1_val', f2='f2_val') 595 | client.add_document('doc2', f1='f1_val', f2='f2_val') 596 | 597 | client.add_document('doc1', f3='f3_val', no_create=True) 598 | client.add_document('doc2', f3='f3_val', no_create=True, partial=True) 599 | 600 | for i in self.retry_with_reload(): 601 | waitForIndex(client.redis, 'idx') 602 | # Search for f3 value. All documents should have it 603 | res = client.search('@f3:f3_val') 604 | self.assertEqual(2, res.total) 605 | 606 | # Only the document updated with PARTIAL should still have the f1 and f2 values 607 | res = client.search('@f3:f3_val @f2:f2_val @f1:f1_val') 608 | self.assertEqual(1, res.total) 609 | 610 | with self.assertRaises(redis.ResponseError) as error: 611 | client.add_document('doc3', f2='f2_val', f3='f3_val', no_create=True) 612 | 613 | def testExplain(self): 614 | client = self.getCleanClient('idx') 615 | client.create_index((TextField('f1'), TextField('f2'), TextField('f3'))) 616 | res = client.explain('@f3:f3_val @f2:f2_val @f1:f1_val') 617 | self.assertTrue(res) 618 | 619 | def testSummarize(self): 620 | client = self.getCleanClient('idx') 621 | self.createIndex(client) 622 | 623 | for _ in self.retry_with_reload(): 624 | waitForIndex(client.redis, 'idx') 625 | q = Query('king henry').paging(0, 1) 626 | q.highlight(fields=('play', 'txt'), tags=('', '')) 627 | q.summarize('txt') 628 | 629 | doc = sorted(client.search(q).docs)[0] 630 | self.assertEqual('Henry IV', doc.play) 631 | self.assertEqual('ACT I SCENE I. London. The palace. Enter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... ', 632 | doc.txt) 633 | 634 | q = Query('king henry').paging(0, 1).summarize().highlight() 635 | 636 | doc = sorted(client.search(q).docs)[0] 637 | self.assertEqual('Henry ... ', doc.play) 638 | self.assertEqual('ACT I SCENE I. London. The palace. Enter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR... 
', 639 | doc.txt) 640 | 641 | def testSummarizeDisabled(self): 642 | # test NOOFFSETS 643 | client = self.getCleanClient('idx') 644 | client.create_index((TextField('txt'),), no_term_offsets=True) 645 | client.add_document('doc1', txt='foo bar') 646 | with self.assertRaises(Exception) as context: 647 | client.search(Query('foo').summarize(fields=['txt'])) 648 | self.assertEqual('Cannot use highlight/summarize because NOOFSETS was specified at index level', 649 | str(context.exception)) 650 | 651 | # test NOHL 652 | client = self.getCleanClient('idx') 653 | client.create_index((TextField('txt'),), no_highlight=True) 654 | client.add_document('doc1', txt='foo bar') 655 | with self.assertRaises(Exception) as context: 656 | client.search(Query('foo').summarize(fields=['txt'])) 657 | self.assertEqual('Cannot use highlight/summarize because NOOFSETS was specified at index level', 658 | str(context.exception)) 659 | 660 | def testAlias(self): 661 | conn = self.redis() 662 | with conn as r: 663 | if check_version(r, 20000): 664 | 665 | index1 = Client('testAlias', port=conn.port) 666 | index1.redis.flushdb() 667 | index2 = Client('testAlias2', port=conn.port) 668 | 669 | index1.redis.hset("index1:lonestar", mapping={'name': 'lonestar'}) 670 | index2.redis.hset("index2:yogurt", mapping={'name': 'yogurt'}) 671 | 672 | time.sleep(2) 673 | 674 | def1 = IndexDefinition(prefix=['index1:'], score_field='name') 675 | def2 = IndexDefinition(prefix=['index2:'], score_field='name') 676 | 677 | index1.create_index((TextField('name'),), definition=def1) 678 | index2.create_index((TextField('name'),), definition=def2) 679 | 680 | res = index1.search('*').docs[0] 681 | self.assertEqual('index1:lonestar', res.id) 682 | 683 | # create alias and check for results 684 | index1.aliasadd("spaceballs") 685 | alias_client = Client('spaceballs', port=conn.port) 686 | res = alias_client.search('*').docs[0] 687 | self.assertEqual('index1:lonestar', res.id) 688 | 689 | # We should throw an exception when trying to add an alias that already exists 690 | with self.assertRaises(Exception) as context: 691 | index2.aliasadd('spaceballs') 692 | self.assertEqual('Alias already exists', str(context.exception)) 693 | 694 | # update alias and ensure new results 695 | index2.aliasupdate("spaceballs") 696 | alias_client2 = Client('spaceballs', port=conn.port) 697 | res = alias_client2.search('*').docs[0] 698 | self.assertEqual('index2:yogurt', res.id) 699 | 700 | index2.aliasdel("spaceballs") 701 | with self.assertRaises(Exception) as context: 702 | alias_client2.search('*').docs[0] 703 | self.assertEqual('spaceballs: no such index', str(context.exception)) 704 | 705 | else: 706 | # Creating a client with one index 707 | index1 = Client('testAlias', port=conn.port) 708 | index1.redis.flushdb() 709 | 710 | index1.create_index((TextField('txt'),)) 711 | index1.add_document('doc1', txt = 'text goes here') 712 | 713 | index2 = Client('testAlias2', port=conn.port) 714 | index2.create_index((TextField('txt'),)) 715 | index2.add_document('doc2', txt = 'text goes here') 716 | 717 | # add the actual alias and check 718 | index1.aliasadd('myalias') 719 | alias_client = Client('myalias', port=conn.port) 720 | res = alias_client.search('*').docs[0] 721 | self.assertEqual('doc1', res.id) 722 | 723 | # We should throw an exception when trying to add an alias that already exists 724 | with self.assertRaises(Exception) as context: 725 | index2.aliasadd('myalias') 726 | self.assertEqual('Alias already exists', str(context.exception)) 727 | 728 | # 
update the alias and ensure we get doc2 729 | index2.aliasupdate('myalias') 730 | alias_client2 = Client('myalias', port=conn.port) 731 | res = alias_client2.search('*').docs[0] 732 | self.assertEqual('doc2', res.id) 733 | 734 | # delete the alias and expect an error if we try to query again 735 | index2.aliasdel('myalias') 736 | with self.assertRaises(Exception) as context: 737 | alias_client2.search('*').docs[0] 738 | self.assertEqual('myalias: no such index', str(context.exception)) 739 | 740 | def testTags(self): 741 | conn = self.redis() 742 | 743 | with conn as r: 744 | # Creating a client with a given index name 745 | client = Client('idx', port=conn.port) 746 | client.redis.flushdb() 747 | 748 | client.create_index((TextField('txt'), TagField('tags'))) 749 | 750 | tags = 'foo,foo bar,hello;world' 751 | tags2 = 'soba,ramen' 752 | 753 | client.add_document('doc1', txt='fooz barz', tags=tags) 754 | client.add_document('doc2', txt='noodles', tags=tags2) 755 | 756 | for _ in r.retry_with_rdb_reload(): 757 | waitForIndex(r, 'idx') 758 | q = Query("@tags:{foo}") 759 | res = client.search(q) 760 | self.assertEqual(1, res.total) 761 | 762 | q = Query("@tags:{foo bar}") 763 | res = client.search(q) 764 | self.assertEqual(1, res.total) 765 | 766 | q = Query("@tags:{foo\\ bar}") 767 | res = client.search(q) 768 | self.assertEqual(1, res.total) 769 | 770 | q = Query("@tags:{hello\\;world}") 771 | res = client.search(q) 772 | self.assertEqual(1, res.total) 773 | 774 | q2 = client.tagvals('tags') 775 | self.assertEqual((tags.split(',') + tags2.split(',')).sort(), q2.sort()) 776 | 777 | def testTextFieldSortableNostem(self): 778 | conn = self.redis() 779 | 780 | with conn as r: 781 | # Creating a client with a given index name 782 | client = Client('sortableNostem', port=conn.port) 783 | client.redis.flushdb() 784 | 785 | # Creating the index definition with sortable and no_stem 786 | client.create_index((TextField('txt', sortable=True, no_stem=True),)) 787 | 788 | # Now get the index info to confirm its contents 789 | response = client.info() 790 | self.assertIn('SORTABLE', response['attributes'][0]) 791 | self.assertIn('NOSTEM', response['attributes'][0]) 792 | 793 | def testMaxTextFields(self): 794 | conn = self.redis() 795 | 796 | with conn as r: 797 | # Creating a client 798 | client = Client('idx1', port=conn.port) 799 | client.redis.flushdb() 800 | 801 | # Creating the index definition 802 | client.create_index((TextField('f0'),)) 803 | # Fill the index with fields 804 | for x in range(1, 32): 805 | client.alter_schema_add((TextField('f{}'.format(x)),)) 806 | # OK for now. 
807 | 808 | # Should be too many indexes 809 | with self.assertRaises(redis.ResponseError): 810 | client.alter_schema_add((TextField('f{}'.format(x)),)) 811 | 812 | # Creating new client 813 | client = Client('idx2', port=conn.port) 814 | client.redis.flushdb() 815 | 816 | # Creating the index definition 817 | client.create_index((TextField('f0'),), max_text_fields=True) 818 | # Fill the index with fields 819 | for x in range(1, 50): 820 | client.alter_schema_add((TextField('f{}'.format(x)),)) 821 | 822 | def testAlterSchemaAdd(self): 823 | conn = self.redis() 824 | 825 | with conn as r: 826 | # Creating a client with a given index name 827 | client = Client('alterIdx', port=conn.port) 828 | client.redis.flushdb() 829 | 830 | # Creating the index definition and schema 831 | client.create_index(TextField('title')) 832 | 833 | # Using alter to add a field 834 | client.alter_schema_add(TextField('body')) 835 | 836 | # Indexing a document 837 | client.add_document('doc1', title='MyTitle', body='Some content only in the body') 838 | 839 | # Searching with parameter only in the body (the added field) 840 | q = Query("only in the body") 841 | 842 | # Ensure we find the result searching on the added body field 843 | res = client.search(q) 844 | self.assertEqual(1, res.total) 845 | 846 | def testSpellCheck(self): 847 | client = self.getCleanClient('idx') 848 | client.create_index((TextField('f1'), TextField('f2'))) 849 | 850 | client.add_document('doc1', f1='some valid content', f2='this is sample text') 851 | client.add_document('doc2', f1='very important', f2='lorem ipsum') 852 | 853 | for i in self.retry_with_reload(): 854 | waitForIndex(client.redis, 'idx') 855 | res = client.spellcheck('impornant') 856 | self.assertEqual('important', res['impornant'][0]['suggestion']) 857 | 858 | res = client.spellcheck('contnt') 859 | self.assertEqual('content', res['contnt'][0]['suggestion']) 860 | 861 | def testDictOps(self): 862 | client = self.getCleanClient('idx') 863 | client.create_index((TextField('f1'), TextField('f2'))) 864 | 865 | for _ in self.retry_with_reload(): 866 | waitForIndex(client.redis, 'idx') 867 | # Add three items 868 | res = client.dict_add('custom_dict', 'item1', 'item2', 'item3') 869 | self.assertEqual(3, res) 870 | 871 | # Remove one item 872 | res = client.dict_del('custom_dict', 'item2') 873 | self.assertEqual(1, res) 874 | 875 | # Dump dict and inspect content 876 | res = client.dict_dump('custom_dict') 877 | self.assertEqual(['item1', 'item3'], res) 878 | 879 | # Remove rest of the items before reload 880 | client.dict_del('custom_dict', *res) 881 | 882 | def testPhoneticMatcher(self): 883 | conn = self.redis() 884 | 885 | with conn as r: 886 | # Creating a client with a given index name 887 | client = Client('myIndex', port=conn.port) 888 | client.redis.flushdb() 889 | 890 | client.create_index((TextField('name'),)) 891 | 892 | client.add_document('doc1', name='Jon') 893 | client.add_document('doc2', name='John') 894 | 895 | res = client.search(Query("Jon")) 896 | self.assertEqual(1, len(res.docs)) 897 | self.assertEqual('Jon', res.docs[0].name) 898 | 899 | # Drop and create index with phonetic matcher 900 | client.redis.flushdb() 901 | 902 | client.create_index((TextField('name', phonetic_matcher='dm:en'),)) 903 | 904 | client.add_document('doc1', name='Jon') 905 | client.add_document('doc2', name='John') 906 | 907 | res = client.search(Query("Jon")) 908 | self.assertEqual(2, len(res.docs)) 909 | self.assertEqual(['John', 'Jon'], sorted([d.name for d in res.docs])) 910 | 911 
| def testScorer(self): 912 | # Creating a client with a given index name 913 | client = self.getCleanClient('idx') 914 | 915 | client.create_index((TextField('description'),)) 916 | 917 | client.add_document('doc1', description='The quick brown fox jumps over the lazy dog') 918 | client.add_document('doc2', description='Quick alice was beginning to get very tired of sitting by her quick sister on the bank, and of having nothing to do.') 919 | 920 | # default scorer is TFIDF 921 | res = client.search(Query('quick').with_scores()) 922 | self.assertEqual(1.0, res.docs[0].score) 923 | res = client.search(Query('quick').scorer('TFIDF').with_scores()) 924 | self.assertEqual(1.0, res.docs[0].score) 925 | 926 | res = client.search(Query('quick').scorer('TFIDF.DOCNORM').with_scores()) 927 | self.assertEqual(0.1111111111111111, res.docs[0].score) 928 | 929 | res = client.search(Query('quick').scorer('BM25').with_scores()) 930 | self.assertEqual(0.17699114465425977, res.docs[0].score) 931 | 932 | res = client.search(Query('quick').scorer('DISMAX').with_scores()) 933 | self.assertEqual(2.0, res.docs[0].score) 934 | 935 | res = client.search(Query('quick').scorer('DOCSCORE').with_scores()) 936 | self.assertEqual(1.0, res.docs[0].score) 937 | 938 | res = client.search(Query('quick').scorer('HAMMING').with_scores()) 939 | self.assertEqual(0.0, res.docs[0].score) 940 | 941 | def testGet(self): 942 | client = self.getCleanClient('idx') 943 | client.create_index((TextField('f1'), TextField('f2'))) 944 | 945 | self.assertEqual([None], client.get('doc1')) 946 | self.assertEqual([None, None], client.get('doc2', 'doc1')) 947 | 948 | client.add_document('doc1', f1='some valid content dd1', f2='this is sample text ff1') 949 | client.add_document('doc2', f1='some valid content dd2', f2='this is sample text ff2') 950 | 951 | self.assertEqual([['f1', 'some valid content dd2', 'f2', 'this is sample text ff2']], client.get('doc2')) 952 | self.assertEqual([['f1', 'some valid content dd1', 'f2', 'this is sample text ff1'], ['f1', 'some valid content dd2', 'f2', 'this is sample text ff2']], client.get('doc1', 'doc2')) 953 | 954 | def testConfig(self): 955 | client = self.getCleanClient('idx') 956 | if not check_version(client.redis, 20200): 957 | return 958 | self.assertTrue(client.config_set('TIMEOUT', '100')) 959 | with self.assertRaises(redis.ResponseError) as error: 960 | client.config_set('TIMEOUT', "null") 961 | res = client.config_get('*') 962 | self.assertEqual('100', res['TIMEOUT']) 963 | res = client.config_get('TIMEOUT') 964 | self.assertEqual('100', res['TIMEOUT']) 965 | 966 | def testAggregations(self): 967 | conn = self.redis() 968 | 969 | with conn as r: 970 | client = Client('myIndex', port=conn.port) 971 | client.redis.flushdb() 972 | 973 | # Creating the index definition and schema 974 | client.create_index((NumericField('random_num'), TextField('title'), 975 | TextField('body'), TextField('parent'))) 976 | 977 | # Indexing a document 978 | client.add_document( 979 | 'search', 980 | title='RediSearch', 981 | body='Redisearch impements a search engine on top of redis', 982 | parent='redis', 983 | random_num=10) 984 | client.add_document( 985 | 'ai', 986 | title='RedisAI', 987 | body= 988 | 'RedisAI executes Deep Learning/Machine Learning models and managing their data.', 989 | parent='redis', 990 | random_num=3) 991 | client.add_document( 992 | 'json', 993 | title='RedisJson', 994 | body= 995 | 'RedisJSON implements ECMA-404 The JSON Data Interchange Standard as a native data type.', 996 | 
parent='redis', 997 | random_num=8) 998 | 999 | req = aggregations.AggregateRequest('redis').group_by( 1000 | "@parent", 1001 | reducers.count(), 1002 | reducers.count_distinct('@title'), 1003 | reducers.count_distinctish('@title'), 1004 | reducers.sum("@random_num"), 1005 | reducers.min("@random_num"), 1006 | reducers.max("@random_num"), 1007 | reducers.avg("@random_num"), 1008 | reducers.stddev("random_num"), 1009 | reducers.quantile("@random_num", 0.5), 1010 | reducers.tolist("@title"), 1011 | reducers.first_value("@title"), 1012 | reducers.random_sample("@title", 2), 1013 | ) 1014 | 1015 | res = client.aggregate(req) 1016 | 1017 | res = res.rows[0] 1018 | self.assertEqual(len(res), 26) 1019 | self.assertEqual('redis', res[1]) 1020 | self.assertEqual('3', res[3]) 1021 | self.assertEqual('3', res[5]) 1022 | self.assertEqual('3', res[7]) 1023 | self.assertEqual('21', res[9]) 1024 | self.assertEqual('3', res[11]) 1025 | self.assertEqual('10', res[13]) 1026 | self.assertEqual('7', res[15]) 1027 | self.assertEqual('3.60555127546', res[17]) 1028 | self.assertEqual('8', res[19]) 1029 | self.assertEqual(set(['RediSearch', 'RedisAI', 'RedisJson']), set(res[21])) 1030 | self.assertEqual('RediSearch', res[23]) 1031 | self.assertEqual(2, len(res[25])) 1032 | 1033 | def testIndexDefinition(self): 1034 | """ 1035 | Create definition and test its args 1036 | """ 1037 | conn = self.redis() 1038 | 1039 | with conn as r: 1040 | r.flushdb() 1041 | if not check_version(r, 20000): 1042 | return 1043 | client = Client('test', port=conn.port) 1044 | 1045 | self.assertRaises(RuntimeError, IndexDefinition, prefix=['hset:', 'henry'], index_type='json') 1046 | 1047 | definition = IndexDefinition(prefix=['hset:', 'henry'], 1048 | filter='@f1==32', language='English', language_field='play', 1049 | score_field='chapter', score=0.5, payload_field='txt', index_type=IndexType.JSON) 1050 | 1051 | self.assertEqual(['ON', 'JSON', 'PREFIX', 2, 'hset:', 'henry', 1052 | 'FILTER', '@f1==32', 'LANGUAGE_FIELD', 'play', 'LANGUAGE', 'English', 1053 | 'SCORE_FIELD', 'chapter', 'SCORE', 0.5, 'PAYLOAD_FIELD', 'txt'], 1054 | definition.args) 1055 | 1056 | self.createIndex(client, num_docs=500, definition=definition) 1057 | 1058 | def testCreateClientDefinition(self): 1059 | """ 1060 | Create definition with no index type provided, 1061 | and use hset to test the client definition (the default is HASH). 1062 | """ 1063 | conn = self.redis() 1064 | 1065 | with conn as r: 1066 | r.flushdb() 1067 | if not check_version(r, 20000): 1068 | return 1069 | client = Client('test', port=conn.port) 1070 | 1071 | definition = IndexDefinition(prefix=['hset:', 'henry']) 1072 | self.createIndex(client, num_docs=500, definition=definition) 1073 | 1074 | info = client.info() 1075 | self.assertEqual(494, int(info['num_docs'])) 1076 | 1077 | r.hset('hset:1', 'f1', 'v1'); 1078 | 1079 | info = client.info() 1080 | self.assertEqual(495, int(info['num_docs'])) 1081 | 1082 | def testCreateClientDefinitionHash(self): 1083 | """ 1084 | Create definition with IndexType.HASH as index type (ON HASH), 1085 | and use hset to test the client definition. 
1086 | """ 1087 | conn = self.redis() 1088 | 1089 | with conn as r: 1090 | r.flushdb() 1091 | if not check_version(r, 20000): 1092 | return 1093 | client = Client('test', port=conn.port) 1094 | 1095 | definition = IndexDefinition(prefix=['hset:', 'henry'], index_type=IndexType.HASH) 1096 | self.createIndex(client, num_docs=500, definition=definition) 1097 | 1098 | info = client.info() 1099 | self.assertEqual(494, int(info['num_docs'])) 1100 | 1101 | r.hset('hset:1', 'f1', 'v1'); 1102 | 1103 | info = client.info() 1104 | self.assertEqual(495, int(info['num_docs'])) 1105 | 1106 | def testCreateClientDefinitionJson(self): 1107 | """ 1108 | Create definition with IndexType.JSON as index type (ON JSON), 1109 | and use json client to test it. 1110 | """ 1111 | conn = self.redis() 1112 | 1113 | with conn as r: 1114 | r.flushdb() 1115 | if not check_version(r, 20200): 1116 | return 1117 | 1118 | client = Client('json1', port=conn.port) 1119 | 1120 | definition = IndexDefinition(prefix=['king:'], index_type=IndexType.JSON) 1121 | client.create_index((TextField('$.name'),), definition=definition) 1122 | 1123 | rj = rejson.Client(host='localhost', port=conn.port, decode_responses=True) 1124 | rj.jsonset('king:1', rejson.Path.rootPath(), {'name': 'henry'}) 1125 | rj.jsonset('king:2', rejson.Path.rootPath(), {'name': 'james'}) 1126 | 1127 | res = client.search('henry') 1128 | self.assertEqual(res.docs[0].id, 'king:1') 1129 | self.assertIsNone(res.docs[0].payload) 1130 | self.assertEqual(res.docs[0].json, '{"name":"henry"}') 1131 | self.assertEqual(res.total, 1) 1132 | 1133 | def testFieldsAsName(self): 1134 | conn = self.redis() 1135 | 1136 | with conn as r: 1137 | r.flushdb() 1138 | if not check_version(r, 20200): 1139 | return 1140 | 1141 | # create index 1142 | SCHEMA = ( 1143 | TextField("$.name", sortable=True, as_name='name'), 1144 | NumericField("$.age", as_name='just_a_number'), 1145 | ) 1146 | definition = IndexDefinition(index_type=IndexType.JSON) 1147 | json_client = Client('idxJson') 1148 | json_client.create_index(SCHEMA, definition=definition) 1149 | 1150 | # insert json data 1151 | rj = rejson.Client(host='localhost', port=conn.port, decode_responses=True) 1152 | res = rj.jsonset('doc:1', rejson.Path.rootPath(), {'name': 'Jon', 'age': 25}) 1153 | self.assertTrue(res) 1154 | 1155 | total = json_client.search(Query('Jon').return_fields('name', 'just_a_number')).docs 1156 | self.assertEqual(1, len(total)) 1157 | self.assertEqual('doc:1', total[0].id) 1158 | self.assertEqual('Jon', total[0].name) 1159 | self.assertEqual('25', total[0].just_a_number) 1160 | 1161 | def testSearchReturnFields(self): 1162 | conn = self.redis() 1163 | 1164 | with conn as r: 1165 | r.flushdb() 1166 | if not check_version(r, 20200): 1167 | return 1168 | 1169 | # insert json data 1170 | rj = rejson.Client(host='localhost', port=conn.port, decode_responses=True) 1171 | res = rj.jsonset('doc:1', rejson.Path.rootPath(), 1172 | {"t": "riceratops", "t2": "telmatosaurus", "n": 9072, "flt": 97.2}) 1173 | self.assertTrue(res) 1174 | 1175 | # create index json 1176 | definition = IndexDefinition(index_type=IndexType.JSON) 1177 | SCHEMA = ( 1178 | TextField("$.t"), 1179 | NumericField("$.flt"), 1180 | ) 1181 | json_client = Client('idxJson') 1182 | json_client.create_index(SCHEMA, definition=definition) 1183 | waitForIndex(r, 'idxJson') 1184 | 1185 | total = json_client.search(Query('*').return_field("$.t", as_field="txt")).docs 1186 | self.assertEqual(1, len(total)) 1187 | self.assertEqual('doc:1', total[0].id) 1188 | 
self.assertEqual('riceratops', total[0].txt) 1189 | 1190 | total = json_client.search(Query('*').return_field("$.t2", as_field="txt")).docs 1191 | self.assertEqual(1, len(total)) 1192 | self.assertEqual('doc:1', total[0].id) 1193 | self.assertEqual('telmatosaurus', total[0].txt) 1194 | 1195 | 1196 | if __name__ == '__main__': 1197 | unittest.main() 1198 | -------------------------------------------------------------------------------- /test/test_builder.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) 3 | 4 | import unittest 5 | import redisearch.aggregation as a 6 | import redisearch.querystring as q 7 | import redisearch.reducers as r 8 | 9 | class QueryBuilderTest(unittest.TestCase): 10 | def testBetween(self): 11 | b = q.between(1, 10) 12 | self.assertEqual('[1 10]', str(b)) 13 | b = q.between(None, 10) 14 | self.assertEqual('[-inf 10]', str(b)) 15 | b = q.between(1, 10, inclusive_min=False) 16 | self.assertEqual('[(1 10]', str(b)) 17 | 18 | def testTags(self): 19 | self.assertRaises(ValueError, q.tags) 20 | self.assertEqual('{1 | 2 | 3}', str(q.tags(1, 2, 3))) 21 | self.assertEqual('{foo}', str(q.tags('foo'))) 22 | 23 | def testUnion(self): 24 | u = q.union() 25 | self.assertEqual('', str(u)) 26 | u = q.union(foo='fooval', bar='barval') 27 | self.assertEqual('(@foo:fooval|@bar:barval)', str(u)) 28 | u = q.union(q.intersect(foo=1, bar=2), q.intersect(foo=3, bar=4)) 29 | self.assertEqual('((@foo:1 @bar:2)|(@foo:3 @bar:4))', str(u)) 30 | 31 | def testSpecialNodes(self): 32 | u = q.union(num=q.between(1, 10)) 33 | self.assertEqual('@num:[1 10]', str(u)) 34 | u = q.union(num=[q.between(1, 10), q.between(100, 200)]) 35 | self.assertEqual('(@num:[1 10]|@num:[100 200])', str(u)) 36 | u = q.union(num=[q.tags('t1', 't2', 't3'), q.tags('t100', 't200', 't300')]) 37 | self.assertEqual('(@num:{t1 | t2 | t3}|@num:{t100 | t200 | t300})', str(u)) 38 | 39 | def testGroup(self): 40 | # Check the group class on its own 41 | self.assertRaises(ValueError, a.Group, [], []) 42 | self.assertRaises(ValueError, a.Group, ['foo'], []) 43 | 44 | # Zero fields, single reducer 45 | g = a.Group([], r.count()) 46 | ret = g.build_args() 47 | self.assertEqual(['GROUPBY', '0', 'REDUCE', 'COUNT', '0'], ret) 48 | 49 | # Single field, single reducer 50 | g = a.Group('foo', r.count()) 51 | ret = g.build_args() 52 | self.assertEqual(['GROUPBY', '1', 'foo', 'REDUCE', 'COUNT', '0'], ret) 53 | 54 | # Multiple fields, single reducer 55 | g = a.Group(['foo', 'bar'], r.count()) 56 | self.assertEqual(['GROUPBY', '2', 'foo', 'bar', 'REDUCE', 'COUNT', '0'], 57 | g.build_args()) 58 | 59 | # Multiple fields, multiple reducers 60 | g = a.Group(['foo', 'bar'], [r.count(), r.count_distinct('@fld1')]) 61 | self.assertEqual(['GROUPBY', '2', 'foo', 'bar', 'REDUCE', 'COUNT', '0', 'REDUCE', 'COUNT_DISTINCT', '1', '@fld1'], 62 | g.build_args()) 63 | 64 | def testAggRequest(self): 65 | req = a.AggregateRequest() 66 | self.assertEqual(['*'], req.build_args()) 67 | 68 | # Test with group_by 69 | req = a.AggregateRequest().group_by('@foo', r.count()) 70 | self.assertEqual(['*', 'GROUPBY', '1', '@foo', 'REDUCE', 'COUNT', '0'], req.build_args()) 71 | 72 | # Test with group_by and alias on reducer 73 | req = a.AggregateRequest().group_by('@foo', r.count().alias('foo_count')) 74 | self.assertEqual(['*', 'GROUPBY', '1', '@foo', 'REDUCE', 'COUNT', '0', 'AS', 'foo_count'], req.build_args()) 75 | 76 | # Test with limit 77 | req = 
a.AggregateRequest(). \ 78 | group_by('@foo', r.count()). \ 79 | sort_by('@foo') 80 | self.assertEqual(['*', 'GROUPBY', '1', '@foo', 'REDUCE', 'COUNT', '0', 'SORTBY', '1', 81 | '@foo'], req.build_args()) 82 | 83 | # Test with apply 84 | req = a.AggregateRequest(). \ 85 | apply(foo="@bar / 2"). \ 86 | group_by('@foo', r.count()) 87 | 88 | self.assertEqual(['*', 'APPLY', '@bar / 2', 'AS', 'foo', 'GROUPBY', '1', '@foo', 'REDUCE', 'COUNT', '0'], 89 | req.build_args()) 90 | 91 | # Test with filter 92 | req = a.AggregateRequest().group_by('@foo', r.count()).filter( "@foo=='bar'") 93 | self.assertEqual(['*', 'GROUPBY', '1', '@foo', 'REDUCE', 'COUNT', '0', 'FILTER', "@foo=='bar'" ], req.build_args()) 94 | 95 | # Test with filter on different state of the pipeline 96 | req = a.AggregateRequest().filter("@foo=='bar'").group_by('@foo', r.count()) 97 | self.assertEqual(['*', 'FILTER', "@foo=='bar'", 'GROUPBY', '1', '@foo','REDUCE', 'COUNT', '0' ], req.build_args()) 98 | 99 | # Test with filter on different state of the pipeline 100 | req = a.AggregateRequest().filter(["@foo=='bar'","@foo2=='bar2'"]).group_by('@foo', r.count()) 101 | self.assertEqual(['*', 'FILTER', "@foo=='bar'", 'FILTER', "@foo2=='bar2'", 'GROUPBY', '1', '@foo', 'REDUCE', 'COUNT', '0'], 102 | req.build_args()) 103 | 104 | # Test with sort_by 105 | req = a.AggregateRequest().group_by('@foo', r.count()).sort_by('@date') 106 | # print req.build_args() 107 | self.assertEqual(['*', 'GROUPBY', '1', '@foo', 'REDUCE', 'COUNT', '0', 'SORTBY', '1', '@date'], 108 | req.build_args()) 109 | 110 | req = a.AggregateRequest().group_by('@foo', r.count()).sort_by(a.Desc('@date')) 111 | # print req.build_args() 112 | self.assertEqual(['*', 'GROUPBY', '1', '@foo', 'REDUCE', 'COUNT', '0', 'SORTBY', '2', '@date', 'DESC'], 113 | req.build_args()) 114 | 115 | req = a.AggregateRequest().group_by('@foo', r.count()).sort_by(a.Desc('@date'), a.Asc('@time')) 116 | # print req.build_args() 117 | self.assertEqual(['*', 'GROUPBY', '1', '@foo', 'REDUCE', 'COUNT', '0', 'SORTBY', '4', '@date', 'DESC', '@time', 'ASC'], 118 | req.build_args()) 119 | 120 | req = a.AggregateRequest().group_by('@foo', r.count()).sort_by(a.Desc('@date'), a.Asc('@time'), max=10) 121 | self.assertEqual(['*', 'GROUPBY', '1', '@foo', 'REDUCE', 'COUNT', '0', 'SORTBY', '4', '@date', 'DESC', '@time', 'ASC', 'MAX', '10'], 122 | req.build_args()) 123 | 124 | def test_reducers(self): 125 | self.assertEqual((), r.count().args) 126 | self.assertEqual(('f1',), r.sum('f1').args) 127 | self.assertEqual(('f1',), r.min('f1').args) 128 | self.assertEqual(('f1',), r.max('f1').args) 129 | self.assertEqual(('f1',), r.avg('f1').args) 130 | self.assertEqual(('f1',), r.tolist('f1').args) 131 | self.assertEqual(('f1',), r.count_distinct('f1').args) 132 | self.assertEqual(('f1',), r.count_distinctish('f1').args) 133 | self.assertEqual(('f1', '0.95'), r.quantile('f1', 0.95).args) 134 | self.assertEqual(('f1',), r.stddev('f1').args) 135 | 136 | self.assertEqual(('f1',), r.first_value('f1').args) 137 | self.assertEqual(('f1', 'BY', 'f2', 'ASC'), r.first_value('f1', a.Asc('f2')).args) 138 | self.assertEqual(('f1', 'BY', 'f1', 'ASC'), r.first_value('f1', a.Asc).args) 139 | 140 | self.assertEqual(('f1', '50'), r.random_sample('f1', 50).args) 141 | 142 | 143 | if __name__ == '__main__': 144 | unittest.main() 145 | -------------------------------------------------------------------------------- /test/will_play_text.csv.bz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RediSearch/redisearch-py/6c2a1eca876ac5f9fe8bb2cb8c7756d77f407576/test/will_play_text.csv.bz2 -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | skipsdist = True 3 | envlist = linters,cover,test_with_coverage,py27,py36,py37,py38,py39,py310 4 | 5 | [flake8] 6 | max-complexity = 10 7 | ignore = E127,E265,E266,E301,E501 8 | srcdir = redisearch 9 | show-source = true 10 | exclude =.git,.tox,dist,doc,*/__pycache__/*,*test*.py 11 | 12 | # virtualenv bug #2214 13 | [testenv:cover] 14 | whitelist_externals = find 15 | commands_pre = 16 | pip install --upgrade pip 17 | setenv = 18 | REDIS_PORT = 6379 19 | commands = 20 | coverage run test/test.py 21 | coverage run -a test/test_builder.py 22 | codecov 23 | 24 | [testenv:test_without_coverage] 25 | whitelist_externals = find 26 | setenv = 27 | REDIS_PORT = 6379 28 | commands = 29 | python test/test.py 30 | python test/test_builder.py 31 | 32 | [testenv:linters] 33 | commands = 34 | # flake8 --show-source 35 | vulture redisearch --min-confidence 80 36 | bandit redisearch/** 37 | --------------------------------------------------------------------------------
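As a brief orientation to the client API that these tests exercise, here is a minimal usage sketch mirroring testExample in test/test.py. It is an illustration only, not part of the repository: it assumes a Redis server with the RediSearch module loaded is reachable on the default localhost:6379, and the index name, document id, and field values are placeholders taken from that test.

from redisearch import Client, TextField, Query

# Bind a client to an index name (assumes RediSearch is loaded in the target Redis)
client = Client('myIndex')

# Define the schema: a weighted title field and a body field
client.create_index((TextField('title', weight=5.0), TextField('body')))

# Index a single document
client.add_document('doc1', title='RediSearch', body='RediSearch implements a search engine on top of Redis')

# Verbatim query, no document contents, first five results
q = Query('search engine').verbatim().no_content().paging(0, 5)
res = client.search(q)
print(res.total, [doc.id for doc in res.docs])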