├── .cursor
    └── rules
    │   └── redisvl.mdc
├── .github
    ├── release-drafter-config.yml
    └── workflows
    │   ├── claude.yml
    │   ├── lint.yml
    │   ├── release-drafter.yml
    │   ├── release.yml
    │   └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .pylintrc
├── .readthedocs.yaml
├── CLAUDE.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── docs
    ├── Makefile
    ├── _extension
    │   └── gallery_directive.py
    ├── _static
    │   ├── .nojekyll
    │   ├── Redis_Favicon_144x144_Red.png
    │   ├── Redis_Favicon_16x16_Red.png
    │   ├── Redis_Favicon_32x32_Red.png
    │   ├── Redis_Logo_Red_RGB.svg
    │   ├── css
    │   │   ├── custom.css
    │   │   └── sidebar.css
    │   ├── gallery.yaml
    │   └── site.webmanifest
    ├── _templates
    │   └── layout.html
    ├── api
    │   ├── cache.rst
    │   ├── filter.rst
    │   ├── index.md
    │   ├── message_history.rst
    │   ├── query.rst
    │   ├── reranker.rst
    │   ├── router.rst
    │   ├── schema.rst
    │   ├── searchindex.rst
    │   ├── threshold_optimizer.rst
    │   └── vectorizer.rst
    ├── conf.py
    ├── examples
    │   └── index.md
    ├── index.md
    ├── make.bat
    ├── overview
    │   ├── cli.ipynb
    │   ├── index.md
    │   ├── installation.md
    │   └── schema.yaml
    └── user_guide
    │   ├── 01_getting_started.ipynb
    │   ├── 02_hybrid_queries.ipynb
    │   ├── 03_llmcache.ipynb
    │   ├── 04_vectorizers.ipynb
    │   ├── 05_hash_vs_json.ipynb
    │   ├── 06_rerankers.ipynb
    │   ├── 07_message_history.ipynb
    │   ├── 08_semantic_router.ipynb
    │   ├── 09_threshold_optimization.ipynb
    │   ├── 10_embeddings_cache.ipynb
    │   ├── hybrid_example_data.pkl
    │   ├── index.md
    │   ├── jupyterutils.py
    │   ├── release_guide
    │       ├── 0_5_1_release.ipynb
    │       └── index.md
    │   ├── router.yaml
    │   └── schema.yaml
├── doctests
    ├── data
    │   ├── query_vector.json
    │   └── query_vector_idx.yaml
    └── query_vector.py
├── poetry.lock
├── pyproject.toml
├── redisvl
    ├── __init__.py
    ├── cli
    │   ├── __init__.py
    │   ├── index.py
    │   ├── main.py
    │   ├── runner.py
    │   ├── stats.py
    │   ├── utils.py
    │   └── version.py
    ├── exceptions.py
    ├── extensions
    │   ├── __init__.py
    │   ├── cache
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── embeddings
    │   │   │   ├── __init__.py
    │   │   │   ├── embeddings.py
    │   │   │   └── schema.py
    │   │   └── llm
    │   │   │   ├── __init__.py
    │   │   │   ├── base.py
    │   │   │   ├── schema.py
    │   │   │   └── semantic.py
    │   ├── constants.py
    │   ├── llmcache
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── schema.py
    │   │   └── semantic.py
    │   ├── message_history
    │   │   ├── __init__.py
    │   │   ├── base_history.py
    │   │   ├── message_history.py
    │   │   ├── schema.py
    │   │   └── semantic_history.py
    │   ├── router
    │   │   ├── __init__.py
    │   │   ├── schema.py
    │   │   └── semantic.py
    │   └── session_manager
    │   │   ├── __init__.py
    │   │   ├── base_session.py
    │   │   ├── schema.py
    │   │   ├── semantic_session.py
    │   │   └── standard_session.py
    ├── index
    │   ├── __init__.py
    │   ├── index.py
    │   └── storage.py
    ├── query
    │   ├── __init__.py
    │   ├── aggregate.py
    │   ├── filter.py
    │   └── query.py
    ├── redis
    │   ├── __init__.py
    │   ├── connection.py
    │   ├── constants.py
    │   └── utils.py
    ├── schema
    │   ├── __init__.py
    │   ├── fields.py
    │   ├── schema.py
    │   ├── type_utils.py
    │   └── validation.py
    ├── types.py
    ├── utils
    │   ├── __init__.py
    │   ├── log.py
    │   ├── optimize
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── cache.py
    │   │   ├── router.py
    │   │   ├── schema.py
    │   │   └── utils.py
    │   ├── rerank
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── cohere.py
    │   │   ├── hf_cross_encoder.py
    │   │   └── voyageai.py
    │   ├── token_escaper.py
    │   ├── utils.py
    │   └── vectorize
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   └── text
    │   │       ├── __init__.py
    │   │       ├── azureopenai.py
    │   │       ├── bedrock.py
    │   │       ├── cohere.py
    │   │       ├── custom.py
    │   │       ├── huggingface.py
    │   │       ├── mistral.py
    │   │       ├── openai.py
    │   │       ├── vertexai.py
    │   │       └── voyageai.py
    └── version.py
├── schemas
    ├── schema.yaml
    ├── semantic_router.yaml
    ├── test_hash_schema.yaml
    └── test_json_schema.yaml
├── scripts.py
└── tests
    ├── cluster-compose.yml
    ├── conftest.py
    ├── docker-compose.yml
    ├── integration
        ├── test_aggregation.py
        ├── test_async_search_index.py
        ├── test_connection.py
        ├── test_cross_encoder_reranker.py
        ├── test_embedcache.py
        ├── test_flow.py
        ├── test_flow_async.py
        ├── test_llmcache.py
        ├── test_message_history.py
        ├── test_query.py
        ├── test_redis_cluster_support.py
        ├── test_rerankers.py
        ├── test_search_index.py
        ├── test_search_results.py
        ├── test_semantic_router.py
        ├── test_threshold_optimizer.py
        └── test_vectorizers.py
    └── unit
        ├── logger_interference_checker.py
        ├── test_aggregation_types.py
        ├── test_base_vectorizer.py
        ├── test_embedcache_schema.py
        ├── test_error_handling.py
        ├── test_fields.py
        ├── test_filter.py
        ├── test_llmcache_schema.py
        ├── test_message_history_schema.py
        ├── test_query_types.py
        ├── test_route_schema.py
        ├── test_schema.py
        ├── test_storage.py
        ├── test_threshold_optimizer_utility.py
        ├── test_token_escaper.py
        ├── test_utils.py
        └── test_validation.py


/.cursor/rules/redisvl.mdc:
--------------------------------------------------------------------------------
 1 | ---
 2 | description: 
 3 | globs: 
 4 | alwaysApply: true
 5 | ---
 6 | 
 7 | # Rules for working on RedisVL
 8 | - Do not change this line of code unless explicitly asked. It's already correct:
 9 | ```
10 | token.strip().strip(",").replace("“", "").replace("”", "").lower()
11 | ```


--------------------------------------------------------------------------------
/.github/release-drafter-config.yml:
--------------------------------------------------------------------------------
 1 | name-template: '$NEXT_MINOR_VERSION'
 2 | tag-template: 'v$NEXT_MINOR_VERSION'
 3 | autolabeler:
 4 |   - label: 'maintenance'
 5 |     files:
 6 |       - '*.md'
 7 |       - '.github/*'
 8 |   - label: 'bug'
 9 |     branch:
10 |       - '/bug-.+'
11 |   - label: 'maintenance'
12 |     branch:
13 |       - '/maintenance-.+'
14 |   - label: 'feature'
15 |     branch:
16 |       - '/feature-.+'
17 | categories:
18 |   - title: 'Breaking Changes'
19 |     labels:
20 |       - 'breakingchange'
21 |   - title: '🧪 Experimental Features'
22 |     labels:
23 |       - 'experimental'
24 |   - title: '🚀 New Features'
25 |     labels:
26 |       - 'feature'
27 |       - 'enhancement'
28 |   - title: '🐛 Bug Fixes'
29 |     labels:
30 |       - 'fix'
31 |       - 'bugfix'
32 |       - 'bug'
33 |       - 'BUG'
34 |   - title: '🧰 Maintenance'
35 |     label: 'maintenance'
36 | change-template: '- $TITLE (#$NUMBER)'
37 | exclude-labels:
38 |   - 'skip-changelog'
39 | template: |
40 |   # Changes
41 | 
42 |   $CHANGES
43 | 
44 |   ## Contributors
45 |   We'd like to thank all the contributors who worked on this release!
46 | 
47 |   $CONTRIBUTORS
48 | 
49 | 


--------------------------------------------------------------------------------
/.github/workflows/claude.yml:
--------------------------------------------------------------------------------
 1 | name: Claude Code
 2 | 
 3 | on:
 4 |   issue_comment:
 5 |     types: [created]
 6 |   pull_request_review_comment:
 7 |     types: [created]
 8 |   issues:
 9 |     types: [opened, assigned]
10 |   pull_request_review:
11 |     types: [submitted]
12 | 
13 | jobs:
14 |   claude:
15 |     if: |
16 |       (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
17 |       (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
18 |       (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
19 |       (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
20 |     runs-on: ubuntu-latest
21 |     permissions:
22 |       contents: read
23 |       pull-requests: read
24 |       issues: read
25 |       id-token: write
26 |     steps:
27 |       - name: Checkout repository
28 |         uses: actions/checkout@v4
29 |         with:
30 |           fetch-depth: 1
31 | 
32 |       - name: Run Claude Code
33 |         id: claude
34 |         uses: anthropics/claude-code-action@beta
35 |         with:
36 |           anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
37 | 
38 |           # Define which tools Claude can use
39 |           allowed_tools: "Bash(git status),Bash(git log),Bash(git show),Bash(git blame),Bash(git reflog),Bash(git stash list),Bash(git ls-files),Bash(git branch),Bash(git tag),Bash(git diff),Bash(make:*),Bash(pytest:*),Bash(cd:*),Bash(ls:*),Bash(make),Bash(make:*),View,GlobTool,GrepTool,BatchTool"
40 | 


--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | name: Lint
 3 | 
 4 | on:
 5 |   pull_request:
 6 |   push:
 7 |     branches:
 8 |       - main
 9 | 
10 | env:
11 |   POETRY_VERSION: "1.8.3"
12 | 
13 | jobs:
14 |   check:
15 |     name: Style-check ${{ matrix.python-version }}
16 |     runs-on: ubuntu-latest
17 |     strategy:
18 |       matrix:
19 |         # Only lint on the min and max supported Python versions.
20 |         # It's extremely unlikely that there's a lint issue on any version in between
21 |         # that doesn't show up on the min or max versions.
22 |         #
23 |         # GitHub rate-limits how many jobs can be running at any one time.
24 |         # Starting new jobs is also relatively slow,
25 |         # so linting on fewer versions makes CI faster.
26 |         python-version:
27 |           - "3.9"
28 |           - "3.10"
29 |           - "3.11"
30 |           - "3.12"
31 |           - "3.13"
32 | 
33 |     steps:
34 |     - uses: actions/checkout@v2
35 |     - name: Set up Python ${{ matrix.python-version }}
36 |       uses: actions/setup-python@v2
37 |       with:
38 |         python-version: ${{ matrix.python-version }}
39 |     - name: Install Poetry
40 |       uses: snok/install-poetry@v1
41 |       with:
42 |         version: ${{ env.POETRY_VERSION }}
43 |     - name: Install dependencies
44 |       run: |
45 |         poetry install --all-extras
46 |     - name: check-sort-import
47 |       run: |
48 |         poetry run check-sort-imports
49 |     - name: check-black-format
50 |       run: |
51 |         poetry run check-format
52 |     - name: check-mypy
53 |       run: |
54 |         poetry run check-mypy


--------------------------------------------------------------------------------
/.github/workflows/release-drafter.yml:
--------------------------------------------------------------------------------
 1 | name: Release Drafter
 2 | 
 3 | on:
 4 |   push:
 5 |     # branches to consider in the event; optional, defaults to all
 6 |     branches:
 7 |       - main
 8 | 
 9 | permissions: {}
10 | jobs:
11 |   update_release_draft:
12 |     permissions:
13 |       pull-requests: write  #  to add label to PR (release-drafter/release-drafter)
14 |       contents: write  #  to create a github release (release-drafter/release-drafter)
15 | 
16 |     runs-on: ubuntu-latest
17 |     steps:
18 |       # Drafts your next Release notes as Pull Requests are merged into "master"
19 |       - uses: release-drafter/release-drafter@v5
20 |         with:
21 |           # (Optional) specify config name to use, relative to .github/. Default: release-drafter.yml
22 |            config-name: release-drafter-config.yml
23 |         env:
24 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
25 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Publish Release
 2 | 
 3 | on:
 4 |   release:
 5 |     types: [published]
 6 | 
 7 | env:
 8 |   PYTHON_VERSION: "3.11"
 9 |   POETRY_VERSION: "1.8.3"
10 | 
11 | jobs:
12 |   build:
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |       - uses: actions/checkout@v4
17 | 
18 |       - name: Set up Python
19 |         uses: actions/setup-python@v4
20 |         with:
21 |           python-version: ${{ env.PYTHON_VERSION }}
22 | 
23 |       - name: Install Poetry
24 |         uses: snok/install-poetry@v1
25 |         with:
26 |           version: ${{ env.POETRY_VERSION }}
27 | 
28 |       - name: Build package
29 |         run: poetry build
30 | 
31 |       - name: Upload build
32 |         uses: actions/upload-artifact@v4
33 |         with:
34 |           name: dist
35 |           path: dist/
36 | 
37 |   publish:
38 |     needs: build
39 |     runs-on: ubuntu-latest
40 | 
41 |     steps:
42 |       - uses: actions/checkout@v4
43 | 
44 |       - name: Set up Python
45 |         uses: actions/setup-python@v4
46 |         with:
47 |           python-version: ${{ env.PYTHON_VERSION }}
48 | 
49 |       - name: Install Poetry
50 |         uses: snok/install-poetry@v1
51 |         with:
52 |           version: ${{ env.POETRY_VERSION }}
53 | 
54 |       - uses: actions/download-artifact@v4
55 |         with:
56 |           name: dist
57 |           path: dist/
58 | 
59 |       - name: Publish to PyPI
60 |         env:
61 |           POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI }}
62 |         run: poetry publish


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by https://www.toptal.com/developers/gitignore/api/python,venv,macos
  2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,venv,macos
  3 | 
  4 | ### macOS ###
  5 | # General
  6 | .DS_Store
  7 | .AppleDouble
  8 | .LSOverride
  9 | 
 10 | # Icon must end with two \r
 11 | Icon
 12 | 
 13 | 
 14 | # Thumbnails
 15 | ._*
 16 | 
 17 | # Files that might appear in the root of a volume
 18 | .DocumentRevisions-V100
 19 | .fseventsd
 20 | .Spotlight-V100
 21 | .TemporaryItems
 22 | .Trashes
 23 | .VolumeIcon.icns
 24 | .com.apple.timemachine.donotpresent
 25 | 
 26 | # Directories potentially created on remote AFP share
 27 | .AppleDB
 28 | .AppleDesktop
 29 | Network Trash Folder
 30 | Temporary Items
 31 | .apdisk
 32 | 
 33 | ### macOS Patch ###
 34 | # iCloud generated files
 35 | *.icloud
 36 | 
 37 | ### Python ###
 38 | # Byte-compiled / optimized / DLL files
 39 | __pycache__/
 40 | *.py[cod]
 41 | *$py.class
 42 | 
 43 | # C extensions
 44 | *.so
 45 | 
 46 | # Distribution / packaging
 47 | .Python
 48 | build/
 49 | develop-eggs/
 50 | dist/
 51 | downloads/
 52 | eggs/
 53 | .eggs/
 54 | lib/
 55 | lib64/
 56 | parts/
 57 | sdist/
 58 | var/
 59 | wheels/
 60 | share/python-wheels/
 61 | *.egg-info/
 62 | .installed.cfg
 63 | *.egg
 64 | MANIFEST
 65 | 
 66 | # PyInstaller
 67 | #  Usually these files are written by a python script from a template
 68 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 69 | *.manifest
 70 | *.spec
 71 | 
 72 | # Installer logs
 73 | pip-log.txt
 74 | pip-delete-this-directory.txt
 75 | 
 76 | # Unit test / coverage reports
 77 | htmlcov/
 78 | .tox/
 79 | .nox/
 80 | .coverage
 81 | .coverage.*
 82 | .cache
 83 | nosetests.xml
 84 | coverage.xml
 85 | *.cover
 86 | *.py,cover
 87 | .hypothesis/
 88 | .pytest_cache/
 89 | cover/
 90 | 
 91 | # Translations
 92 | *.mo
 93 | *.pot
 94 | 
 95 | # Django stuff:
 96 | *.log
 97 | local_settings.py
 98 | db.sqlite3
 99 | db.sqlite3-journal
100 | 
101 | # Flask stuff:
102 | instance/
103 | .webassets-cache
104 | 
105 | # Scrapy stuff:
106 | .scrapy
107 | 
108 | # Sphinx documentation
109 | docs/_build/
110 | 
111 | # PyBuilder
112 | .pybuilder/
113 | target/
114 | 
115 | # Jupyter Notebook
116 | .ipynb_checkpoints
117 | 
118 | # IPython
119 | profile_default/
120 | ipython_config.py
121 | 
122 | # pyenv
123 | #   For a library or package, you might want to ignore these files since the code is
124 | #   intended to run in multiple environments; otherwise, check them in:
125 | .python-version
126 | 
127 | # pipenv
128 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
129 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
130 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
131 | #   install all needed dependencies.
132 | #Pipfile.lock
133 | 
134 | # poetry
135 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
136 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
137 | #   commonly ignored for libraries.
138 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
139 | #poetry.lock
140 | 
141 | # pdm
142 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
143 | #pdm.lock
144 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
145 | #   in version control.
146 | #   https://pdm.fming.dev/#use-with-ide
147 | .pdm.toml
148 | 
149 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
150 | __pypackages__/
151 | 
152 | # Celery stuff
153 | celerybeat-schedule
154 | celerybeat.pid
155 | 
156 | # SageMath parsed files
157 | *.sage.py
158 | 
159 | # Environments
160 | .env
161 | .venv
162 | env/
163 | venv/
164 | ENV/
165 | env.bak/
166 | venv.bak/
167 | 
168 | # Spyder project settings
169 | .spyderproject
170 | .spyproject
171 | 
172 | # Rope project settings
173 | .ropeproject
174 | 
175 | # mkdocs documentation
176 | /site
177 | 
178 | # mypy
179 | .mypy_cache/
180 | .dmypy.json
181 | dmypy.json
182 | 
183 | # Pyre type checker
184 | .pyre/
185 | 
186 | # pytype static type analyzer
187 | .pytype/
188 | 
189 | # Cython debug symbols
190 | cython_debug/
191 | 
192 | # PyCharm
193 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
194 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
195 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
196 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
197 | #.idea/
198 | 
199 | ### Python Patch ###
200 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
201 | poetry.toml
202 | 
203 | # ruff
204 | .ruff_cache/
205 | 
206 | # LSP config files
207 | pyrightconfig.json
208 | 
209 | ### venv ###
210 | # Virtualenv
211 | # http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
212 | [Bb]in
213 | [Ii]nclude
214 | [Ll]ib
215 | [Ll]ib64
216 | [Ll]ocal
217 | pyvenv.cfg
218 | pip-selfcheck.json
219 | env
220 | venv
221 | .venv
222 | 
223 | libs/redis/docs/.Trash*
224 | .python-version
225 | .idea/*
226 | .vscode/settings.json
227 | .python-version
228 | tests/data
229 | .git
230 | .cursor
231 | .junie
232 | .undodir
233 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: local
3 |     hooks:
4 |       - id: poetry-checks
5 |         name: Run pre-commit checks (format, sort-imports, check-mypy)
6 |         entry: bash -c 'poetry run format && poetry run sort-imports && poetry run check-mypy'
7 |         language: system
8 |         pass_filenames: false


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | # Required
 5 | version: 2
 6 | 
 7 | # Set the OS, Python version, and other tools you might need
 8 | build:
 9 |   os: ubuntu-24.04
10 |   tools:
11 |     python: "3.11"
12 |   jobs:
13 |     post_create_environment:
14 |       # Install poetry
15 |       - python -m pip install poetry==1.8.3
16 |     post_install:
17 |       # Install dependencies
18 |       - VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --with docs
19 | 
20 | # Build documentation in the "docs/" directory with Sphinx
21 | sphinx:
22 |    configuration: docs/conf.py
23 | 
24 | formats:
25 |   - pdf
26 |   - epub
27 | 


--------------------------------------------------------------------------------
/CLAUDE.md:
--------------------------------------------------------------------------------
  1 | # CLAUDE.md - RedisVL Project Context
  2 | 
  3 | ## Frequently Used Commands
  4 | 
  5 | ```bash
  6 | # Development workflow
  7 | make install          # Install dependencies
  8 | make format           # Format code (black + isort)
  9 | make check-types      # Run mypy type checking
 10 | make lint             # Run all linting (format + types)
 11 | make test             # Run tests (no external APIs)
 12 | make test-all         # Run all tests (includes API tests)
 13 | make check            # Full check (lint + test)
 14 | 
 15 | # Redis setup
 16 | make redis-start      # Start Redis Stack container
 17 | make redis-stop       # Stop Redis Stack container
 18 | 
 19 | # Documentation
 20 | make docs-build       # Build documentation
 21 | make docs-serve       # Serve docs locally
 22 | ```
 23 | 
 24 | Pre-commit hooks are also configured, which you should
 25 | run before you commit:
 26 | ```bash
 27 | pre-commit run --all-files
 28 | ```
 29 | 
 30 | ## Important Architectural Patterns
 31 | 
 32 | ### Async/Sync Dual Interfaces
 33 | - Most core classes have both sync and async versions (e.g., `SearchIndex` / `AsyncSearchIndex`)
 34 | - Follow existing patterns when adding new functionality
 35 | 
 36 | ### Schema-Driven Design
 37 | ```python
 38 | # Index schemas define structure
 39 | schema = IndexSchema.from_yaml("schema.yaml")
 40 | index = SearchIndex(schema, redis_url="redis://localhost:6379")
 41 | ```
 42 | 
 43 | ## Critical Rules
 44 | 
 45 | ### Do Not Modify
 46 | - **CRITICAL**: Do not change this line unless explicitly asked:
 47 |   ```python
 48 |   token.strip().strip(",").replace(""", "").replace(""", "").lower()
 49 |   ```
 50 | 
 51 | ### README.md Maintenance
 52 | **IMPORTANT**: DO NOT modify README.md unless explicitly requested.
 53 | 
 54 | **If you need to document something, use these alternatives:**
 55 | - Development info → CONTRIBUTING.md
 56 | - API details → docs/ directory
 57 | - Examples → docs/examples/
 58 | - Project memory (explicit preferences, directives, etc.) → CLAUDE.md
 59 | 
 60 | ## Testing Notes
 61 | RedisVL uses `pytest` with `testcontainers` for testing.
 62 | 
 63 | - `make test` - unit tests only (no external APIs)
 64 | - `make test-all` - includes integration tests requiring API keys
 65 | 
 66 | ## Project Structure
 67 | 
 68 | ```
 69 | redisvl/
 70 | ├── cli/              # Command-line interface (rvl command)
 71 | ├── extensions/       # AI extensions (cache, memory, routing)
 72 | │   ├── cache/        # Semantic caching for LLMs
 73 | │   ├── llmcache/     # LLM-specific caching
 74 | │   ├── message_history/  # Chat history management
 75 | │   ├── router/       # Semantic routing
 76 | │   └── session_manager/  # Session management
 77 | ├── index/            # SearchIndex classes (sync/async)
 78 | ├── query/            # Query builders (Vector, Range, Filter, Count)
 79 | ├── redis/            # Redis client utilities
 80 | ├── schema/           # Index schema definitions
 81 | └── utils/            # Utilities (vectorizers, rerankers, optimization)
 82 |     ├── optimize/     # Threshold optimization
 83 |     ├── rerank/       # Result reranking
 84 |     └── vectorize/    # Embedding providers integration
 85 | ```
 86 | 
 87 | ## Core Components
 88 | 
 89 | ### 1. Index Management
 90 | - `SearchIndex` / `AsyncSearchIndex` - Main interface for Redis vector indices
 91 | - `IndexSchema` - Define index structure with fields (text, tags, vectors, etc.)
 92 | - Support for JSON and Hash storage types
 93 | 
 94 | ### 2. Query System
 95 | - `VectorQuery` - Semantic similarity search
 96 | - `RangeQuery` - Vector search within distance range
 97 | - `FilterQuery` - Metadata filtering and full-text search
 98 | - `CountQuery` - Count matching records
 99 | - Etc.
100 | 
101 | ### 3. AI Extensions
102 | - `SemanticCache` - LLM response caching with semantic similarity
103 | - `EmbeddingsCache` - Cache for vector embeddings
104 | - `MessageHistory` - Chat history with recency/relevancy retrieval
105 | - `SemanticRouter` - Route queries to topics/intents
106 | 
107 | ### 4. Vectorizers (Optional Dependencies)
108 | - OpenAI, Azure OpenAI, Cohere, HuggingFace, Mistral, VoyageAI
109 | - Custom vectorizer support
110 | - Batch processing capabilities
111 | 
112 | ## Documentation
113 | - Main docs: https://docs.redisvl.com
114 | - Built with Sphinx from `docs/` directory
115 | - Includes API reference and user guides
116 | - Example notebooks in documentation `docs/user_guide/...`
117 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | # Contributing
  2 | 
  3 | ## Introduction
  4 | 
  5 | First off, thank you for considering contributions. We value community contributions!
  6 | 
  7 | ## Contributions We Need
  8 | 
  9 | You may already know what you want to contribute \-- a fix for a bug you
 10 | encountered, or a new feature your team wants to use.
 11 | 
 12 | If you don't know what to contribute, keep an open mind! Improving
 13 | documentation, bug triaging, and writing tutorials are all examples of
 14 | helpful contributions that mean less work for you.
 15 | 
 16 | ## Your First Contribution
 17 | 
 18 | Unsure where to begin contributing? You can start by looking through some of our issues [listed here](https://github.com/redis/redis-vl-python/issues).
 19 | 
 20 | ## Getting Started
 21 | 
 22 | Here's how to get started with your code contribution:
 23 | 
 24 | 1.  Create your own fork of this repo
 25 | 2.  Set up your developer environment
 26 | 2.  Apply the changes in your forked codebase / environment
 27 | 4.  If you like the change and think the project could use it, send us a
 28 |     pull request.
 29 | 
 30 | ### Dev Environment
 31 | RedisVL uses [Poetry](https://python-poetry.org/) for dependency management.
 32 | 
 33 | Follow the instructions to [install Poetry](https://python-poetry.org/docs/#installation).
 34 | 
 35 | Then install the required libraries:
 36 | 
 37 | ```bash
 38 | poetry install --all-extras
 39 | ```
 40 | 
 41 | ### Optional Makefile
 42 | 
 43 | If you use `make`, we've created shortcuts for running the commands in this document.
 44 | 
 45 | | Command | Description |
 46 | |---------|-------------|
 47 | | make install | Installs all dependencies using Poetry|
 48 | | make redis-start | Starts Redis Stack in a Docker container on ports 6379 and 8001 |
 49 | | make redis-stop | Stops the Redis Stack Docker container |
 50 | | make format | Runs code formatting and import sorting |
 51 | | make check-types | Runs mypy type checking |
 52 | | make lint | Runs formatting, import sorting, and type checking |
 53 | | make test | Runs tests, excluding those that require API keys and/or remote network calls|
 54 | | make test-all | Runs all tests, including those that require API keys and/or remote network calls|
 55 | | make test-notebooks | Runs all notebook tests|
 56 | | make check | Runs all linting targets and a subset of tests |
 57 | | make docs-build | Builds the documentation |
 58 | | make docs-serve | Serves the documentation locally |
 59 | | make clean | Removes all generated files (cache, coverage, build artifacts, etc.) |
 60 | 
 61 | ### Linting and Tests
 62 | 
 63 | Check formatting, linting, and typing:
 64 | ```bash
 65 | poetry run format
 66 | poetry run sort-imports
 67 | poetry run check-mypy
 68 | ```
 69 | 
 70 | #### TestContainers
 71 | 
 72 | RedisVL uses Testcontainers Python for integration tests. Testcontainers is an open-source framework for provisioning throwaway, on-demand containers for development and testing use cases.
 73 | 
 74 | To run Testcontainers-based tests you need a local Docker installation such as:
 75 | - [Docker Desktop](https://www.docker.com/products/docker-desktop/)
 76 | - [Docker Engine on Linux](https://docs.docker.com/engine/install/)
 77 | 
 78 | #### Running the Tests
 79 | 
 80 | Tests w/ external APIs:
 81 | ```bash
 82 | poetry run test-verbose --run-api-tests
 83 | ```
 84 | 
 85 | Tests w/out external APIs:
 86 | ```bash
 87 | poetry run test-verbose
 88 | ```
 89 | 
 90 | Run a test on a specific file:
 91 | ```bash
 92 | poetry run test-verbose tests/unit/test_fields.py
 93 | ```
 94 | 
 95 | ### Documentation
 96 | Docs are served from the `docs/` directory.
 97 | 
 98 | Build the docs. Generates the `_build/html` contents:
 99 | ```bash
100 | poetry run build-docs
101 | ```
102 | 
103 | Serve the documentation with a local webserver:
104 | ```bash
105 | poetry run serve-docs
106 | ```
107 | 
108 | ### Getting Redis
109 | 
110 | In order for your applications to use RedisVL, you must have [Redis](https://redis.io) accessible with Search & Query features enabled on [Redis Cloud](https://redis.io/cloud/) or locally in docker with [Redis Stack](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/docker/):
111 | 
112 | ```bash
113 | docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest
114 | ```
115 | 
116 | Or from your makefile simply run:
117 | 
118 | ```bash
119 | make redis-start
120 | ```
121 | 
122 | And then:
123 | ```bash
124 | make redis-stop
125 | ```
126 | 
127 | This will also spin up the [FREE RedisInsight GUI](https://redis.io/insight/) at `http://localhost:8001`.
128 | 
129 | ## How to Report a Bug
130 | 
131 | ### Security Vulnerabilities
132 | 
133 | **NOTE**: If you find a security vulnerability, do NOT open an issue.
134 | Email [Redis OSS (<oss@redis.com>)](mailto:oss@redis.com) instead.
135 | 
136 | In order to determine whether you are dealing with a security issue, ask
137 | yourself these two questions:
138 | 
139 | -   Can I access something that's not mine, or something I shouldn't
140 |     have access to?
141 | -   Can I disable something for other people?
142 | 
143 | If the answer to either of those two questions are *yes*, then you're
144 | probably dealing with a security issue. Note that even if you answer
145 | *no*  to both questions, you may still be dealing with a security
146 | issue, so if you're unsure, just email us.
147 | 
148 | ### Everything Else
149 | 
150 | When filing an issue, make sure to answer these five questions:
151 | 
152 | 1.  What version of python are you using?
153 | 2.  What version of `redis` and `redisvl` are you using?
154 | 3.  What did you do?
155 | 4.  What did you expect to see?
156 | 5.  What did you see instead?
157 | 
158 | ## How to Suggest a Feature or Enhancement
159 | 
160 | If you'd like to contribute a new feature, make sure you check our
161 | issue list to see if someone has already proposed it. Work may already
162 | be under way on the feature you want -- or we may have rejected a
163 | feature like it already.
164 | 
165 | If you don't see anything, open a new issue that describes the feature
166 | you would like and how it should work.
167 | 
168 | ## Code Review Process
169 | 
170 | The core team looks at Pull Requests on a regular basis. We will give
171 | feedback as as soon as possible. After feedback, we expect a response
172 | within two weeks. After that time, we may close your PR if it isn't
173 | showing any activity.
174 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Redis, Inc.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: install format lint test test-all test-notebooks clean redis-start redis-stop check-types docs-build docs-serve check
 2 | 
 3 | install:
 4 | 	poetry install --all-extras
 5 | 
 6 | redis-start:
 7 | 	docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest
 8 | 
 9 | redis-stop:
10 | 	docker stop redis-stack
11 | 
12 | format:
13 | 	poetry run format
14 | 	poetry run sort-imports
15 | 
16 | check-types:
17 | 	poetry run check-mypy
18 | 
19 | lint: format check-types
20 | 	
21 | test:
22 | 	poetry run test-verbose 
23 | 	
24 | test-all:
25 | 	poetry run test-verbose --run-api-tests
26 | 
27 | test-notebooks:
28 | 	poetry run test-notebooks
29 | 
30 | check: lint test
31 | 
32 | docs-build:
33 | 	poetry run build-docs
34 | 
35 | docs-serve:
36 | 	poetry run serve-docs
37 | 
38 | clean:
39 | 	find . -type d -name "__pycache__" -exec rm -rf {} +
40 | 	find . -type d -name ".pytest_cache" -exec rm -rf {} +
41 | 	find . -type d -name ".mypy_cache" -exec rm -rf {} +
42 | 	find . -type d -name ".coverage" -delete
43 | 	find . -type d -name "htmlcov" -exec rm -rf {} +
44 | 	find . -type d -name "dist" -exec rm -rf {} +
45 | 	find . -type d -name "build" -exec rm -rf {} +
46 | 	find . -type d -name "*.egg-info" -exec rm -rf {} +
47 | 	find . -type d -name "_build" -exec rm -rf {} +
48 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 
21 | 	# build docs
22 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
23 | 


--------------------------------------------------------------------------------
/docs/_extension/gallery_directive.py:
--------------------------------------------------------------------------------
  1 | """A directive to generate a gallery of images from structured data.
  2 | 
  3 | Generating a gallery of images that are all the same size is a common pattern in
  4 | documentation, and this can be cumbersome if the gallery is generated
  5 | programmatically. This directive wraps this particular use-case in a helper-
  6 | directive to generate it with a single YAML configuration file.
  7 | 
  8 | It currently exists for maintainers of the pydata-sphinx-theme, but might be
  9 | abstracted into a standalone package if it proves useful.
 10 | """
 11 | from pathlib import Path
 12 | from typing import Any, Dict, List
 13 | 
 14 | from docutils import nodes
 15 | from docutils.parsers.rst import directives
 16 | from sphinx.application import Sphinx
 17 | from sphinx.util import logging
 18 | from sphinx.util.docutils import SphinxDirective
 19 | from yaml import safe_load
 20 | 
 21 | logger = logging.getLogger(__name__)
 22 | 
 23 | 
 24 | TEMPLATE_GRID = """
 25 | `````{{grid}} {grid_columns}
 26 | {container_options}
 27 | 
 28 | {content}
 29 | 
 30 | `````
 31 | """
 32 | 
 33 | GRID_CARD = """
 34 | ````{{grid-item-card}} {title}
 35 | {card_options}
 36 | 
 37 | {content}
 38 | ````
 39 | """
 40 | 
 41 | 
 42 | class GalleryDirective(SphinxDirective):
 43 |     """A directive to show a gallery of images and links in a grid."""
 44 | 
 45 |     name = "gallery-grid"
 46 |     has_content = True
 47 |     required_arguments = 0
 48 |     optional_arguments = 1
 49 |     final_argument_whitespace = True
 50 |     option_spec = {
 51 |         # A class to be added to the resulting container
 52 |         "grid-columns": directives.unchanged,
 53 |         "class-container": directives.unchanged,
 54 |         "class-card": directives.unchanged,
 55 |     }
 56 | 
 57 |     def run(self) -> List[nodes.Node]:
 58 |         if self.arguments:
 59 |             # If an argument is given, assume it's a path to a YAML file
 60 |             # Parse it and load it into the directive content
 61 |             path_data_rel = Path(self.arguments[0])
 62 |             path_doc, _ = self.get_source_info()
 63 |             path_doc = Path(path_doc).parent
 64 |             path_data = (path_doc / path_data_rel).resolve()
 65 |             if not path_data.exists():
 66 |                 logger.warn(f"Could not find grid data at {path_data}.")
 67 |                 nodes.text("No grid data found at {path_data}.")
 68 |                 return
 69 |             yaml_string = path_data.read_text()
 70 |         else:
 71 |             yaml_string = "\n".join(self.content)
 72 | 
 73 |         # Read in YAML so we can generate the gallery
 74 |         grid_data = safe_load(yaml_string)
 75 | 
 76 |         grid_items = []
 77 |         for item in grid_data:
 78 |             # Grid card parameters
 79 |             options = {}
 80 |             if "website" in item:
 81 |                 options["link"] = item["website"]
 82 | 
 83 |             if "class-card" in self.options:
 84 |                 options["class-card"] = self.options["class-card"]
 85 | 
 86 |             if "img-background" in item:
 87 |                 options["img-background"] = item["img-background"]
 88 | 
 89 |             if "img-top" in item:
 90 |                 options["img-top"] = item["img-top"]
 91 | 
 92 |             if "img-bottom" in item:
 93 |                 options["img-bottom"] = item["img-bottom"]
 94 | 
 95 |             options_str = "\n".join(f":{k}: {v}" for k, v in options.items()) + "\n\n"
 96 | 
 97 |             # Grid card content
 98 |             content_str = ""
 99 |             if "header" in item:
100 |                 content_str += f"{item['header']}\n\n^^^\n\n"
101 | 
102 |             if "image" in item:
103 |                 content_str += f"![Gallery image]({item['image']})\n\n"
104 | 
105 |             if "content" in item:
106 |                 content_str += f"{item['content']}\n\n"
107 | 
108 |             if "footer" in item:
109 |                 content_str += f"+++\n\n{item['footer']}\n\n"
110 | 
111 |             title = item.get("title", "")
112 |             content_str += "\n"
113 |             grid_items.append(
114 |                 GRID_CARD.format(
115 |                     card_options=options_str, content=content_str, title=title
116 |                 )
117 |             )
118 | 
119 |         # Parse the template with Sphinx Design to create an output
120 |         container = nodes.container()
121 |         # Prep the options for the template grid
122 |         container_options = {"gutter": 2, "class-container": "gallery-directive"}
123 |         if "class-container" in self.options:
124 |             container_options[
125 |                 "class-container"
126 |             ] += f' {self.options["class-container"]}'
127 |         container_options_str = "\n".join(
128 |             f":{k}: {v}" for k, v in container_options.items()
129 |         )
130 | 
131 |         # Create the directive string for the grid
132 |         grid_directive = TEMPLATE_GRID.format(
133 |             grid_columns=self.options.get("grid-columns", "1 2 3 4"),
134 |             container_options=container_options_str,
135 |             content="\n".join(grid_items),
136 |         )
137 |         # Parse content as a directive so Sphinx Design processes it
138 |         self.state.nested_parse([grid_directive], 0, container)
139 |         # Sphinx Design outputs a container too, so just use that
140 |         container = container.children[0]
141 | 
142 |         # Add extra classes
143 |         if self.options.get("container-class", []):
144 |             container.attributes["classes"] += self.options.get("class", [])
145 |         return [container]
146 | 
147 | 
148 | def setup(app: Sphinx) -> Dict[str, Any]:
149 |     """Add custom configuration to sphinx app.
150 | 
151 |     Args:
152 |         app: the Sphinx application
153 |     Returns:
154 |         the 2 parallel parameters set to ``True``.
155 |     """
156 |     app.add_directive("gallery-grid", GalleryDirective)
157 | 
158 |     return {
159 |         "parallel_read_safe": True,
160 |         "parallel_write_safe": True,
161 |     }


--------------------------------------------------------------------------------
/docs/_static/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/redis/redis-vl-python/defa62a3f291fd8093baf0c652c7d71e6528bfcb/docs/_static/.nojekyll


--------------------------------------------------------------------------------
/docs/_static/Redis_Favicon_144x144_Red.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/redis/redis-vl-python/defa62a3f291fd8093baf0c652c7d71e6528bfcb/docs/_static/Redis_Favicon_144x144_Red.png


--------------------------------------------------------------------------------
/docs/_static/Redis_Favicon_16x16_Red.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/redis/redis-vl-python/defa62a3f291fd8093baf0c652c7d71e6528bfcb/docs/_static/Redis_Favicon_16x16_Red.png


--------------------------------------------------------------------------------
/docs/_static/Redis_Favicon_32x32_Red.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/redis/redis-vl-python/defa62a3f291fd8093baf0c652c7d71e6528bfcb/docs/_static/Redis_Favicon_32x32_Red.png


--------------------------------------------------------------------------------
/docs/_static/Redis_Logo_Red_RGB.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Generator: Adobe Illustrator 28.0.0, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
 3 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd" [
 4 | 	<!ENTITY ns_extend "http://ns.adobe.com/Extensibility/1.0/">
 5 | 	<!ENTITY ns_ai "http://ns.adobe.com/AdobeIllustrator/10.0/">
 6 | 	<!ENTITY ns_graphs "http://ns.adobe.com/Graphs/1.0/">
 7 | 	<!ENTITY ns_vars "http://ns.adobe.com/Variables/1.0/">
 8 | 	<!ENTITY ns_imrep "http://ns.adobe.com/ImageReplacement/1.0/">
 9 | 	<!ENTITY ns_sfw "http://ns.adobe.com/SaveForWeb/1.0/">
10 | 	<!ENTITY ns_custom "http://ns.adobe.com/GenericCustomNamespace/1.0/">
11 | 	<!ENTITY ns_adobe_xpath "http://ns.adobe.com/XPath/1.0/">
12 | ]>
13 | <svg version="1.1" id="Layer_1" xmlns:x="&ns_extend;" xmlns:i="&ns_ai;" xmlns:graph="&ns_graphs;"
14 | 	 xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
15 | 	 viewBox="0 0 500.8695984 156.7377014" style="enable-background:new 0 0 500.8695984 156.7377014;" xml:space="preserve">
16 | <style type="text/css">
17 | 	.st0{fill:#FF4438;}
18 | </style>
19 | <metadata>
20 | 	<sfw  xmlns="&ns_sfw;">
21 | 		<slices></slices>
22 | 		<sliceSourceBounds  bottomLeftOrigin="true" height="156.7376557" width="500.8696434" x="0" y="-156.7376557">
23 | 			</sliceSourceBounds>
24 | 	</sfw>
25 | </metadata>
26 | <path class="st0" d="M409.5473022,36.9175453c4.7496033-8.4197903,12.5217285-17.487257,15.3283386-20.2938538
27 | 	c12.9535217,5.3973007,25.043457,16.4077988,23.3163452,19.2143936
28 | 	c-4.9655151,8.2038994-12.521759,17.4872589-15.3283386,20.2938576
29 | 	C419.9101257,50.734642,407.8201599,39.9400368,409.5473022,36.9175453z M500.8696289,74.0509872
30 | 	c-1.5112305,7.340332-10.5787048,15.5442276-14.4647522,16.8395844c-3.2383728-6.9085464-6.9085388-11.010498-10.3628235-11.010498
31 | 	c-4.3178406,0-4.5337219,3.0224915-4.5337219,6.9085464c0,6.9085464,4.9655151,22.0209885,4.9655151,37.7811127
32 | 	c0,17.2713623-12.0899658,30.0090027-30.6566772,30.0090027c-17.0014954,0-26.3961792-11.1448975-30.5870972-28.9498825
33 | 	c-11.1058044,19.9014969-27.346405,28.9498825-39.7937317,28.9498825c-19.4591675,0-24.039093-14.3832245-23.5775757-28.971756
34 | 	c-7.8193359,13.818512-22.8697815,28.971756-37.3039856,28.971756c-14.7346497,0-19.9394531-12.8251801-18.7459412-27.7612152
35 | 	c-8.8279419,16.4443207-24.7976379,27.7612152-40.1926117,27.7612152c-16.7059021,0-24.9747467-13.2711487-22.2988586-29.7207947
36 | 	c-11.2388306,13.8138275-32.1601257,29.7207947-53.9110413,29.7207947c-24.8005981,0-35.5960541-13.3733978-36.8755951-30.131546
37 | 	c-11.969986,19.1845551-28.104126,30.7792206-47.322319,30.7792206c-27.7423477,0-37.6651573-24.6698303-39.1091499-44.8493118
38 | 	c-10.2816467,13.7684402-21.8411522,28.0531082-36.0212975,43.9857407
39 | 	c-1.5112457,1.5112457-2.8065987,2.3748169-4.3178425,2.3748169c-4.965518,0-15.1124458-22.0209961-15.7601223-30.2248917
40 | 	c5.7586684-8.9360123,52.7942581-59.6965332,72.3982697-81.557663C59.1548538,48.95121,45.5007324,56.9208755,28.2818642,69.3013611
41 | 	c-3.0224895,2.1589203-11.4422817-17.4872589-11.2263889-32.5997047
42 | 	c19.8620701-14.6806641,50.0869675-23.9640236,74.4827728-23.9640236c34.1109467,0,53.7571335,18.9985046,53.7571335,45.3373375
43 | 	c0,22.0209923-18.3508301,46.2009087-45.1214523,47.0644798c-13.9197693,0.3615723-22.8411255-7.4522324-27.4041672-17.1012802
44 | 	c0.5454254,14.923172,8.3042526,33.2931824,29.1313019,33.2931824c24.1799164,0,34.9745178-15.5442276,53.1094513-38.2128983
45 | 	c13.8170929-17.0554733,29.7931213-32.167923,53.1094666-32.167923c14.2488708,0,23.9640198,8.8515778,23.9640198,22.2368851
46 | 	c0,16.1919098-18.9985046,38.6446838-45.5532379,38.6446838c-4.5354156,0-8.6694183-0.5975494-12.1591034-1.7787399
47 | 	c-0.0879211,0.6808853-0.1467438,1.3503799-0.1467438,1.9946365c0,7.556221,2.8065948,12.0899506,15.112442,12.0899506
48 | 	c18.1349335,0,35.1904144-10.7946014,55.9160614-36.053978c20.2938538-24.8275871,35.6221924-35.6221924,51.8140869-35.6221924
49 | 	c10.9299622,0,19.2257996,5.9238014,22.8818359,15.9006958C341.6498718,37.0203285,360.0592957,14.8132229,375.6522217,0
50 | 	c15.3283386,6.4767628,26.3388367,19.2143955,23.3163452,21.8051014
51 | 	c-11.4422607,10.3628178-49.6551819,52.0299911-64.7676086,76.8575821
52 | 	c-3.8860779,6.4767685-7.5562439,13.6012039-7.5562439,17.055481c0,3.2383804,1.9430542,4.3178406,4.1019592,4.3178406
53 | 	c14.2488708,0,48.1439514-46.2009125,67.3583374-66.4947662c12.0899658,4.965519,24.395813,15.5442276,21.3733215,19.2143936
54 | 	c-15.9760132,18.9985046-28.065979,34.5427399-28.065979,43.39431c0,2.3748169,0.8635559,3.8860626,4.1019592,3.8860626
55 | 	c6.0449829,0,11.6581726-5.3973083,20.9415283-16.8395844c1.9430237-2.3748169,4.3178406-2.3748169,5.829071,1.2953491
56 | 	c4.1019592,9.9310379,10.1469421,15.3283386,14.8965759,15.3283386c5.6131897,0,8.4197693-4.9655151,8.4197693-12.5217361
57 | 	c0-9.0674667-1.9430237-20.725647-1.9430237-25.9070511c0-17.4872627,12.9535217-27.6341896,29.1454468-27.6341896
58 | 	C484.8936157,53.7571297,495.6882324,59.586216,500.8696289,74.0509872z M101.7695084,42.5596237
59 | 	C94.4985352,53.923687,87.7306137,64.5392914,80.8937531,74.8917847c3.7157059,2.0868683,8.4143066,3.6929321,14.5305481,3.6929321
60 | 	c11.4422836,0,23.9640198-6.2608643,23.9640198-18.9985008C119.3883209,51.856163,114.5906296,44.7296677,101.7695084,42.5596237z
61 | 	 M177.6557922,100.5352936c2.2689667,0.8765869,4.9328308,1.3657684,7.795517,1.3657684
62 | 	c15.3283386,0,25.6911469-11.6581726,25.6911469-19.4302826c0-3.454277-2.1589203-5.8290863-5.6131897-5.8290863
63 | 	C196.8676453,76.6416931,183.8030243,88.7969513,177.6557922,100.5352936z M306.1349792,84.4138031
64 | 	c0-4.3178406-2.3748169-6.9085464-6.2608643-6.9085464c-12.7376404,0-31.9520264,24.1799164-31.9520264,36.2698746
65 | 	c0,3.886055,2.158905,6.4767609,6.6926575,6.4767609C288.6477356,120.2518921,306.1349792,94.7766266,306.1349792,84.4138031z"/>
66 | </svg>
67 | 


--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | .logo__image {
2 |     transform: scale(.7);
3 | }


--------------------------------------------------------------------------------
/docs/_static/css/sidebar.css:
--------------------------------------------------------------------------------
 1 | .custom_sidebar {
 2 |     width: auto;
 3 |     background-color: inherit;
 4 | 
 5 | }
 6 | 
 7 | 
 8 | /* Style the sidebar links */
 9 | .custom_sidebar ul {
10 |     list-style-type: none;
11 |     padding: 6px;
12 | }
13 | 
14 | /* Style the sidebar links */
15 | .custom_sidebar li {
16 |     list-style-type: none;
17 |     padding: 5px;
18 | }
19 | 
20 | .custom_sidebar a {
21 |     text-decoration: none; /* Removes underline */
22 | }
23 | 
24 | 


--------------------------------------------------------------------------------
/docs/_static/gallery.yaml:
--------------------------------------------------------------------------------
 1 | - title: Arxiv Paper Search
 2 |   website: https://github.com/redis-developer/redis-arxiv-search
 3 | - title: eCommerce Search
 4 |   website: https://github.com/redis-developer/redis-product-search
 5 | - title: Real-Time Embeddings with Redis and Bytewax
 6 |   website: https://github.com/awmatheson/real-time-embeddings
 7 | - title: Redis RAG Workbench
 8 |   website: https://github.com/redis-developer/redis-rag-workbench
 9 | - title: LLM Recommender for Hotels
10 |   website: https://github.com/redis-developer/LLM-Recommender
11 | - title: Agentic RAG
12 |   website: https://github.com/redis-developer/agentic-rag
13 | 


--------------------------------------------------------------------------------
/docs/_static/site.webmanifest:
--------------------------------------------------------------------------------
1 | {"name":"","short_name":"","icons":[{"src":"Redis_Favicon_144x144_Red.png","sizes":"144x144","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"}


--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
 1 | {% extends "!layout.html" %}
 2 | 
 3 | {% block footer %}
 4 | {{ super() }}
 5 | <!-- Google tag (gtag.js) -->
 6 | <script async src="https://www.googletagmanager.com/gtag/js?id=G-2EW85FTY8C"></script>
 7 | <script>
 8 |   window.dataLayer = window.dataLayer || [];
 9 |   function gtag(){dataLayer.push(arguments);}
10 |   gtag('js', new Date());
11 | 
12 |   gtag('config', 'G-2EW85FTY8C');
13 | </script>
14 | {% endblock %}


--------------------------------------------------------------------------------
/docs/api/cache.rst:
--------------------------------------------------------------------------------
 1 | *********
 2 | LLM Cache
 3 | *********
 4 | 
 5 | SemanticCache
 6 | =============
 7 | 
 8 | .. _semantic_cache_api:
 9 | 
10 | .. currentmodule:: redisvl.extensions.cache.llm
11 | 
12 | .. autoclass:: SemanticCache
13 |    :show-inheritance:
14 |    :members:
15 |    :inherited-members:
16 | 
17 | 
18 | ****************
19 | Embeddings Cache
20 | ****************
21 | 
22 | EmbeddingsCache
23 | ===============
24 | 
25 | .. _embeddings_cache_api:
26 | 
27 | .. currentmodule:: redisvl.extensions.cache.embeddings
28 | 
29 | .. autoclass:: EmbeddingsCache
30 |    :show-inheritance:
31 |    :members:
32 |    :inherited-members:
33 | 


--------------------------------------------------------------------------------
/docs/api/filter.rst:
--------------------------------------------------------------------------------
 1 | ******
 2 | Filter
 3 | ******
 4 | 
 5 | .. _filter_api:
 6 | 
 7 | FilterExpression
 8 | ================
 9 | 
10 | .. currentmodule:: redisvl.query.filter
11 | 
12 | .. autoclass:: FilterExpression
13 | 
14 | Tag
15 | ===
16 | 
17 | .. currentmodule:: redisvl.query.filter
18 | 
19 | .. autoclass:: Tag
20 |     :members:
21 |     :special-members:
22 |     :exclude-members: __hash__
23 | 
24 | 
25 | Text
26 | ====
27 | 
28 | 
29 | .. currentmodule:: redisvl.query.filter
30 | 
31 | 
32 | .. autoclass:: Text
33 |     :members:
34 |     :special-members:
35 |     :exclude-members: __hash__
36 | 
37 | 
38 | Num
39 | ===
40 | 
41 | 
42 | .. currentmodule:: redisvl.query.filter
43 | 
44 | 
45 | .. autoclass:: Num
46 |     :members:
47 |     :special-members:
48 |     :exclude-members: __hash__
49 | 
50 | 
51 | Geo
52 | ===
53 | 
54 | .. currentmodule:: redisvl.query.filter
55 | 
56 | .. autoclass:: Geo
57 |     :members:
58 |     :special-members:
59 |     :exclude-members: __hash__
60 | 
61 | 
62 | GeoRadius
63 | =========
64 | 
65 | .. currentmodule:: redisvl.query.filter
66 | 
67 | .. autoclass:: GeoRadius
68 |     :members:
69 |     :special-members:
70 |     :exclude-members: __hash__
71 | 


--------------------------------------------------------------------------------
/docs/api/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | myst:
 3 |   html_meta:
 4 |     "description lang=en": |
 5 |         API documentation for RedisVL
 6 | ---
 7 | 
 8 | # RedisVL API
 9 | 
10 | Reference documentation for the RedisVL API.
11 | 
12 | ```{toctree}
13 | :caption: RedisVL API
14 | :maxdepth: 2
15 | 
16 | schema
17 | searchindex
18 | query
19 | filter
20 | vectorizer
21 | reranker
22 | cache
23 | message_history
24 | router
25 | threshold_optimizer
26 | ```
27 | 
28 | 


--------------------------------------------------------------------------------
/docs/api/message_history.rst:
--------------------------------------------------------------------------------
 1 | *******************
 2 | LLM Message History
 3 | *******************
 4 | 
 5 | SemanticMessageHistory
 6 | ======================
 7 | 
 8 | .. _semantic_message_history_api:
 9 | 
10 | .. currentmodule:: redisvl.extensions.message_history.semantic_history
11 | 
12 | .. autoclass:: SemanticMessageHistory
13 |    :show-inheritance:
14 |    :members:
15 |    :inherited-members:
16 | 
17 | 
18 | MessageHistory
19 | ==============
20 | 
21 | .. _message_history_api:
22 | 
23 | .. currentmodule:: redisvl.extensions.message_history.message_history
24 | 
25 | .. autoclass:: MessageHistory
26 |    :show-inheritance:
27 |    :members:
28 |    :inherited-members:
29 | 


--------------------------------------------------------------------------------
/docs/api/query.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | *****
 3 | Query
 4 | *****
 5 | 
 6 | Query classes in RedisVL provide a structured way to define simple or complex
 7 | queries for different use cases. Each query class wraps the ``redis-py`` Query module
 8 | https://github.com/redis/redis-py/blob/master/redis/commands/search/query.py with extended functionality for ease-of-use.
 9 | 
10 | 
11 | VectorQuery
12 | ===========
13 | 
14 | .. currentmodule:: redisvl.query
15 | 
16 | 
17 | .. autoclass:: VectorQuery
18 |    :members:
19 |    :inherited-members:
20 |    :show-inheritance:
21 |    :exclude-members: add_filter,get_args,highlight,return_field,summarize
22 | 
23 | 
24 | VectorRangeQuery
25 | ================
26 | 
27 | 
28 | .. currentmodule:: redisvl.query
29 | 
30 | 
31 | .. autoclass:: VectorRangeQuery
32 |    :members:
33 |    :inherited-members:
34 |    :show-inheritance:
35 |    :exclude-members: add_filter,get_args,highlight,return_field,summarize
36 | 
37 | HybridQuery
38 | ================
39 | 
40 | 
41 | .. currentmodule:: redisvl.query
42 | 
43 | 
44 | .. autoclass:: HybridQuery
45 |    :members:
46 |    :inherited-members:
47 |    :show-inheritance:
48 |    :exclude-members: add_filter,get_args,highlight,return_field,summarize
49 | 
50 | 
51 | TextQuery
52 | ================
53 | 
54 | 
55 | .. currentmodule:: redisvl.query
56 | 
57 | 
58 | .. autoclass:: TextQuery
59 |    :members:
60 |    :inherited-members:
61 |    :show-inheritance:
62 |    :exclude-members: add_filter,get_args,highlight,return_field,summarize
63 | 
64 | 
65 | FilterQuery
66 | ===========
67 | 
68 | 
69 | .. currentmodule:: redisvl.query
70 | 
71 | 
72 | .. autoclass:: FilterQuery
73 |    :members:
74 |    :inherited-members:
75 |    :show-inheritance:
76 |    :exclude-members: add_filter,get_args,highlight,return_field,summarize
77 | 
78 | 
79 | 
80 | CountQuery
81 | ==========
82 | 
83 | .. currentmodule:: redisvl.query
84 | 
85 | 
86 | .. autoclass:: CountQuery
87 |    :members:
88 |    :inherited-members:
89 |    :show-inheritance:
90 |    :exclude-members: add_filter,get_args,highlight,return_field,summarize
91 | 


--------------------------------------------------------------------------------
/docs/api/reranker.rst:
--------------------------------------------------------------------------------
 1 | ***********
 2 | Rerankers
 3 | ***********
 4 | 
 5 | CohereReranker
 6 | ==============
 7 | 
 8 | .. _coherereranker_api:
 9 | 
10 | .. currentmodule:: redisvl.utils.rerank.cohere
11 | 
12 | .. autoclass:: CohereReranker
13 |    :show-inheritance:
14 |    :members:
15 | 
16 | 
17 | HFCrossEncoderReranker
18 | ======================
19 | 
20 | .. _hfcrossencoderreranker_api:
21 | 
22 | .. currentmodule:: redisvl.utils.rerank.hf_cross_encoder
23 | 
24 | .. autoclass:: HFCrossEncoderReranker
25 |    :show-inheritance:
26 |    :members:
27 | 
28 | 
29 | VoyageAIReranker
30 | ================
31 | 
32 | .. _voyageaireranker_api:
33 | 
34 | .. currentmodule:: redisvl.utils.rerank.voyageai
35 | 
36 | .. autoclass:: VoyageAIReranker
37 |    :show-inheritance:
38 |    :members:
39 | 


--------------------------------------------------------------------------------
/docs/api/router.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | ***************
 3 | Semantic Router
 4 | ***************
 5 | 
 6 | .. _semantic_router_api:
 7 | 
 8 | 
 9 | Semantic Router
10 | ===============
11 | 
12 | .. currentmodule:: redisvl.extensions.router
13 | 
14 | .. autoclass:: SemanticRouter
15 |    :members:
16 | 
17 | 
18 | Routing Config
19 | ==============
20 | 
21 | .. currentmodule:: redisvl.extensions.router
22 | 
23 | .. autoclass:: RoutingConfig
24 |    :members:
25 | 
26 | 
27 | Route
28 | =====
29 | 
30 | .. currentmodule:: redisvl.extensions.router
31 | 
32 | .. autoclass:: Route
33 |    :members:
34 | 
35 | 
36 | Route Match
37 | ===========
38 | 
39 | .. currentmodule:: redisvl.extensions.router.schema
40 | 
41 | .. autoclass:: RouteMatch
42 |    :members:
43 | 
44 | 
45 | Distance Aggregation Method
46 | ===========================
47 | 
48 | .. currentmodule:: redisvl.extensions.router.schema
49 | 
50 | .. autoclass:: DistanceAggregationMethod
51 |    :members:
52 | 


--------------------------------------------------------------------------------
/docs/api/schema.rst:
--------------------------------------------------------------------------------
  1 | ***********
  2 | Schema
  3 | ***********
  4 | 
  5 | Schema in RedisVL provides a structured format to define index settings and
  6 | field configurations using the following three components:
  7 | 
  8 | .. list-table::
  9 |    :widths: 20 80
 10 |    :header-rows: 1
 11 | 
 12 |    * - Component
 13 |      - Description
 14 |    * - `version`
 15 |      - The version of the schema spec. Current supported version is `0.1.0`.
 16 |    * - `index`
 17 |      - Index specific settings like name, key prefix, key separator, and storage type.
 18 |    * - `fields`
 19 |      - Subset of fields within your data to include in the index and any custom settings.
 20 | 
 21 | 
 22 | IndexSchema
 23 | ===========
 24 | 
 25 | .. _indexschema_api:
 26 | 
 27 | .. currentmodule:: redisvl.schema
 28 | 
 29 | .. autoclass:: IndexSchema
 30 |    :members:
 31 |    :exclude-members: generate_fields,validate_and_create_fields,redis_fields
 32 | 
 33 | 
 34 | Defining Fields
 35 | ===============
 36 | 
 37 | Fields in the schema can be defined in YAML format or as a Python dictionary, specifying a name, type, an optional path, and attributes for customization.
 38 | 
 39 | **YAML Example**:
 40 | 
 41 | .. code-block:: yaml
 42 | 
 43 |     - name: title
 44 |       type: text
 45 |       path: $.document.title
 46 |       attrs:
 47 |         weight: 1.0
 48 |         no_stem: false
 49 |         withsuffixtrie: true
 50 | 
 51 | **Python Dictionary Example**:
 52 | 
 53 | .. code-block:: python
 54 | 
 55 |     {
 56 |         "name": "location",
 57 |         "type": "geo",
 58 |         "attrs": {
 59 |             "sortable": true
 60 |         }
 61 |     }
 62 | 
 63 | Supported Field Types and Attributes
 64 | ====================================
 65 | 
 66 | Each field type supports specific attributes that customize its behavior. Below are the field types and their available attributes:
 67 | 
 68 | **Text Field Attributes**:
 69 | 
 70 | - `weight`: Importance of the field in result calculation.
 71 | - `no_stem`: Disables stemming during indexing.
 72 | - `withsuffixtrie`: Optimizes queries by maintaining a suffix trie.
 73 | - `phonetic_matcher`: Enables phonetic matching.
 74 | - `sortable`: Allows sorting on this field.
 75 | 
 76 | **Tag Field Attributes**:
 77 | 
 78 | - `separator`: Character for splitting text into individual tags.
 79 | - `case_sensitive`: Case sensitivity in tag matching.
 80 | - `withsuffixtrie`: Suffix trie optimization for queries.
 81 | - `sortable`: Enables sorting based on the tag field.
 82 | 
 83 | **Numeric and Geo Field Attributes**:
 84 | 
 85 | - Both numeric and geo fields support the `sortable` attribute, enabling sorting on these fields.
 86 | 
 87 | **Common Vector Field Attributes**:
 88 | 
 89 | - `dims`: Dimensionality of the vector.
 90 | - `algorithm`: Indexing algorithm (`flat` or `hnsw`).
 91 | - `datatype`: Float datatype of the vector (`bfloat16`, `float16`, `float32`, `float64`).
 92 | - `distance_metric`: Metric for measuring query relevance (`COSINE`, `L2`, `IP`).
 93 | 
 94 | **HNSW Vector Field Specific Attributes**:
 95 | 
 96 | - `m`: Max outgoing edges per node in each layer.
 97 | - `ef_construction`: Max edge candidates during build time.
 98 | - `ef_runtime`: Max top candidates during search.
 99 | - `epsilon`: Range search boundary factor.
100 | 
101 | Note:
102 |     See fully documented Redis-supported fields and options here: https://redis.io/commands/ft.create/


--------------------------------------------------------------------------------
/docs/api/searchindex.rst:
--------------------------------------------------------------------------------
 1 | ********************
 2 | Search Index Classes
 3 | ********************
 4 | 
 5 | .. list-table::
 6 |    :widths: 25 75
 7 |    :header-rows: 1
 8 | 
 9 |    * - Class
10 |      - Description
11 |    * - :ref:`searchindex_api`
12 |      - Primary class to write, read, and search across data structures in Redis.
13 |    * - :ref:`asyncsearchindex_api`
14 |      - Async version of the SearchIndex to write, read, and search across data structures in Redis.
15 | 
16 | .. _searchindex_api:
17 | 
18 | SearchIndex
19 | ===========
20 | 
21 | .. currentmodule:: redisvl.index
22 | 
23 | .. autoclass:: SearchIndex
24 |    :inherited-members:
25 |    :members:
26 | 
27 | .. _asyncsearchindex_api:
28 | 
29 | AsyncSearchIndex
30 | ================
31 | 
32 | .. currentmodule:: redisvl.index
33 | 
34 | .. autoclass:: AsyncSearchIndex
35 |    :inherited-members:
36 |    :members:
37 | 


--------------------------------------------------------------------------------
/docs/api/threshold_optimizer.rst:
--------------------------------------------------------------------------------
 1 | ********************
 2 | Threshold Optimizers
 3 | ********************
 4 | 
 5 | CacheThresholdOptimizer
 6 | =======================
 7 | 
 8 | .. _cachethresholdoptimizer_api:
 9 | 
10 | .. currentmodule:: redisvl.utils.optimize.cache
11 | 
12 | .. autoclass:: CacheThresholdOptimizer
13 |    :show-inheritance:
14 |    :members:
15 | 
16 | 
17 | RouterThresholdOptimizer
18 | ========================
19 | 
20 | .. _routerthresholdoptimizer_api:
21 | 
22 | .. currentmodule:: redisvl.utils.optimize.router
23 | 
24 | .. autoclass:: RouterThresholdOptimizer
25 |    :show-inheritance:
26 |    :members:
27 | 


--------------------------------------------------------------------------------
/docs/api/vectorizer.rst:
--------------------------------------------------------------------------------
 1 | ***********
 2 | Vectorizers
 3 | ***********
 4 | 
 5 | HFTextVectorizer
 6 | ================
 7 | 
 8 | .. _hftextvectorizer_api:
 9 | 
10 | .. currentmodule:: redisvl.utils.vectorize.text.huggingface
11 | 
12 | .. autoclass:: HFTextVectorizer
13 |    :show-inheritance:
14 |    :members:
15 | 
16 | 
17 | OpenAITextVectorizer
18 | ====================
19 | 
20 | .. _openaitextvectorizer_api:
21 | 
22 | .. currentmodule:: redisvl.utils.vectorize.text.openai
23 | 
24 | .. autoclass:: OpenAITextVectorizer
25 |    :show-inheritance:
26 |    :members:
27 | 
28 | 
29 | AzureOpenAITextVectorizer
30 | =========================
31 | 
32 | .. _azureopenaitextvectorizer_api:
33 | 
34 | .. currentmodule:: redisvl.utils.vectorize.text.azureopenai
35 | 
36 | .. autoclass:: AzureOpenAITextVectorizer
37 |    :show-inheritance:
38 |    :members:
39 | 
40 | 
41 | VertexAITextVectorizer
42 | ======================
43 | 
44 | .. _vertexaitextvectorizer_api:
45 | 
46 | .. currentmodule:: redisvl.utils.vectorize.text.vertexai
47 | 
48 | .. autoclass:: VertexAITextVectorizer
49 |    :show-inheritance:
50 |    :members:
51 | 
52 | 
53 | CohereTextVectorizer
54 | ====================
55 | 
56 | .. _coheretextvectorizer_api:
57 | 
58 | .. currentmodule:: redisvl.utils.vectorize.text.cohere
59 | 
60 | .. autoclass:: CohereTextVectorizer
61 |    :show-inheritance:
62 |    :members:
63 | 
64 | 
65 | BedrockTextVectorizer
66 | =====================
67 | 
68 | .. _bedrocktextvectorizer_api:
69 | 
70 | .. currentmodule:: redisvl.utils.vectorize.text.bedrock
71 | 
72 | .. autoclass:: BedrockTextVectorizer
73 |    :show-inheritance:
74 |    :members:
75 | 
76 | 
77 | CustomTextVectorizer
78 | ====================
79 | 
80 | .. _customtextvectorizer_api:
81 | 
82 | .. currentmodule:: redisvl.utils.vectorize.text.custom
83 | 
84 | .. autoclass:: CustomTextVectorizer
85 |    :show-inheritance:
86 |    :members:
87 | 
88 | 
89 | VoyageAITextVectorizer
90 | ======================
91 | 
92 | .. _voyageaitextvectorizer_api:
93 | 
94 | .. currentmodule:: redisvl.utils.vectorize.text.voyageai
95 | 
96 | .. autoclass:: VoyageAITextVectorizer
97 |    :show-inheritance:
98 |    :members:
99 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # Configuration file for the Sphinx documentation builder.
  2 | #
  3 | # This file only contains a selection of the most common options. For a full
  4 | # list see the documentation:
  5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
  6 | 
  7 | # -- Path setup --------------------------------------------------------------
  8 | 
  9 | # If extensions (or modules to document with autodoc) are in another directory,
 10 | # add these directories to sys.path here. If the directory is relative to the
 11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 12 | #
 13 | import os
 14 | import sys
 15 | sys.path.insert(0, os.path.abspath('.'))
 16 | 
 17 | print(f"Sphinx is using Python executable at: {sys.executable}", flush=True)
 18 | print(f"Python version: {sys.version}", flush=True)
 19 | 
 20 | # -- Project information -----------------------------------------------------
 21 | 
 22 | from redisvl.version import __version__
 23 | 
 24 | project = 'RedisVL'
 25 | copyright = '2024, Redis Inc.'
 26 | author = 'Redis Applied AI'
 27 | version = __version__
 28 | 
 29 | # The full version, including alpha/beta/rc tags
 30 | release = version
 31 | 
 32 | 
 33 | # -- General configuration ---------------------------------------------------
 34 | 
 35 | # Add any Sphinx extension module names here, as strings. They can be
 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 37 | # ones.
 38 | extensions = [
 39 |     'sphinx.ext.autodoc',
 40 |     'sphinx.ext.todo',
 41 |     'sphinx.ext.coverage',
 42 |     'sphinx.ext.imgmath',
 43 |     'sphinx.ext.viewcode',
 44 |     'sphinx.ext.githubpages',
 45 |     'sphinx.ext.autosummary',
 46 |     'sphinx.ext.napoleon',
 47 |     "sphinx_design",
 48 |     "sphinx_copybutton",
 49 |     "_extension.gallery_directive",
 50 |     'nbsphinx',
 51 |     "myst_nb",
 52 |     "sphinx_favicon"
 53 | ]
 54 | 
 55 | 
 56 | # Add any paths that contain templates here, relative to this directory.
 57 | templates_path = ['_templates']
 58 | 
 59 | # List of patterns, relative to source directory, that match files and
 60 | # directories to ignore when looking for source files.
 61 | # This pattern also affects html_static_path and html_extra_path.
 62 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', "**.ipynb_checkpoints"]
 63 | 
 64 | 
 65 | # -- Options for HTML output -------------------------------------------------
 66 | 
 67 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 68 | # a list of builtin themes.
 69 | html_theme = "pydata_sphinx_theme"
 70 | 
 71 | # Add any paths that contain custom static files (such as style sheets) here,
 72 | # relative to this directory. They are copied after the builtin static files,
 73 | # so a file named "default.css" will overwrite the builtin "default.css".
 74 | html_static_path = ['_static']
 75 | html_css_files=["css/custom.css"]
 76 | html_title = "RedisVL"
 77 | html_context = {
 78 |    "default_mode": "dark"
 79 | }
 80 | html_logo = "_static/Redis_Favicon_32x32_Red.png"
 81 | html_favicon = "_static/Redis_Favicon_32x32_Red.png"
 82 | html_context = {
 83 |     "github_user": "redis",
 84 |     "github_repo": "redis-vl-python",
 85 |     "github_version": "main",
 86 |     "doc_path": "docs",
 87 | }
 88 | html_sidebars = {
 89 |     'examples': []
 90 | }
 91 | 
 92 | 
 93 | # This allows us to use ::: to denote directives, useful for admonitions
 94 | myst_enable_extensions = ["colon_fence"]
 95 | myst_heading_anchors = 3
 96 | 
 97 | html_theme_options = {
 98 |     "logo": {
 99 |         "text": "RedisVL",
100 |         "image_dark": "_static/Redis_Logo_Red_RGB.svg",
101 |         "alt_text": "RedisVL",
102 |     },
103 |     "use_edit_page_button": True,
104 |     "show_toc_level": 4,
105 |     "show_nav_level": 2,
106 |     "navigation_depth": 5,
107 |     "navbar_align": "content",  # [left, content, right] For testing that the navbar items align properly
108 |     "secondary_sidebar_items": {
109 |         "examples": [],
110 |     },
111 |     "navbar_start": ["navbar-logo"],
112 |     "icon_links": [
113 |         {
114 |             "name": "GitHub",
115 |             "url": "https://github.com/redis/redis-vl-python",
116 |             "icon": "fa-brands fa-square-github",
117 |             "type": "fontawesome",
118 |         }
119 |     ]
120 | }
121 | 
122 | 
123 | autoclass_content = 'both'
124 | add_module_names = False
125 | 
126 | nbsphinx_execute = 'never'
127 | jupyter_execute_notebooks = "off"
128 | 
129 | # -- Options for autosummary/autodoc output ------------------------------------
130 | autosummary_generate = True
131 | autodoc_typehints = "description"
132 | autodoc_member_order = "groupwise"
133 | 
134 | # -- Options for autoapi -------------------------------------------------------
135 | autoapi_type = "python"
136 | autoapi_dirs = ["../src/redisvl"]
137 | autoapi_keep_files = True
138 | autoapi_root = "api"
139 | autoapi_member_order = "groupwise"
140 | 
141 | 
142 | # -- favicon options ---------------------------------------------------------
143 | 
144 | # see https://sphinx-favicon.readthedocs.io for more information about the
145 | # sphinx-favicon extension
146 | 
147 | favicons = [
148 |     # generic icons compatible with most browsers
149 |     "Redis_Favicon_32x32_Red.png",
150 |     "Redis_Favicon_16x16_Red.png",
151 |     "Redis_Favicon_144x144_Red.png",
152 | ]
153 | 


--------------------------------------------------------------------------------
/docs/examples/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | myst:
 3 |   html_meta:
 4 |     "description lang=en": |
 5 |         Examples for RedisVL users
 6 | ---
 7 | 
 8 | 
 9 | # Example Gallery
10 | 
11 | Explore community examples of RedisVL in the wild.
12 | 
13 | ```{note}
14 | If you are using RedisVL, please consider adding your example to this page by
15 | opening a Pull Request on [GitHub](https://github.com/redis/redis-vl-python)
16 | ```
17 | 
18 | ```{gallery-grid} ../_static/gallery.yaml
19 | :grid-columns: "1 1 2 2"
20 | ```
21 | 
22 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | myst:
 3 |   html_meta:
 4 |     "description lang=en": |
 5 |       Top-level documentation for RedisVL, with links to the rest
 6 |       of the site..
 7 | html_theme.sidebar_secondary.remove: false
 8 | ---
 9 | 
10 | # Redis Vector Library (RedisVL)
11 | 
12 | A powerful, AI-native Python client library for [Redis](https://redis.io). Leverage the speed, flexibility, and reliability of Redis for real-time data to supercharge your AI application.
13 | 
14 | ```{gallery-grid}
15 | :grid-columns: 1 2 2 3
16 | 
17 | - header: "{fab}`bootstrap;pst-color-primary` Index Management"
18 |   content: "Design search schema and indices with ease from YAML, with Python, or from the CLI."
19 | - header: "{fas}`bolt;pst-color-primary` Advanced Vector Search"
20 |   content: "Perform powerful vector search queries with complex filtering support."
21 | - header: "{fas}`circle-half-stroke;pst-color-primary` Embedding Creation"
22 |   content: "Use OpenAI or any of the other supported vectorizers to create embeddings."
23 |   link: "user_guide/04_vectorizers"
24 | - header: "{fas}`palette;pst-color-primary` CLI"
25 |   content: "Interact with RedisVL using a Command Line Interface (CLI) for ease of use."
26 | - header: "{fab}`python;pst-color-primary` Semantic Caching"
27 |   content: "Extend RedisVL to cache LLM results, increasing QPS and decreasing system cost."
28 |   link: "user_guide/03_llmcache"
29 | - header: "{fas}`lightbulb;pst-color-primary` Example Gallery"
30 |   content: "Explore the gallery of examples to get started."
31 |   link: "examples/index"
32 | ```
33 | 
34 | ## Installation
35 | 
36 | Install `redisvl` into your Python (>=3.8) environment using `pip`:
37 | 
38 | ```bash
39 | pip install redisvl
40 | ```
41 | 
42 | Then make sure to have [Redis](https://redis.io) accessible with Search & Query features enabled on [Redis Cloud](https://redis.io/cloud) or locally in docker with [Redis Stack](https://redis.io/docs/getting-started/install-stack/docker/):
43 | 
44 | ```bash
45 | docker run -d --name redis -p 6379:6379 -p 8001:8001 redis/redis-stack:latest
46 | ```
47 | 
48 | This will also spin up the [Redis Insight GUI](https://redis.io/insight/) at `http://localhost:8001`.
49 | 
50 | 
51 | ## Table of Contents
52 | 
53 | ```{toctree}
54 | :maxdepth: 2
55 | 
56 | Overview <overview/index>
57 | API <api/index>
58 | User Guides <user_guide/index>
59 | Example Gallery <examples/index>
60 | ```
61 | 
62 | ```{toctree}
63 | :hidden:
64 | 
65 | Changelog <https://github.com/redis/redis-vl-python/releases>
66 | ```
67 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/overview/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | myst:
 3 |   html_meta:
 4 |     "description lang=en": |
 5 |       User Guides for RedisVL
 6 | ---
 7 | 
 8 | # Overview
 9 | 
10 | 
11 | ```{toctree}
12 | :caption: Overview
13 | :maxdepth: 2
14 | 
15 | installation
16 | cli
17 | ```
18 | 
19 | 


--------------------------------------------------------------------------------
/docs/overview/installation.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | myst:
 3 |   html_meta:
 4 |     "description lang=en": |
 5 |       Installation instructions for RedisVL
 6 | ---
 7 | 
 8 | # Install RedisVL
 9 | 
10 | There are a few ways to install RedisVL. The easiest way is to use pip.
11 | 
12 | ## Install RedisVL with Pip
13 | 
14 | Install `redisvl` into your Python (>=3.8) environment using `pip`:
15 | 
16 | ```bash
17 | $ pip install -U redisvl
18 | ```
19 | 
20 | RedisVL comes with a few dependencies that are automatically installed, however, a few dependencies
21 | are optional and can be installed separately if needed:
22 | 
23 | ```bash
24 | $ pip install redisvl[all]  # install vectorizer dependencies
25 | $ pip install redisvl[dev]  # install dev dependencies
26 | ```
27 | 
28 | If you use ZSH, remember to escape the brackets:
29 | 
30 | ```bash
31 | $ pip install redisvl\[all\]
32 | ```
33 | 
34 | This library supports the use of hiredis, so you can also install by running:
35 | 
36 | ```bash
37 | pip install redisvl[hiredis]
38 | ```
39 | 
40 | ## Install RedisVL from Source
41 | 
42 | To install RedisVL from source, clone the repository and install the package using `pip`:
43 | 
44 | ```bash
45 | $ git clone https://github.com/redis/redis-vl-python.git && cd redisvl
46 | $ pip install .
47 | 
48 | # or for an editable installation (for developers of RedisVL)
49 | $ pip install -e .
50 | ```
51 | 
52 | 
53 | ## Installing Redis
54 | 
55 | RedisVL requires a distribution of Redis that supports the [Search and Query](https://redis.com/modules/redis-search/) capability of which there are 3:
56 | 
57 | offering
58 | 1. [Redis Cloud](https://redis.io/cloud), a fully managed cloud offering
59 | 2. [Redis Stack](https://redis.io/docs/getting-started/install-stack/docker/), a local docker image for testing and development
60 | 3. [Redis Enterprise](https://redis.com/redis-enterprise/), a commercial self-hosted
61 | 
62 | 
63 | 
64 | ### Redis Cloud
65 | 
66 | Redis Cloud is the easiest way to get started with RedisVL. You can sign up for a free account [here](https://redis.io/cloud). Make sure to have the `Search and Query`
67 | capability enabled when creating your database.
68 | 
69 | 
70 | ### Redis Stack (local development)
71 | 
72 | For local development and testing, Redis-Stack can be used. We recommend running Redis
73 | in a docker container. To do so, run the following command:
74 | 
75 | ```bash
76 | docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest
77 | ```
78 | 
79 | This will also spin up the [Redis Insight GUI](https://redis.io/insight/) at `http://localhost:8001`.
80 | 
81 | 
82 | ### Redis Enterprise (self-hosted)
83 | 
84 | Redis Enterprise is a commercial offering that can be self-hosted. You can download the latest version [here](https://redis.io/downloads/).
85 | 
86 | If you are considering a self-hosted Redis Enterprise deployment on Kubernetes, there is the [Redis Enterprise Operator](https://docs.redis.com/latest/kubernetes/) for Kubernetes. This will allow you to easily deploy and manage a Redis Enterprise cluster on Kubernetes.


--------------------------------------------------------------------------------
/docs/overview/schema.yaml:
--------------------------------------------------------------------------------
 1 | 
 2 | version: '0.1.0'
 3 | 
 4 | index:
 5 |     name: vectorizers
 6 |     prefix: doc
 7 |     storage_type: hash
 8 | 
 9 | fields:
10 |     - name: sentence
11 |       type: text
12 |     - name: embedding
13 |       type: vector
14 |       attrs:
15 |         dims: 768
16 |         algorithm: flat
17 |         distance_metric: cosine
18 | 


--------------------------------------------------------------------------------
/docs/user_guide/hybrid_example_data.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/redis/redis-vl-python/defa62a3f291fd8093baf0c652c7d71e6528bfcb/docs/user_guide/hybrid_example_data.pkl


--------------------------------------------------------------------------------
/docs/user_guide/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | myst:
 3 |   html_meta:
 4 |     "description lang=en": |
 5 |       User guides for RedisVL
 6 | ---
 7 | 
 8 | # User Guides
 9 | User guides provide helpful resources for using RedisVL and its different components.
10 | 
11 | ```{toctree}
12 | :caption: User Guides
13 | :maxdepth: 2
14 | 
15 | 01_getting_started
16 | 02_hybrid_queries
17 | 03_llmcache
18 | 10_embeddings_cache
19 | 04_vectorizers
20 | 05_hash_vs_json
21 | 06_rerankers
22 | 07_message_history
23 | 08_semantic_router
24 | 09_threshold_optimization
25 | release_guide/index
26 | ```
27 | 


--------------------------------------------------------------------------------
/docs/user_guide/jupyterutils.py:
--------------------------------------------------------------------------------
 1 | from IPython.display import HTML, display
 2 | from redis.commands.search.result import Result
 3 | 
 4 | 
 5 | def table_print(dict_list):
 6 |     # If there's nothing in the list, there's nothing to print
 7 |     if len(dict_list) == 0:
 8 |         return
 9 | 
10 |     # Getting column names (dictionary keys) using the first dictionary
11 |     columns = dict_list[0].keys()
12 | 
13 |     # HTML table header
14 |     html = "<table><tr><th>"
15 |     html += "</th><th>".join(columns)
16 |     html += "</th></tr>"
17 | 
18 |     # HTML table content
19 |     for dictionary in dict_list:
20 |         html += "<tr><td>"
21 |         html += "</td><td>".join(str(dictionary[column]) for column in columns)
22 |         html += "</td></tr>"
23 | 
24 |     # HTML table footer
25 |     html += "</table>"
26 | 
27 |     # Displaying the table
28 |     display(HTML(html))
29 | 
30 | 
31 | def result_print(results):
32 |     if isinstance(results, Result):
33 |         # If there's nothing in the list, there's nothing to print
34 |         if len(results.docs) == 0:
35 |             return
36 | 
37 |         results = [doc.__dict__ for doc in results.docs]
38 | 
39 |     to_remove = ["id", "payload"]
40 |     for doc in results:
41 |         for key in to_remove:
42 |             if key in doc:
43 |                 del doc[key]
44 | 
45 |     table_print(results)


--------------------------------------------------------------------------------
/docs/user_guide/release_guide/index.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | myst:
 3 |   html_meta:
 4 |     "description lang=en": |
 5 |       Release guides for RedisVL
 6 | ---
 7 | 
 8 | # Release Guides
 9 | 
10 | This section contains guidelines and information for RedisVL releases.
11 | 
12 | ```{toctree}
13 | :caption: Release Guides
14 | :maxdepth: 2
15 | 
16 | 0_5_1_release
17 | ```
18 | 


--------------------------------------------------------------------------------
/docs/user_guide/router.yaml:
--------------------------------------------------------------------------------
 1 | name: topic-router
 2 | routes:
 3 | - name: technology
 4 |   references:
 5 |   - what are the latest advancements in AI?
 6 |   - tell me about the newest gadgets
 7 |   - what's trending in tech?
 8 |   metadata:
 9 |     category: tech
10 |     priority: 1
11 |   distance_threshold: 0.71
12 | - name: sports
13 |   references:
14 |   - who won the game last night?
15 |   - tell me about the upcoming sports events
16 |   - what's the latest in the world of sports?
17 |   - sports
18 |   - basketball and football
19 |   metadata:
20 |     category: sports
21 |     priority: 2
22 |   distance_threshold: 0.72
23 | - name: entertainment
24 |   references:
25 |   - what are the top movies right now?
26 |   - who won the best actor award?
27 |   - what's new in the entertainment industry?
28 |   metadata:
29 |     category: entertainment
30 |     priority: 3
31 |   distance_threshold: 0.7
32 | vectorizer:
33 |   type: hf
34 |   model: sentence-transformers/all-mpnet-base-v2
35 | routing_config:
36 |   max_k: 3
37 |   aggregation_method: min
38 | 


--------------------------------------------------------------------------------
/docs/user_guide/schema.yaml:
--------------------------------------------------------------------------------
 1 | version: '0.1.0'
 2 | 
 3 | index:
 4 |     name: vectorizers
 5 |     prefix: doc
 6 |     storage_type: hash
 7 | 
 8 | fields:
 9 |     - name: sentence
10 |       type: text
11 |     - name: embedding
12 |       type: vector
13 |       attrs:
14 |         dims: 768
15 |         algorithm: flat
16 |         distance_metric: cosine


--------------------------------------------------------------------------------
/doctests/data/query_vector_idx.yaml:
--------------------------------------------------------------------------------
 1 | version: '0.1.0'
 2 | 
 3 | index:
 4 |   name: idx:bicycle
 5 |   prefix: bicycle
 6 |   storage_type: json
 7 | 
 8 | fields:
 9 |     - name: description
10 |       type: text
11 |     - name: description_embeddings
12 |       type: vector
13 |       attrs:
14 |         algorithm: flat
15 |         dims: 384
16 |         distance_metric: cosine
17 |         datatype: float32
18 | 


--------------------------------------------------------------------------------
/doctests/query_vector.py:
--------------------------------------------------------------------------------
 1 | # EXAMPLE: query_vector
 2 | # HIDE_START
 3 | import json
 4 | import warnings
 5 | import redis
 6 | import numpy as np
 7 | from redisvl.index import SearchIndex
 8 | from redisvl.query import RangeQuery, VectorQuery
 9 | from redisvl.schema import IndexSchema
10 | from sentence_transformers import SentenceTransformer
11 | 
12 | 
13 | def embed_text(model, text):
14 |     return np.array(model.encode(text)).astype(np.float32).tobytes()
15 | 
16 | r = redis.Redis(decode_responses=True)
17 | 
18 | warnings.filterwarnings("ignore", category=FutureWarning, message=r".*clean_up_tokenization_spaces.*")
19 | model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
20 | 
21 | # create index
22 | schema = IndexSchema.from_yaml('data/query_vector_idx.yaml')
23 | index = SearchIndex(schema, r)
24 | index.create(overwrite=True, drop=True)
25 | 
26 | # load data
27 | with open("data/query_vector.json") as f:
28 |     bicycles = json.load(f)
29 | index.load(bicycles)
30 | # HIDE_END
31 | 
32 | # STEP_START vector1
33 | query = "Bike for small kids"
34 | query_vector = embed_text(model, query)
35 | print(query_vector[:10]) # >>> b'\x02=c=\x93\x0e\xe0=aC'
36 | 
37 | vquery = VectorQuery(
38 |     vector=query_vector,
39 |     vector_field_name="description_embeddings",
40 |     num_results=3,
41 |     return_score=True,
42 |     return_fields=["description"]
43 | )
44 | res = index.query(vquery)
45 | print(res) # >>> [{'id': 'bicycle:6b702e8b...', 'vector_distance': '0.399111807346', 'description': 'Kids want...
46 | # REMOVE_START
47 | assert len(res) == 3
48 | # REMOVE_END
49 | # STEP_END
50 | 
51 | # STEP_START vector2
52 | vquery = RangeQuery(
53 |     vector=query_vector,
54 |     vector_field_name="description_embeddings",
55 |     distance_threshold=0.5,
56 |     return_score=True
57 | ).return_fields("description").dialect(2)
58 | res = index.query(vquery)
59 | print(res) # >>> [{'id': 'bicycle:6bcb1bb4...', 'vector_distance': '0.399111807346', 'description': 'Kids want...
60 | # REMOVE_START
61 | assert len(res) == 2
62 | # REMOVE_END
63 | # STEP_END
64 | 
65 | # REMOVE_START
66 | # destroy index and data
67 | index.delete(drop=True)
68 | # REMOVE_END
69 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [tool.poetry]
  2 | name = "redisvl"
  3 | version = "0.7.0"
  4 | description = "Python client library and CLI for using Redis as a vector database"
  5 | authors = ["Redis Inc. <applied.ai@redis.com>"]
  6 | license = "MIT"
  7 | readme = "README.md"
  8 | homepage = "https://github.com/redis/redis-vl-python"
  9 | repository = "https://github.com/redis/redis-vl-python"
 10 | documentation = "https://docs.redisvl.com"
 11 | keywords = ["ai", "redis", "redis-client", "vector-database", "vector-search"]
 12 | classifiers = [
 13 |   "Programming Language :: Python :: 3.9",
 14 |   "Programming Language :: Python :: 3.10",
 15 |   "Programming Language :: Python :: 3.11",
 16 |   "Programming Language :: Python :: 3.12",
 17 |   "Programming Language :: Python :: 3.13",
 18 |   "License :: OSI Approved :: MIT License",
 19 | ]
 20 | packages = [{ include = "redisvl", from = "." }]
 21 | 
 22 | [tool.poetry.dependencies]
 23 | python = ">=3.9,<3.14"
 24 | numpy = ">=1.26.0,<3"
 25 | pyyaml = ">=5.4,<7.0"
 26 | redis = ">=5.0,<7.0"
 27 | pydantic = ">=2,<3"
 28 | tenacity = ">=8.2.2"
 29 | ml-dtypes = ">=0.4.0,<1.0.0"
 30 | python-ulid = ">=3.0.0"
 31 | jsonpath-ng = ">=1.5.0"
 32 | nltk = { version = "^3.8.1", optional = true }
 33 | openai = { version = ">=1.1.0", optional = true }
 34 | google-cloud-aiplatform = { version = ">=1.26,<2.0.0", optional = true }
 35 | protobuf = { version = ">=5.28.0,<6.0.0", optional = true }
 36 | cohere = { version = ">=4.44", optional = true }
 37 | mistralai = { version = ">=1.0.0", optional = true }
 38 | voyageai = { version = ">=0.2.2", optional = true }
 39 | sentence-transformers = { version = "^3.4.0", optional = true }
 40 | scipy = [
 41 |   { version = ">=1.9.0,<1.14", python = "<3.10", optional = true },
 42 |   { version = ">=1.14.0,<1.16", python = ">=3.10", optional = true }
 43 | ]
 44 | boto3 = { version = "^1.36.0", optional = true }
 45 | urllib3 = { version = "<2.2.0", optional = true }
 46 | ranx = {version = "^0.3.20", optional = true}
 47 | 
 48 | [tool.poetry.extras]
 49 | mistralai = ["mistralai"]
 50 | openai = ["openai"]
 51 | nltk = ["nltk"]
 52 | cohere = ["cohere"]
 53 | voyageai = ["voyageai"]
 54 | sentence-transformers = ["sentence-transformers", "scipy"]
 55 | vertexai = ["google-cloud-aiplatform", "protobuf"]
 56 | bedrock = ["boto3", "urllib3"]
 57 | 
 58 | [tool.poetry.group.dev.dependencies]
 59 | black = "^25.1.0"
 60 | isort = "^5.6.4"
 61 | pylint = "^3.1.0"
 62 | pytest = "^8.1.1"
 63 | pytest-asyncio = "^0.23.6"
 64 | pytest-xdist = {extras = ["psutil"], version = "^3.6.1"}
 65 | pre-commit = "^4.1.0"
 66 | mypy = "^1.11.0"
 67 | nbval = "^0.11.0"
 68 | types-pyyaml = "*"
 69 | types-pyopenssl = "*"
 70 | testcontainers = "^4.3.1"
 71 | cryptography = { version = ">=44.0.1", markers = "python_version > '3.9.1'" }
 72 | 
 73 | [tool.poetry.group.docs.dependencies]
 74 | sphinx = ">=4.4.0"
 75 | pydata-sphinx-theme = "^0.15.2"
 76 | nbsphinx = "^0.9.3"
 77 | jinja2 = "^3.1.3"
 78 | sphinx-copybutton = "^0.5.2"
 79 | sphinx-favicon = "^1.0.1"
 80 | sphinx-design = "^0.5.0"
 81 | myst-nb = "^1.1.0"
 82 | 
 83 | [tool.poetry.scripts]
 84 | rvl = "redisvl.cli.runner:main"
 85 | format = "scripts:format"
 86 | check-format = "scripts:check_format"
 87 | sort-imports = "scripts:sort_imports"
 88 | check-sort-imports = "scripts:check_sort_imports"
 89 | check-lint = "scripts:check_lint"
 90 | check-mypy = "scripts:check_mypy"
 91 | test = "scripts:test"
 92 | test-verbose = "scripts:test_verbose"
 93 | test-notebooks = "scripts:test_notebooks"
 94 | build-docs = "scripts:build_docs"
 95 | serve-docs = "scripts:serve_docs"
 96 | 
 97 | [build-system]
 98 | requires = ["poetry-core>=1.0.0"]
 99 | build-backend = "poetry.core.masonry.api"
100 | 
101 | [tool.black]
102 | target-version = ['py39', 'py310', 'py311', 'py312', 'py313']
103 | exclude = '''
104 | (
105 |   | \.egg
106 |   | \.git
107 |   | \.hg
108 |   | \.mypy_cache
109 |   | \.nox
110 |   | \.tox
111 |   | \.venv
112 |   | _build
113 |   | build
114 |   | dist
115 |   | setup.py
116 | )
117 | '''
118 | 
119 | [tool.pytest.ini_options]
120 | log_cli = true
121 | asyncio_mode = "auto"
122 | 
123 | [tool.mypy]
124 | warn_unused_configs = true
125 | ignore_missing_imports = true
126 | 


--------------------------------------------------------------------------------
/redisvl/__init__.py:
--------------------------------------------------------------------------------
1 | from redisvl.version import __version__
2 | 
3 | all = ["__version__"]
4 | 


--------------------------------------------------------------------------------
/redisvl/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/redis/redis-vl-python/defa62a3f291fd8093baf0c652c7d71e6528bfcb/redisvl/cli/__init__.py


--------------------------------------------------------------------------------
/redisvl/cli/main.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import sys
 3 | 
 4 | from redisvl.cli.index import Index
 5 | from redisvl.cli.stats import Stats
 6 | from redisvl.cli.version import Version
 7 | from redisvl.utils.log import get_logger
 8 | 
 9 | logger = get_logger(__name__)
10 | 
11 | 
12 | def _usage():
13 |     usage = [
14 |         "rvl <command> [<args>]\n",
15 |         "Commands:",
16 |         "\tindex       Index manipulation (create, delete, etc.)",
17 |         "\tversion     Obtain the version of RedisVL",
18 |         "\tstats       Obtain statistics about an index",
19 |     ]
20 |     return "\n".join(usage) + "\n"
21 | 
22 | 
23 | class RedisVlCLI:
24 |     def __init__(self):
25 |         parser = argparse.ArgumentParser(
26 |             description="Redis Vector Library CLI", usage=_usage()
27 |         )
28 | 
29 |         parser.add_argument("command", help="Subcommand to run")
30 | 
31 |         if len(sys.argv) < 2:
32 |             parser.print_help()
33 |             exit(0)
34 | 
35 |         args = parser.parse_args(sys.argv[1:2])
36 |         if not hasattr(self, args.command):
37 |             parser.print_help()
38 |             exit(0)
39 |         getattr(self, args.command)()
40 | 
41 |     def index(self):
42 |         Index()
43 |         exit(0)
44 | 
45 |     def version(self):
46 |         Version()
47 |         exit(0)
48 | 
49 |     def stats(self):
50 |         Stats()
51 |         exit(0)
52 | 


--------------------------------------------------------------------------------
/redisvl/cli/runner.py:
--------------------------------------------------------------------------------
 1 | from redisvl.cli.main import RedisVlCLI
 2 | 
 3 | 
 4 | def main():
 5 |     """Main call to init the RedisVL CLI tool."""
 6 |     RedisVlCLI()
 7 | 
 8 | 
 9 | if __name__ == "__main__":
10 |     main()
11 | 


--------------------------------------------------------------------------------
/redisvl/cli/stats.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import sys
  3 | from argparse import Namespace
  4 | 
  5 | from redisvl.cli.utils import add_index_parsing_options, create_redis_url
  6 | from redisvl.index import SearchIndex
  7 | from redisvl.schema.schema import IndexSchema
  8 | from redisvl.utils.log import get_logger
  9 | from redisvl.utils.utils import lazy_import
 10 | 
 11 | logger = get_logger("[RedisVL]")
 12 | 
 13 | STATS_KEYS = [
 14 |     "num_docs",
 15 |     "num_terms",
 16 |     "max_doc_id",
 17 |     "num_records",
 18 |     "percent_indexed",
 19 |     "hash_indexing_failures",
 20 |     "number_of_uses",
 21 |     "bytes_per_record_avg",
 22 |     "doc_table_size_mb",
 23 |     "inverted_sz_mb",
 24 |     "key_table_size_mb",
 25 |     "offset_bits_per_record_avg",
 26 |     "offset_vectors_sz_mb",
 27 |     "offsets_per_term_avg",
 28 |     "records_per_doc_avg",
 29 |     "sortable_values_size_mb",
 30 |     "total_indexing_time",
 31 |     "total_inverted_index_blocks",
 32 |     "vector_index_sz_mb",
 33 | ]
 34 | 
 35 | 
 36 | class Stats:
 37 |     usage = "\n".join(
 38 |         [
 39 |             "rvl stats [<args>]\n",
 40 |         ]
 41 |     )
 42 | 
 43 |     def __init__(self):
 44 |         parser = argparse.ArgumentParser(usage=self.usage)
 45 | 
 46 |         parser.add_argument(
 47 |             "-f", "--format", help="Output format", type=str, default="rounded_outline"
 48 |         )
 49 |         parser = add_index_parsing_options(parser)
 50 |         args = parser.parse_args(sys.argv[2:])
 51 |         try:
 52 |             self.stats(args)
 53 |         except Exception as e:
 54 |             logger.error(e)
 55 |             exit(0)
 56 | 
 57 |     def stats(self, args: Namespace):
 58 |         """Obtain stats about an index.
 59 | 
 60 |         Usage:
 61 |             rvl stats -i <index_name> | -s <schema_path>
 62 |         """
 63 |         index = self._connect_to_index(args)
 64 |         _display_stats(index.info(), output_format=args.format)
 65 | 
 66 |     def _connect_to_index(self, args: Namespace) -> SearchIndex:
 67 |         # connect to redis
 68 |         try:
 69 |             redis_url = create_redis_url(args)
 70 |         except ValueError:
 71 |             logger.error(
 72 |                 "Must set REDIS_ADDRESS environment variable or provide host and port"
 73 |             )
 74 |             exit(0)
 75 | 
 76 |         if args.index:
 77 |             schema = IndexSchema.from_dict({"index": {"name": args.index}})
 78 |             index = SearchIndex(schema=schema, redis_url=redis_url)
 79 |         elif args.schema:
 80 |             index = SearchIndex.from_yaml(args.schema, redis_url=redis_url)
 81 |         else:
 82 |             logger.error("Index name or schema must be provided")
 83 |             exit(0)
 84 | 
 85 |         return index
 86 | 
 87 | 
 88 | def _display_stats(index_info, output_format="rounded_outline"):
 89 |     # Extracting the statistics
 90 |     stats_data = [(key, str(index_info.get(key))) for key in STATS_KEYS]
 91 | 
 92 |     # Display the statistics in tabular format
 93 |     print("\nStatistics:")
 94 |     max_key_length = max(len(key) for key, _ in stats_data)
 95 |     horizontal_line = "─" * (max_key_length + 2)
 96 |     print(f"╭{horizontal_line}┬────────────╮")  # top row
 97 |     print("│ Stat Key                    │ Value      │")  # header row
 98 |     print(f"├{horizontal_line}┼────────────┤")  # separator row
 99 |     for key, value in stats_data:
100 |         print(f"│ {key:<27} │ {value[0:10]:<10} │")  # data rows
101 |     print(f"╰{horizontal_line}┴────────────╯")  # bottom row
102 | 


--------------------------------------------------------------------------------
/redisvl/cli/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from argparse import ArgumentParser, Namespace
 3 | 
 4 | from redisvl.utils.log import get_logger
 5 | 
 6 | logger = get_logger("[RedisVL]")
 7 | 
 8 | 
 9 | def create_redis_url(args: Namespace) -> str:
10 |     env_address = os.getenv("REDIS_URL")
11 |     if env_address:
12 |         logger.info(f"Using Redis address from environment variable, REDIS_URL")
13 |         return env_address
14 |     elif args.url:
15 |         return args.url
16 |     else:
17 |         url = "redis://"
18 |         if args.ssl:
19 |             url += "rediss://"
20 |         if args.user:
21 |             url += args.user
22 |             if args.password:
23 |                 url += ":" + args.password
24 |             url += "@"
25 |         url += args.host + ":" + str(args.port)
26 |         return url
27 | 
28 | 
29 | def add_index_parsing_options(parser: ArgumentParser) -> ArgumentParser:
30 |     parser.add_argument("-i", "--index", help="Index name", type=str, required=False)
31 |     parser.add_argument(
32 |         "-s", "--schema", help="Path to schema file", type=str, required=False
33 |     )
34 |     parser.add_argument("-u", "--url", help="Redis URL", type=str, required=False)
35 |     parser.add_argument("--host", help="Redis host", type=str, default="localhost")
36 |     parser.add_argument("-p", "--port", help="Redis port", type=int, default=6379)
37 |     parser.add_argument("--user", help="Redis username", type=str, default="default")
38 |     parser.add_argument("--ssl", help="Use SSL", action="store_true")
39 |     parser.add_argument("-a", "--password", help="Redis password", type=str, default="")
40 |     return parser
41 | 


--------------------------------------------------------------------------------
/redisvl/cli/version.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import sys
 3 | from argparse import Namespace
 4 | 
 5 | from redisvl import __version__
 6 | from redisvl.utils.log import get_logger
 7 | 
 8 | logger = get_logger("[RedisVL]")
 9 | 
10 | 
11 | class Version:
12 |     usage = "\n".join(
13 |         [
14 |             "rvl version [<args>]\n",
15 |             "\n",
16 |         ]
17 |     )
18 | 
19 |     def __init__(self):
20 |         parser = argparse.ArgumentParser(usage=self.usage)
21 |         parser.add_argument(
22 |             "-s", "--short", help="Print only the version number", action="store_true"
23 |         )
24 | 
25 |         args = parser.parse_args(sys.argv[2:])
26 |         self.version(args)
27 | 
28 |     def version(self, args: Namespace):
29 |         if args.short:
30 |             print(__version__)
31 |         else:
32 |             logger.info(f"RedisVL version {__version__}")
33 | 


--------------------------------------------------------------------------------
/redisvl/exceptions.py:
--------------------------------------------------------------------------------
 1 | """
 2 | RedisVL Exception Classes
 3 | 
 4 | This module defines all custom exceptions used throughout the RedisVL library.
 5 | """
 6 | 
 7 | 
 8 | class RedisVLError(Exception):
 9 |     """Base exception for all RedisVL errors."""
10 | 
11 |     pass
12 | 
13 | 
14 | class RedisModuleVersionError(RedisVLError):
15 |     """Error raised when required Redis modules are missing or have incompatible versions."""
16 | 
17 |     pass
18 | 
19 | 
20 | class RedisSearchError(RedisVLError):
21 |     """Error raised for Redis Search specific operations."""
22 | 
23 |     pass
24 | 
25 | 
26 | class SchemaValidationError(RedisVLError):
27 |     """Error when validating data against a schema."""
28 | 
29 |     def __init__(self, message, index=None):
30 |         if index is not None:
31 |             message = f"Validation failed for object at index {index}: {message}"
32 |         super().__init__(message)
33 | 
34 | 
35 | class QueryValidationError(RedisVLError):
36 |     """Error when validating a query."""
37 | 
38 |     pass
39 | 


--------------------------------------------------------------------------------
/redisvl/extensions/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/redisvl/extensions/cache/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Redis Vector Library Cache Extensions
 3 | 
 4 | This module provides caching functionality for Redis Vector Library,
 5 | including both embedding caches and LLM response caches.
 6 | """
 7 | 
 8 | from redisvl.extensions.cache.base import BaseCache
 9 | 
10 | __all__ = ["BaseCache"]
11 | 


--------------------------------------------------------------------------------
/redisvl/extensions/cache/embeddings/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Redis Vector Library - Embeddings Cache Extensions
 3 | 
 4 | This module provides embedding caching functionality for RedisVL.
 5 | """
 6 | 
 7 | from redisvl.extensions.cache.embeddings.embeddings import EmbeddingsCache
 8 | from redisvl.extensions.cache.embeddings.schema import CacheEntry
 9 | 
10 | __all__ = ["EmbeddingsCache", "CacheEntry"]
11 | 


--------------------------------------------------------------------------------
/redisvl/extensions/cache/embeddings/schema.py:
--------------------------------------------------------------------------------
 1 | """Schema definitions for embeddings cache in RedisVL.
 2 | 
 3 | This module defines the Pydantic models used for embedding cache entries and
 4 | related data structures.
 5 | """
 6 | 
 7 | from typing import Any, Dict, List, Optional
 8 | 
 9 | from pydantic import BaseModel, Field, model_validator
10 | 
11 | from redisvl.extensions.constants import EMBEDDING_FIELD_NAME, METADATA_FIELD_NAME
12 | from redisvl.utils.utils import current_timestamp, deserialize, serialize
13 | 
14 | 
15 | class CacheEntry(BaseModel):
16 |     """Embedding cache entry data model"""
17 | 
18 |     entry_id: str
19 |     """Cache entry identifier"""
20 |     text: str
21 |     """The text input that was embedded"""
22 |     model_name: str
23 |     """The name of the embedding model used"""
24 |     embedding: List[float]
25 |     """The embedding vector representation"""
26 |     inserted_at: float = Field(default_factory=current_timestamp)
27 |     """Timestamp of when the entry was added to the cache"""
28 |     metadata: Optional[Dict[str, Any]] = Field(default=None)
29 |     """Optional metadata stored on the cache entry"""
30 | 
31 |     @model_validator(mode="before")
32 |     @classmethod
33 |     def deserialize_cache_entry(cls, values: Dict[str, Any]) -> Dict[str, Any]:
34 |         # Deserialize metadata if necessary
35 |         if METADATA_FIELD_NAME in values and isinstance(
36 |             values[METADATA_FIELD_NAME], str
37 |         ):
38 |             values[METADATA_FIELD_NAME] = deserialize(values[METADATA_FIELD_NAME])
39 |         # Deserialize embeddings if necessary
40 |         if EMBEDDING_FIELD_NAME in values and isinstance(
41 |             values[EMBEDDING_FIELD_NAME], str
42 |         ):
43 |             values[EMBEDDING_FIELD_NAME] = deserialize(values[EMBEDDING_FIELD_NAME])
44 | 
45 |         return values
46 | 
47 |     def to_dict(self) -> Dict[str, Any]:
48 |         """Convert the cache entry to a dictionary for storage"""
49 |         data = self.model_dump(exclude_none=True)
50 |         data[EMBEDDING_FIELD_NAME] = serialize(self.embedding)
51 |         if self.metadata is not None:
52 |             data[METADATA_FIELD_NAME] = serialize(self.metadata)
53 |         return data
54 | 


--------------------------------------------------------------------------------
/redisvl/extensions/cache/llm/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Redis Vector Library - LLM Cache Extensions
 3 | 
 4 | This module provides LLM cache implementations for RedisVL.
 5 | """
 6 | 
 7 | from redisvl.extensions.cache.llm.schema import (
 8 |     CacheEntry,
 9 |     CacheHit,
10 |     SemanticCacheIndexSchema,
11 | )
12 | from redisvl.extensions.cache.llm.semantic import SemanticCache
13 | 
14 | __all__ = ["SemanticCache", "CacheEntry", "CacheHit", "SemanticCacheIndexSchema"]
15 | 


--------------------------------------------------------------------------------
/redisvl/extensions/cache/llm/base.py:
--------------------------------------------------------------------------------
  1 | """Base LLM cache interface for RedisVL.
  2 | 
  3 | This module defines the abstract base interface for LLM caches, which store
  4 | prompt-response pairs with semantic retrieval capabilities.
  5 | """
  6 | 
  7 | from typing import Any, Dict, List, Optional
  8 | 
  9 | from redisvl.extensions.cache.base import BaseCache
 10 | from redisvl.query.filter import FilterExpression
 11 | 
 12 | 
 13 | class BaseLLMCache(BaseCache):
 14 |     """Base abstract LLM cache interface.
 15 | 
 16 |     This class defines the core functionality for caching LLM responses
 17 |     with semantic similarity search capabilities.
 18 |     """
 19 | 
 20 |     def __init__(self, name: str, ttl: Optional[int] = None, **kwargs):
 21 |         """Initialize an LLM cache.
 22 | 
 23 |         Args:
 24 |             name (str): The name of the cache.
 25 |             ttl (Optional[int]): The time-to-live for cached responses. Defaults to None.
 26 |             **kwargs: Additional arguments passed to the parent class.
 27 |         """
 28 |         super().__init__(name=name, ttl=ttl, **kwargs)
 29 | 
 30 |     def delete(self) -> None:
 31 |         """Delete the cache and its index entirely."""
 32 |         raise NotImplementedError
 33 | 
 34 |     async def adelete(self) -> None:
 35 |         """Async delete the cache and its index entirely."""
 36 |         raise NotImplementedError
 37 | 
 38 |     def check(
 39 |         self,
 40 |         prompt: Optional[str] = None,
 41 |         vector: Optional[List[float]] = None,
 42 |         num_results: int = 1,
 43 |         return_fields: Optional[List[str]] = None,
 44 |         filter_expression: Optional[FilterExpression] = None,
 45 |         distance_threshold: Optional[float] = None,
 46 |     ) -> List[Dict[str, Any]]:
 47 |         """Check the cache for semantically similar prompts.
 48 | 
 49 |         Args:
 50 |             prompt (Optional[str]): The text prompt to search for in the cache.
 51 |             vector (Optional[List[float]]): Vector representation to search for.
 52 |             num_results (int): Number of results to return. Defaults to 1.
 53 |             return_fields (Optional[List[str]]): Fields to return in results.
 54 |             filter_expression (Optional[FilterExpression]): Optional filter to apply.
 55 |             distance_threshold (Optional[float]): Override for semantic distance threshold.
 56 | 
 57 |         Returns:
 58 |             List[Dict[str, Any]]: List of matching cache entries.
 59 |         """
 60 |         raise NotImplementedError
 61 | 
 62 |     async def acheck(
 63 |         self,
 64 |         prompt: Optional[str] = None,
 65 |         vector: Optional[List[float]] = None,
 66 |         num_results: int = 1,
 67 |         return_fields: Optional[List[str]] = None,
 68 |         filter_expression: Optional[FilterExpression] = None,
 69 |         distance_threshold: Optional[float] = None,
 70 |     ) -> List[Dict[str, Any]]:
 71 |         """Async check the cache for semantically similar prompts."""
 72 |         raise NotImplementedError
 73 | 
 74 |     def store(
 75 |         self,
 76 |         prompt: str,
 77 |         response: str,
 78 |         vector: Optional[List[float]] = None,
 79 |         metadata: Optional[Dict[str, Any]] = None,
 80 |         filters: Optional[Dict[str, Any]] = None,
 81 |         ttl: Optional[int] = None,
 82 |     ) -> str:
 83 |         """Store a prompt-response pair in the cache.
 84 | 
 85 |         Args:
 86 |             prompt (str): The user prompt to cache.
 87 |             response (str): The LLM response to cache.
 88 |             vector (Optional[List[float]]): Optional embedding vector.
 89 |             metadata (Optional[Dict[str, Any]]): Optional metadata.
 90 |             filters (Optional[Dict[str, Any]]): Optional filters for retrieval.
 91 |             ttl (Optional[int]): Optional TTL override.
 92 | 
 93 |         Returns:
 94 |             str: The Redis key for the cached entry.
 95 |         """
 96 |         raise NotImplementedError
 97 | 
 98 |     async def astore(
 99 |         self,
100 |         prompt: str,
101 |         response: str,
102 |         vector: Optional[List[float]] = None,
103 |         metadata: Optional[Dict[str, Any]] = None,
104 |         filters: Optional[Dict[str, Any]] = None,
105 |         ttl: Optional[int] = None,
106 |     ) -> str:
107 |         """Async store a prompt-response pair in the cache."""
108 |         raise NotImplementedError
109 | 
110 |     def update(self, key: str, **kwargs) -> None:
111 |         """Update specific fields within an existing cache entry.
112 | 
113 |         Args:
114 |             key (str): The key of the document to update.
115 |             **kwargs: Field-value pairs to update.
116 |         """
117 |         raise NotImplementedError
118 | 
119 |     async def aupdate(self, key: str, **kwargs) -> None:
120 |         """Async update specific fields within an existing cache entry."""
121 |         raise NotImplementedError
122 | 


--------------------------------------------------------------------------------
/redisvl/extensions/cache/llm/schema.py:
--------------------------------------------------------------------------------
  1 | from typing import Any, Dict, List, Optional
  2 | 
  3 | from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
  4 | 
  5 | from redisvl.extensions.constants import (
  6 |     CACHE_VECTOR_FIELD_NAME,
  7 |     INSERTED_AT_FIELD_NAME,
  8 |     PROMPT_FIELD_NAME,
  9 |     RESPONSE_FIELD_NAME,
 10 |     UPDATED_AT_FIELD_NAME,
 11 | )
 12 | from redisvl.redis.utils import array_to_buffer, hashify
 13 | from redisvl.schema import IndexSchema
 14 | from redisvl.utils.utils import current_timestamp, deserialize, serialize
 15 | 
 16 | 
 17 | class CacheEntry(BaseModel):
 18 |     """A single cache entry in Redis"""
 19 | 
 20 |     entry_id: Optional[str] = Field(default=None)
 21 |     """Cache entry identifier"""
 22 |     prompt: str
 23 |     """Input prompt or question cached in Redis"""
 24 |     response: str
 25 |     """Response or answer to the question, cached in Redis"""
 26 |     prompt_vector: List[float]
 27 |     """Text embedding representation of the prompt"""
 28 |     inserted_at: float = Field(default_factory=current_timestamp)
 29 |     """Timestamp of when the entry was added to the cache"""
 30 |     updated_at: float = Field(default_factory=current_timestamp)
 31 |     """Timestamp of when the entry was updated in the cache"""
 32 |     metadata: Optional[Dict[str, Any]] = Field(default=None)
 33 |     """Optional metadata stored on the cache entry"""
 34 |     filters: Optional[Dict[str, Any]] = Field(default=None)
 35 |     """Optional filter data stored on the cache entry for customizing retrieval"""
 36 | 
 37 |     @model_validator(mode="before")
 38 |     @classmethod
 39 |     def generate_id(cls, values):
 40 |         # Ensure entry_id is set
 41 |         if not values.get("entry_id"):
 42 |             values["entry_id"] = hashify(values["prompt"], values.get("filters"))
 43 |         return values
 44 | 
 45 |     @field_validator("metadata")
 46 |     @classmethod
 47 |     def non_empty_metadata(cls, v):
 48 |         if v is not None and not isinstance(v, dict):
 49 |             raise TypeError("Metadata must be a dictionary.")
 50 |         return v
 51 | 
 52 |     def to_dict(self, dtype: str) -> Dict:
 53 |         data = self.model_dump(exclude_none=True)
 54 |         data["prompt_vector"] = array_to_buffer(self.prompt_vector, dtype)
 55 |         if self.metadata is not None:
 56 |             data["metadata"] = serialize(self.metadata)
 57 |         if self.filters is not None:
 58 |             data.update(self.filters)
 59 |             del data["filters"]
 60 |         return data
 61 | 
 62 | 
 63 | class CacheHit(BaseModel):
 64 |     """A cache hit based on some input query"""
 65 | 
 66 |     entry_id: str
 67 |     """Cache entry identifier"""
 68 |     prompt: str
 69 |     """Input prompt or question cached in Redis"""
 70 |     response: str
 71 |     """Response or answer to the question, cached in Redis"""
 72 |     vector_distance: float
 73 |     """The semantic distance between the query vector and the stored prompt vector"""
 74 |     inserted_at: float
 75 |     """Timestamp of when the entry was added to the cache"""
 76 |     updated_at: float
 77 |     """Timestamp of when the entry was updated in the cache"""
 78 |     metadata: Optional[Dict[str, Any]] = Field(default=None)
 79 |     """Optional metadata stored on the cache entry"""
 80 |     filters: Optional[Dict[str, Any]] = Field(default=None)
 81 |     """Optional filter data stored on the cache entry for customizing retrieval"""
 82 | 
 83 |     # Allow extra fields to simplify handling filters
 84 |     model_config = ConfigDict(extra="allow")
 85 | 
 86 |     @model_validator(mode="before")
 87 |     @classmethod
 88 |     def validate_cache_hit(cls, values: Dict[str, Any]) -> Dict[str, Any]:
 89 |         # Deserialize metadata if necessary
 90 |         if "metadata" in values and isinstance(values["metadata"], str):
 91 |             values["metadata"] = deserialize(values["metadata"])
 92 | 
 93 |         # Collect any extra fields and store them as filters
 94 |         extra_data = values.pop("__pydantic_extra__", {}) or {}
 95 |         if extra_data:
 96 |             current_filters = values.get("filters") or {}
 97 |             if not isinstance(current_filters, dict):
 98 |                 current_filters = {}
 99 |             current_filters.update(extra_data)
100 |             values["filters"] = current_filters
101 | 
102 |         return values
103 | 
104 |     def to_dict(self) -> Dict[str, Any]:
105 |         """Convert this model to a dictionary, merging filters into the result."""
106 |         data = self.model_dump(exclude_none=True)
107 |         if data.get("filters"):
108 |             data.update(data["filters"])
109 |             del data["filters"]
110 |         return data
111 | 
112 | 
113 | class SemanticCacheIndexSchema(IndexSchema):
114 | 
115 |     @classmethod
116 |     def from_params(cls, name: str, prefix: str, vector_dims: int, dtype: str):
117 |         return cls(
118 |             index={"name": name, "prefix": prefix},  # type: ignore
119 |             fields=[  # type: ignore
120 |                 {"name": PROMPT_FIELD_NAME, "type": "text"},
121 |                 {"name": RESPONSE_FIELD_NAME, "type": "text"},
122 |                 {"name": INSERTED_AT_FIELD_NAME, "type": "numeric"},
123 |                 {"name": UPDATED_AT_FIELD_NAME, "type": "numeric"},
124 |                 {
125 |                     "name": CACHE_VECTOR_FIELD_NAME,
126 |                     "type": "vector",
127 |                     "attrs": {
128 |                         "dims": vector_dims,
129 |                         "datatype": dtype,
130 |                         "distance_metric": "cosine",
131 |                         "algorithm": "flat",
132 |                     },
133 |                 },
134 |             ],
135 |         )
136 | 


--------------------------------------------------------------------------------
/redisvl/extensions/constants.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Constants used within the extension classes SemanticCache, BaseMessageHistory,
 3 | MessageHistory, SemanticMessageHistory and SemanticRouter.
 4 | These constants are also used within theses classes corresponding schema.
 5 | """
 6 | 
 7 | # BaseMessageHistory
 8 | ID_FIELD_NAME: str = "entry_id"
 9 | ROLE_FIELD_NAME: str = "role"
10 | CONTENT_FIELD_NAME: str = "content"
11 | TOOL_FIELD_NAME: str = "tool_call_id"
12 | TIMESTAMP_FIELD_NAME: str = "timestamp"
13 | SESSION_FIELD_NAME: str = "session_tag"
14 | 
15 | # SemanticMessageHistory
16 | MESSAGE_VECTOR_FIELD_NAME: str = "vector_field"
17 | 
18 | # SemanticCache
19 | REDIS_KEY_FIELD_NAME: str = "key"
20 | ENTRY_ID_FIELD_NAME: str = "entry_id"
21 | PROMPT_FIELD_NAME: str = "prompt"
22 | RESPONSE_FIELD_NAME: str = "response"
23 | CACHE_VECTOR_FIELD_NAME: str = "prompt_vector"
24 | INSERTED_AT_FIELD_NAME: str = "inserted_at"
25 | UPDATED_AT_FIELD_NAME: str = "updated_at"
26 | METADATA_FIELD_NAME: str = "metadata"
27 | 
28 | # EmbeddingsCache
29 | TEXT_FIELD_NAME: str = "text"
30 | MODEL_NAME_FIELD_NAME: str = "model_name"
31 | EMBEDDING_FIELD_NAME: str = "embedding"
32 | DIMENSIONS_FIELD_NAME: str = "dimensions"
33 | 
34 | # SemanticRouter
35 | ROUTE_VECTOR_FIELD_NAME: str = "vector"
36 | 


--------------------------------------------------------------------------------
/redisvl/extensions/llmcache/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | RedisVL LLM Cache Extensions (Deprecated Path)
 3 | 
 4 | This module is kept for backward compatibility. Please use `redisvl.extensions.cache` instead.
 5 | """
 6 | 
 7 | import warnings
 8 | 
 9 | from redisvl.extensions.cache.llm.base import BaseLLMCache
10 | from redisvl.extensions.cache.llm.schema import (
11 |     CacheEntry,
12 |     CacheHit,
13 |     SemanticCacheIndexSchema,
14 | )
15 | from redisvl.extensions.cache.llm.semantic import SemanticCache
16 | 
17 | warnings.warn(
18 |     "Importing from redisvl.extensions.llmcache is deprecated. "
19 |     "Please import from redisvl.extensions.cache.llm instead.",
20 |     DeprecationWarning,
21 |     stacklevel=2,
22 | )
23 | 
24 | __all__ = [
25 |     "BaseLLMCache",
26 |     "SemanticCache",
27 |     "CacheEntry",
28 |     "CacheHit",
29 |     "SemanticCacheIndexSchema",
30 | ]
31 | 


--------------------------------------------------------------------------------
/redisvl/extensions/llmcache/base.py:
--------------------------------------------------------------------------------
 1 | """
 2 | RedisVL Base LLM Cache (Deprecated Path)
 3 | 
 4 | This module is kept for backward compatibility. Please use `redisvl.extensions.cache.llm.base` instead.
 5 | """
 6 | 
 7 | import warnings
 8 | 
 9 | from redisvl.extensions.cache.llm.base import BaseLLMCache
10 | 
11 | warnings.warn(
12 |     "Importing from redisvl.extensions.llmcache.base is deprecated. "
13 |     "Please import from redisvl.extensions.cache.llm.base instead.",
14 |     DeprecationWarning,
15 |     stacklevel=2,
16 | )
17 | 
18 | __all__ = ["BaseLLMCache"]
19 | 


--------------------------------------------------------------------------------
/redisvl/extensions/llmcache/schema.py:
--------------------------------------------------------------------------------
 1 | """
 2 | RedisVL Semantic Cache Schema (Deprecated Path)
 3 | 
 4 | This module is kept for backward compatibility. Please use `redisvl.extensions.cache.llm.schema` instead.
 5 | """
 6 | 
 7 | import warnings
 8 | 
 9 | from redisvl.extensions.cache.llm.schema import (
10 |     CacheEntry,
11 |     CacheHit,
12 |     SemanticCacheIndexSchema,
13 | )
14 | 
15 | warnings.warn(
16 |     "Importing from redisvl.extensions.llmcache.schema is deprecated. "
17 |     "Please import from redisvl.extensions.cache.llm.schema instead.",
18 |     DeprecationWarning,
19 |     stacklevel=2,
20 | )
21 | 
22 | __all__ = ["CacheEntry", "CacheHit", "SemanticCacheIndexSchema"]
23 | 


--------------------------------------------------------------------------------
/redisvl/extensions/llmcache/semantic.py:
--------------------------------------------------------------------------------
 1 | """
 2 | RedisVL Semantic Cache (Deprecated Path)
 3 | 
 4 | This module is kept for backward compatibility. Please use `redisvl.extensions.cache.llm.semantic` instead.
 5 | """
 6 | 
 7 | import warnings
 8 | 
 9 | from redisvl.extensions.cache.llm.semantic import SemanticCache
10 | 
11 | warnings.warn(
12 |     "Importing from redisvl.extensions.llmcache.semantic is deprecated. "
13 |     "Please import from redisvl.extensions.cache.llm.semantic instead.",
14 |     DeprecationWarning,
15 |     stacklevel=2,
16 | )
17 | 
18 | __all__ = ["SemanticCache"]
19 | 


--------------------------------------------------------------------------------
/redisvl/extensions/message_history/__init__.py:
--------------------------------------------------------------------------------
1 | from redisvl.extensions.message_history.base_history import BaseMessageHistory
2 | from redisvl.extensions.message_history.message_history import MessageHistory
3 | from redisvl.extensions.message_history.semantic_history import SemanticMessageHistory
4 | 
5 | __all__ = ["BaseMessageHistory", "MessageHistory", "SemanticMessageHistory"]
6 | 


--------------------------------------------------------------------------------
/redisvl/extensions/message_history/base_history.py:
--------------------------------------------------------------------------------
  1 | from typing import Any, Dict, List, Optional, Union
  2 | 
  3 | from redisvl.extensions.constants import (
  4 |     CONTENT_FIELD_NAME,
  5 |     ROLE_FIELD_NAME,
  6 |     TOOL_FIELD_NAME,
  7 | )
  8 | from redisvl.extensions.message_history.schema import ChatMessage
  9 | from redisvl.utils.utils import create_ulid
 10 | 
 11 | 
 12 | class BaseMessageHistory:
 13 | 
 14 |     def __init__(
 15 |         self,
 16 |         name: str,
 17 |         session_tag: Optional[str] = None,
 18 |     ):
 19 |         """Initialize message history with index
 20 | 
 21 |         Message History stores the current and previous user text prompts and
 22 |         LLM responses to allow for enriching future prompts with session
 23 |         context. Message history is stored in individual user or LLM prompts and
 24 |         responses.
 25 | 
 26 |         Args:
 27 |             name (str): The name of the message history index.
 28 |             session_tag (str): Tag to be added to entries to link to a specific
 29 |                 conversation session. Defaults to instance ULID.
 30 |         """
 31 |         self._name = name
 32 |         self._session_tag = session_tag or create_ulid()
 33 | 
 34 |     def clear(self) -> None:
 35 |         """Clears the chat message history."""
 36 |         raise NotImplementedError
 37 | 
 38 |     def delete(self) -> None:
 39 |         """Clear all conversation history and remove any search indices."""
 40 |         raise NotImplementedError
 41 | 
 42 |     def drop(self, id_field: Optional[str] = None) -> None:
 43 |         """Remove a specific exchange from the conversation history.
 44 | 
 45 |         Args:
 46 |             id_field (Optional[str]): The id_field of the entry to delete.
 47 |                 If None then the last entry is deleted.
 48 |         """
 49 |         raise NotImplementedError
 50 | 
 51 |     @property
 52 |     def messages(self) -> Union[List[str], List[Dict[str, str]]]:
 53 |         """Returns the full chat history."""
 54 |         raise NotImplementedError
 55 | 
 56 |     def get_recent(
 57 |         self,
 58 |         top_k: int = 5,
 59 |         as_text: bool = False,
 60 |         raw: bool = False,
 61 |         session_tag: Optional[str] = None,
 62 |     ) -> Union[List[str], List[Dict[str, str]]]:
 63 |         """Retreive the recent conversation history in sequential order.
 64 | 
 65 |         Args:
 66 |             top_k (int): The number of previous exchanges to return. Default is 5.
 67 |                 Note that one exchange contains both a prompt and response.
 68 |             as_text (bool): Whether to return the conversation as a single string,
 69 |                 or list of alternating prompts and responses.
 70 |             raw (bool): Whether to return the full Redis hash entry or just the
 71 |                 prompt and response
 72 |             session_tag (str): Tag to be added to entries to link to a specific
 73 |                 conversation session. Defaults to instance ULID.
 74 | 
 75 |         Returns:
 76 |             Union[str, List[str]]: A single string transcription of the messages
 77 |                                    or list of strings if as_text is false.
 78 | 
 79 |         Raises:
 80 |             ValueError: If top_k is not an integer greater than or equal to 0.
 81 |         """
 82 |         raise NotImplementedError
 83 | 
 84 |     def _format_context(
 85 |         self, messages: List[Dict[str, Any]], as_text: bool
 86 |     ) -> Union[List[str], List[Dict[str, str]]]:
 87 |         """Extracts the prompt and response fields from the Redis hashes and
 88 |            formats them as either flat dictionaries or strings.
 89 | 
 90 |         Args:
 91 |             messages (List[Dict[str, Any]]): The messages from the message history index.
 92 |             as_text (bool): Whether to return the conversation as a single string,
 93 |                 or list of alternating prompts and responses.
 94 | 
 95 |         Returns:
 96 |             Union[str, List[str]]: A single string transcription of the messages
 97 |                 or list of strings if as_text is false.
 98 |         """
 99 |         context = []
100 | 
101 |         for message in messages:
102 | 
103 |             chat_message = ChatMessage(**message)
104 | 
105 |             if as_text:
106 |                 context.append(chat_message.content)
107 |             else:
108 |                 chat_message_dict = {
109 |                     ROLE_FIELD_NAME: chat_message.role,
110 |                     CONTENT_FIELD_NAME: chat_message.content,
111 |                 }
112 |                 if chat_message.tool_call_id is not None:
113 |                     chat_message_dict[TOOL_FIELD_NAME] = chat_message.tool_call_id
114 | 
115 |                 context.append(chat_message_dict)  # type: ignore
116 | 
117 |         return context
118 | 
119 |     def store(
120 |         self, prompt: str, response: str, session_tag: Optional[str] = None
121 |     ) -> None:
122 |         """Insert a prompt:response pair into the message history. A timestamp
123 |         is associated with each exchange so that they can be later sorted
124 |         in sequential ordering after retrieval.
125 | 
126 |         Args:
127 |             prompt (str): The user prompt to the LLM.
128 |             response (str): The corresponding LLM response.
129 |             session_tag (Optional[str]): The tag to mark the message with. Defaults to None.
130 |         """
131 |         raise NotImplementedError
132 | 
133 |     def add_messages(
134 |         self, messages: List[Dict[str, str]], session_tag: Optional[str] = None
135 |     ) -> None:
136 |         """Insert a list of prompts and responses into the message history.
137 |         A timestamp is associated with each so that they can be later sorted
138 |         in sequential ordering after retrieval.
139 | 
140 |         Args:
141 |             messages (List[Dict[str, str]]): The list of user prompts and LLM responses.
142 |             session_tag (Optional[str]): The tag to mark the messages with. Defaults to None.
143 |         """
144 |         raise NotImplementedError
145 | 
146 |     def add_message(
147 |         self, message: Dict[str, str], session_tag: Optional[str] = None
148 |     ) -> None:
149 |         """Insert a single prompt or response into the message history.
150 |         A timestamp is associated with it so that it can be later sorted
151 |         in sequential ordering after retrieval.
152 | 
153 |         Args:
154 |             message (Dict[str,str]): The user prompt or LLM response.
155 |             session_tag (Optional[str]): The tag to mark the message with. Defaults to None.
156 |         """
157 |         raise NotImplementedError
158 | 


--------------------------------------------------------------------------------
/redisvl/extensions/message_history/schema.py:
--------------------------------------------------------------------------------
  1 | from typing import Dict, List, Optional
  2 | 
  3 | from pydantic import BaseModel, ConfigDict, Field, model_validator
  4 | 
  5 | from redisvl.extensions.constants import (
  6 |     CONTENT_FIELD_NAME,
  7 |     ID_FIELD_NAME,
  8 |     MESSAGE_VECTOR_FIELD_NAME,
  9 |     ROLE_FIELD_NAME,
 10 |     SESSION_FIELD_NAME,
 11 |     TIMESTAMP_FIELD_NAME,
 12 |     TOOL_FIELD_NAME,
 13 | )
 14 | from redisvl.redis.utils import array_to_buffer
 15 | from redisvl.schema import IndexSchema
 16 | from redisvl.utils.utils import current_timestamp
 17 | 
 18 | 
 19 | class ChatMessage(BaseModel):
 20 |     """A single chat message exchanged between a user and an LLM."""
 21 | 
 22 |     entry_id: Optional[str] = Field(default=None)
 23 |     """A unique identifier for the message."""
 24 |     role: str  # TODO -- do we enumify this?
 25 |     """The role of the message sender (e.g., 'user' or 'llm')."""
 26 |     content: str
 27 |     """The content of the message."""
 28 |     session_tag: str
 29 |     """Tag associated with the current conversation session."""
 30 |     timestamp: Optional[float] = Field(default=None)
 31 |     """The time the message was sent, in UTC, rounded to milliseconds."""
 32 |     tool_call_id: Optional[str] = Field(default=None)
 33 |     """An optional identifier for a tool call associated with the message."""
 34 |     vector_field: Optional[List[float]] = Field(default=None)
 35 |     """The vector representation of the message content."""
 36 |     model_config = ConfigDict(arbitrary_types_allowed=True)
 37 | 
 38 |     @model_validator(mode="before")
 39 |     @classmethod
 40 |     def generate_id(cls, values):
 41 |         if TIMESTAMP_FIELD_NAME not in values:
 42 |             values[TIMESTAMP_FIELD_NAME] = current_timestamp()
 43 |         if ID_FIELD_NAME not in values:
 44 |             values[ID_FIELD_NAME] = (
 45 |                 f"{values[SESSION_FIELD_NAME]}:{values[TIMESTAMP_FIELD_NAME]}"
 46 |             )
 47 |         return values
 48 | 
 49 |     def to_dict(self, dtype: Optional[str] = None) -> Dict:
 50 |         data = self.model_dump(exclude_none=True)
 51 | 
 52 |         # handle optional fields
 53 |         if MESSAGE_VECTOR_FIELD_NAME in data:
 54 |             data[MESSAGE_VECTOR_FIELD_NAME] = array_to_buffer(
 55 |                 data[MESSAGE_VECTOR_FIELD_NAME], dtype  # type: ignore[arg-type]
 56 |             )
 57 |         return data
 58 | 
 59 | 
 60 | class MessageHistorySchema(IndexSchema):
 61 | 
 62 |     @classmethod
 63 |     def from_params(cls, name: str, prefix: str):
 64 | 
 65 |         return cls(
 66 |             index={"name": name, "prefix": prefix},  # type: ignore
 67 |             fields=[  # type: ignore
 68 |                 {"name": ROLE_FIELD_NAME, "type": "tag"},
 69 |                 {"name": CONTENT_FIELD_NAME, "type": "text"},
 70 |                 {"name": TOOL_FIELD_NAME, "type": "tag"},
 71 |                 {"name": TIMESTAMP_FIELD_NAME, "type": "numeric"},
 72 |                 {"name": SESSION_FIELD_NAME, "type": "tag"},
 73 |             ],
 74 |         )
 75 | 
 76 | 
 77 | class SemanticMessageHistorySchema(IndexSchema):
 78 | 
 79 |     @classmethod
 80 |     def from_params(cls, name: str, prefix: str, vectorizer_dims: int, dtype: str):
 81 | 
 82 |         return cls(
 83 |             index={"name": name, "prefix": prefix},  # type: ignore
 84 |             fields=[  # type: ignore
 85 |                 {"name": ROLE_FIELD_NAME, "type": "tag"},
 86 |                 {"name": CONTENT_FIELD_NAME, "type": "text"},
 87 |                 {"name": TOOL_FIELD_NAME, "type": "tag"},
 88 |                 {"name": TIMESTAMP_FIELD_NAME, "type": "numeric"},
 89 |                 {"name": SESSION_FIELD_NAME, "type": "tag"},
 90 |                 {
 91 |                     "name": MESSAGE_VECTOR_FIELD_NAME,
 92 |                     "type": "vector",
 93 |                     "attrs": {
 94 |                         "dims": vectorizer_dims,
 95 |                         "datatype": dtype,
 96 |                         "distance_metric": "cosine",
 97 |                         "algorithm": "flat",
 98 |                     },
 99 |                 },
100 |             ],
101 |         )
102 | 


--------------------------------------------------------------------------------
/redisvl/extensions/router/__init__.py:
--------------------------------------------------------------------------------
1 | from redisvl.extensions.router.schema import Route, RoutingConfig
2 | from redisvl.extensions.router.semantic import SemanticRouter
3 | 
4 | __all__ = ["SemanticRouter", "Route", "RoutingConfig"]
5 | 


--------------------------------------------------------------------------------
/redisvl/extensions/router/schema.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | from enum import Enum
  3 | from typing import Any, Dict, List, Optional
  4 | 
  5 | from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
  6 | from typing_extensions import Annotated
  7 | 
  8 | from redisvl.extensions.constants import ROUTE_VECTOR_FIELD_NAME
  9 | from redisvl.schema import IndexSchema
 10 | 
 11 | 
 12 | class Route(BaseModel):
 13 |     """Model representing a routing path with associated metadata and thresholds."""
 14 | 
 15 |     name: str
 16 |     """The name of the route."""
 17 |     references: List[str]
 18 |     """List of reference phrases for the route."""
 19 |     metadata: Dict[str, Any] = Field(default={})
 20 |     """Metadata associated with the route."""
 21 |     distance_threshold: Annotated[float, Field(strict=True, gt=0, le=2)] = 0.5
 22 |     """Distance threshold for matching the route."""
 23 | 
 24 |     @field_validator("name")
 25 |     @classmethod
 26 |     def name_must_not_be_empty(cls, v):
 27 |         if not v or not v.strip():
 28 |             raise ValueError("Route name must not be empty")
 29 |         return v
 30 | 
 31 |     @field_validator("references")
 32 |     @classmethod
 33 |     def references_must_not_be_empty(cls, v):
 34 |         if not v:
 35 |             raise ValueError("References must not be empty")
 36 |         if any(not ref.strip() for ref in v):
 37 |             raise ValueError("All references must be non-empty strings")
 38 |         return v
 39 | 
 40 | 
 41 | class RouteMatch(BaseModel):
 42 |     """Model representing a matched route with distance information."""
 43 | 
 44 |     name: Optional[str] = None
 45 |     """The matched route name."""
 46 |     distance: Optional[float] = Field(default=None)
 47 |     """The vector distance between the statement and the matched route."""
 48 | 
 49 | 
 50 | class DistanceAggregationMethod(Enum):
 51 |     """Enumeration for distance aggregation methods."""
 52 | 
 53 |     avg = "avg"
 54 |     """Compute the average of the vector distances."""
 55 |     min = "min"
 56 |     """Compute the minimum of the vector distances."""
 57 |     sum = "sum"
 58 |     """Compute the sum of the vector distances."""
 59 | 
 60 | 
 61 | class RoutingConfig(BaseModel):
 62 |     """Configuration for routing behavior."""
 63 | 
 64 |     """The maximum number of top matches to return."""
 65 |     max_k: Annotated[int, Field(strict=True, gt=0)] = 1
 66 |     """Aggregation method to use to classify queries."""
 67 |     aggregation_method: DistanceAggregationMethod = Field(
 68 |         default=DistanceAggregationMethod.avg
 69 |     )
 70 | 
 71 |     model_config = ConfigDict(extra="ignore")
 72 | 
 73 |     @model_validator(mode="before")
 74 |     @classmethod
 75 |     def remove_distance_threshold(cls, values: Dict[str, Any]) -> Dict[str, Any]:
 76 |         if "distance_threshold" in values:
 77 |             warnings.warn(
 78 |                 "The 'distance_threshold' field is deprecated and will be ignored. Set distance_threshold per Route.",
 79 |                 DeprecationWarning,
 80 |                 stacklevel=2,
 81 |             )
 82 |             values.pop("distance_threshold")
 83 |         return values
 84 | 
 85 | 
 86 | class SemanticRouterIndexSchema(IndexSchema):
 87 |     """Customized index schema for SemanticRouter."""
 88 | 
 89 |     @classmethod
 90 |     def from_params(cls, name: str, vector_dims: int, dtype: str):
 91 |         """Create an index schema based on router name and vector dimensions.
 92 | 
 93 |         Args:
 94 |             name (str): The name of the index.
 95 |             vector_dims (int): The dimensions of the vectors.
 96 | 
 97 |         Returns:
 98 |             SemanticRouterIndexSchema: The constructed index schema.
 99 |         """
100 |         return cls(
101 |             index={"name": name, "prefix": name},  # type: ignore
102 |             fields=[  # type: ignore
103 |                 {"name": "reference_id", "type": "tag"},
104 |                 {"name": "route_name", "type": "tag"},
105 |                 {"name": "reference", "type": "text"},
106 |                 {
107 |                     "name": ROUTE_VECTOR_FIELD_NAME,
108 |                     "type": "vector",
109 |                     "attrs": {
110 |                         "algorithm": "flat",
111 |                         "dims": vector_dims,
112 |                         "distance_metric": "cosine",
113 |                         "datatype": dtype,
114 |                     },
115 |                 },
116 |             ],
117 |         )
118 | 


--------------------------------------------------------------------------------
/redisvl/extensions/session_manager/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | RedisVL Session Manager Extensions (Deprecated Path)
 3 | 
 4 | This module is kept for backward compatibility. Please use `redisvl.extensions.message_history` instead.
 5 | """
 6 | 
 7 | import warnings
 8 | 
 9 | from redisvl.extensions.session_manager.base_session import BaseSessionManager
10 | from redisvl.extensions.session_manager.semantic_session import SemanticSessionManager
11 | from redisvl.extensions.session_manager.standard_session import StandardSessionManager
12 | 
13 | warnings.warn(
14 |     "Importing from redisvl.extensions.session_manager is deprecated. "
15 |     "StandardSessionManager has been renamed to MessageHistory. "
16 |     "SemanticSessionManager has been renamed to SemanticMessageHistory. "
17 |     "Please import from redisvl.extensions.message_history instead.",
18 |     DeprecationWarning,
19 |     stacklevel=2,
20 | )
21 | 
22 | 
23 | __all__ = ["BaseSessionManager", "StandardSessionManager", "SemanticSessionManager"]
24 | 


--------------------------------------------------------------------------------
/redisvl/extensions/session_manager/base_session.py:
--------------------------------------------------------------------------------
 1 | """
 2 | RedisVL Standard Session Manager (Deprecated Path)
 3 | 
 4 | This module is kept for backward compatibility. Please use `redisvl.extensions.base_history` instead.
 5 | """
 6 | 
 7 | import warnings
 8 | 
 9 | from redisvl.extensions.message_history.base_history import BaseMessageHistory
10 | 
11 | warnings.warn(
12 |     "Importing from redisvl.extensions.session_manager.base_session is deprecated. "
13 |     "BaseSessionManager has been renamed to BaseMessageHistory. "
14 |     "Please import BaseMessageHistory from redisvl.extensions.base_history instead.",
15 |     DeprecationWarning,
16 |     stacklevel=2,
17 | )
18 | 
19 | 
20 | class BaseSessionManager(BaseMessageHistory):
21 |     # keep for backward compatibility
22 |     pass
23 | 
24 | 
25 | __all__ = ["BaseSessionManager"]
26 | 


--------------------------------------------------------------------------------
/redisvl/extensions/session_manager/schema.py:
--------------------------------------------------------------------------------
 1 | """
 2 | RedisVL Session Manager Schema (Deprecated Path)
 3 | 
 4 | This module is kept for backward compatibility. Please use `redisvl.extensions.message_history.schema` instead.
 5 | """
 6 | 
 7 | import warnings
 8 | 
 9 | from redisvl.extensions.message_history.schema import (
10 |     ChatMessage,
11 |     MessageHistorySchema,
12 |     SemanticMessageHistorySchema,
13 | )
14 | 
15 | warnings.warn(
16 |     "Importing from redisvl.extensions.session_manager.schema is deprecated. "
17 |     "Please import from redisvl.extensions.message_history.schema instead.",
18 |     DeprecationWarning,
19 |     stacklevel=2,
20 | )
21 | 
22 | 
23 | class StandardSessionIndexSchema(MessageHistorySchema):
24 |     # keep for backward compatibility
25 |     pass
26 | 
27 | 
28 | class SemanticSessionIndexSchema(SemanticMessageHistorySchema):
29 |     # keep for backward compatibility
30 |     pass
31 | 
32 | 
33 | __all__ = [
34 |     "ChatMessage",
35 |     "StandardSessionIndexSchema",
36 |     "SemanticSessionIndexSchema",
37 | ]
38 | 


--------------------------------------------------------------------------------
/redisvl/extensions/session_manager/semantic_session.py:
--------------------------------------------------------------------------------
 1 | """
 2 | RedisVL Semantic Session Manager (Deprecated Path)
 3 | 
 4 | This module is kept for backward compatibility. Please use `redisvl.extensions.semantic_history` instead.
 5 | """
 6 | 
 7 | import warnings
 8 | 
 9 | from redisvl.extensions.message_history.semantic_history import SemanticMessageHistory
10 | 
11 | warnings.warn(
12 |     "Importing from redisvl.extensions.session_manger.semantic_session is deprecated. "
13 |     "SemanticSessionManager has been renamed to SemanticMessageHistory. "
14 |     "Please import SemanticMessageHistory from redisvl.extensions.semantic_history instead.",
15 |     DeprecationWarning,
16 |     stacklevel=2,
17 | )
18 | 
19 | 
20 | class SemanticSessionManager(SemanticMessageHistory):
21 |     # keep for backwards compatibility
22 |     pass
23 | 
24 | 
25 | __all__ = ["SemanticSessionManager"]
26 | 


--------------------------------------------------------------------------------
/redisvl/extensions/session_manager/standard_session.py:
--------------------------------------------------------------------------------
 1 | """
 2 | RedisVL Standard Session Manager (Deprecated Path)
 3 | 
 4 | This module is kept for backward compatibility. Please use `redisvl.extensions.message_history` instead.
 5 | """
 6 | 
 7 | import warnings
 8 | 
 9 | from redisvl.extensions.message_history.message_history import MessageHistory
10 | 
11 | warnings.warn(
12 |     "Importing from redisvl.extensions.session_manger.standard_session is deprecated. "
13 |     "StandardSessionManager has been renamed to MessageHistory. "
14 |     "Please import MessageHistory from redisvl.extensions.message_history instead.",
15 |     DeprecationWarning,
16 |     stacklevel=2,
17 | )
18 | 
19 | 
20 | class StandardSessionManager(MessageHistory):
21 |     # keep for backward compatibility
22 |     pass
23 | 
24 | 
25 | __all__ = ["StandardSessionManager"]
26 | 


--------------------------------------------------------------------------------
/redisvl/index/__init__.py:
--------------------------------------------------------------------------------
1 | from redisvl.index.index import AsyncSearchIndex, SearchIndex
2 | 
3 | __all__ = ["SearchIndex", "AsyncSearchIndex"]
4 | 


--------------------------------------------------------------------------------
/redisvl/query/__init__.py:
--------------------------------------------------------------------------------
 1 | from redisvl.query.aggregate import AggregationQuery, HybridQuery
 2 | from redisvl.query.query import (
 3 |     BaseQuery,
 4 |     BaseVectorQuery,
 5 |     CountQuery,
 6 |     FilterQuery,
 7 |     RangeQuery,
 8 |     TextQuery,
 9 |     VectorQuery,
10 |     VectorRangeQuery,
11 | )
12 | 
13 | __all__ = [
14 |     "BaseQuery",
15 |     "BaseVectorQuery",
16 |     "VectorQuery",
17 |     "FilterQuery",
18 |     "RangeQuery",
19 |     "VectorRangeQuery",
20 |     "CountQuery",
21 |     "TextQuery",
22 |     "AggregationQuery",
23 |     "HybridQuery",
24 | ]
25 | 


--------------------------------------------------------------------------------
/redisvl/redis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/redis/redis-vl-python/defa62a3f291fd8093baf0c652c7d71e6528bfcb/redisvl/redis/__init__.py


--------------------------------------------------------------------------------
/redisvl/redis/constants.py:
--------------------------------------------------------------------------------
 1 | # required modules
 2 | DEFAULT_REQUIRED_MODULES = [
 3 |     {"name": "search", "ver": 20600},
 4 |     {"name": "searchlight", "ver": 20600},
 5 | ]
 6 | 
 7 | # default tag separator
 8 | REDIS_TAG_SEPARATOR = ","
 9 | 
10 | 
11 | REDIS_URL_ENV_VAR = "REDIS_URL"
12 | 


--------------------------------------------------------------------------------
/redisvl/schema/__init__.py:
--------------------------------------------------------------------------------
 1 | from redisvl.schema.fields import (
 2 |     BaseField,
 3 |     FieldTypes,
 4 |     FlatVectorField,
 5 |     GeoField,
 6 |     HNSWVectorField,
 7 |     NumericField,
 8 |     TagField,
 9 |     TextField,
10 |     VectorDataType,
11 |     VectorDistanceMetric,
12 |     VectorIndexAlgorithm,
13 | )
14 | from redisvl.schema.schema import IndexInfo, IndexSchema, StorageType
15 | 
16 | # Expose validation functionality
17 | from redisvl.schema.validation import validate_object
18 | 
19 | __all__ = [
20 |     "IndexSchema",
21 |     "IndexInfo",
22 |     "StorageType",
23 |     "FieldTypes",
24 |     "VectorDistanceMetric",
25 |     "VectorDataType",
26 |     "VectorIndexAlgorithm",
27 |     "BaseField",
28 |     "TextField",
29 |     "TagField",
30 |     "NumericField",
31 |     "GeoField",
32 |     "FlatVectorField",
33 |     "HNSWVectorField",
34 |     "validate_object",
35 | ]
36 | 


--------------------------------------------------------------------------------
/redisvl/schema/type_utils.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import Any
 3 | 
 4 | 
 5 | class TypeInferrer:
 6 |     """Infers the type of a field based on its value."""
 7 | 
 8 |     GEO_PATTERN = re.compile(
 9 |         r"^\s*[-+]?([1-8]?\d(\.\d+)?|90(\.0+)?),\s*[-+]?(180(\.0+)?|((1[0-7]\d)|([1-9]?\d))(\.\d+)?)\s*$"
10 |     )
11 | 
12 |     TYPE_METHOD_MAP = {
13 |         "numeric": "_is_numeric",
14 |         "geo": "_is_geographic",
15 |         "tag": "_is_tag",
16 |         "text": "_is_text",
17 |     }
18 | 
19 |     @classmethod
20 |     def infer(cls, value: Any) -> str:
21 |         """Infers the field type for a given value.
22 | 
23 |         Args:
24 |             value: The value to infer the type of.
25 | 
26 |         Returns:
27 |             The inferred field type as a string.
28 | 
29 |         Raises:
30 |             ValueError: If the type cannot be inferred.
31 |         """
32 |         for type_name, method_name in cls.TYPE_METHOD_MAP.items():
33 |             if getattr(cls, method_name)(value):
34 |                 return type_name
35 |         raise ValueError(f"Unable to infer type for value: {value}")
36 | 
37 |     @classmethod
38 |     def _is_numeric(cls, value: Any) -> bool:
39 |         """Check if the value is numeric."""
40 |         if not isinstance(value, (int, float, str)):
41 |             return False
42 |         try:
43 |             float(value)
44 |             return True
45 |         except (ValueError, TypeError):
46 |             return False
47 | 
48 |     @classmethod
49 |     def _is_tag(cls, value: Any) -> bool:
50 |         """Check if the value is a tag."""
51 |         return isinstance(value, (list, set, tuple)) and all(
52 |             isinstance(v, str) for v in value
53 |         )
54 | 
55 |     @classmethod
56 |     def _is_text(cls, value: Any) -> bool:
57 |         """Check if the value is text."""
58 |         return isinstance(value, str)
59 | 
60 |     @classmethod
61 |     def _is_geographic(cls, value: Any) -> bool:
62 |         """Check if the value is a geographic coordinate."""
63 |         return isinstance(value, str) and cls.GEO_PATTERN.match(value) is not None
64 | 


--------------------------------------------------------------------------------
/redisvl/types.py:
--------------------------------------------------------------------------------
 1 | from typing import Union
 2 | 
 3 | from redis import Redis as SyncRedis
 4 | from redis.asyncio import Redis as AsyncRedis
 5 | from redis.asyncio.client import Pipeline as AsyncPipeline
 6 | from redis.asyncio.cluster import ClusterPipeline as AsyncClusterPipeline
 7 | from redis.asyncio.cluster import RedisCluster as AsyncRedisCluster
 8 | from redis.client import Pipeline as SyncPipeline
 9 | from redis.cluster import ClusterPipeline as SyncClusterPipeline
10 | from redis.cluster import RedisCluster as SyncRedisCluster
11 | 
12 | SyncRedisClient = Union[SyncRedis, SyncRedisCluster]
13 | AsyncRedisClient = Union[AsyncRedis, AsyncRedisCluster]
14 | RedisClient = Union[SyncRedisClient, AsyncRedisClient]
15 | 
16 | SyncRedisPipeline = Union[SyncPipeline, SyncClusterPipeline]
17 | AsyncRedisPipeline = Union[AsyncPipeline, AsyncClusterPipeline]
18 | 
19 | RedisClientOrPipeline = Union[SyncRedisClient, SyncRedisPipeline]
20 | AsyncRedisClientOrPipeline = Union[AsyncRedisClient, AsyncRedisPipeline]
21 | 


--------------------------------------------------------------------------------
/redisvl/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/redis/redis-vl-python/defa62a3f291fd8093baf0c652c7d71e6528bfcb/redisvl/utils/__init__.py


--------------------------------------------------------------------------------
/redisvl/utils/log.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import sys
 3 | 
 4 | 
 5 | def get_logger(name, log_level="info", fmt=None):
 6 |     """Return a logger instance."""
 7 | 
 8 |     # Use file name if logger is in debug mode
 9 |     name = "RedisVL" if log_level == "debug" else name
10 | 
11 |     logger = logging.getLogger(name)
12 | 
13 |     # Only configure this specific logger, not the root logger
14 |     # Check if the logger already has handlers to respect existing configuration
15 |     if not logger.handlers:
16 |         logging.basicConfig(
17 |             level=logging.INFO,
18 |             format="%(asctime)s %(name)s %(levelname)s   %(message)s",
19 |             datefmt="%H:%M:%S",
20 |             stream=sys.stdout,
21 |         )
22 |     return logger
23 | 


--------------------------------------------------------------------------------
/redisvl/utils/optimize/__init__.py:
--------------------------------------------------------------------------------
 1 | from redisvl.utils.optimize.base import BaseThresholdOptimizer, EvalMetric
 2 | from redisvl.utils.optimize.cache import CacheThresholdOptimizer
 3 | from redisvl.utils.optimize.router import RouterThresholdOptimizer
 4 | from redisvl.utils.optimize.schema import LabeledData
 5 | 
 6 | __all__ = [
 7 |     "CacheThresholdOptimizer",
 8 |     "RouterThresholdOptimizer",
 9 |     "EvalMetric",
10 |     "BaseThresholdOptimizer",
11 |     "LabeledData",
12 | ]
13 | 


--------------------------------------------------------------------------------
/redisvl/utils/optimize/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from enum import Enum
 3 | from typing import Any, Callable, Dict, List, TypeVar
 4 | 
 5 | from redisvl.utils.optimize.utils import _validate_test_dict
 6 | 
 7 | 
 8 | class EvalMetric(str, Enum):
 9 |     """Evaluation metrics for threshold optimization."""
10 | 
11 |     F1 = "f1"
12 |     PRECISION = "precision"
13 |     RECALL = "recall"
14 | 
15 | 
16 | T = TypeVar("T")  # Type variable for the optimizable object (Cache or Router)
17 | 
18 | 
19 | class BaseThresholdOptimizer(ABC):
20 |     """Abstract base class for threshold optimizers."""
21 | 
22 |     def __init__(
23 |         self,
24 |         optimizable: T,
25 |         test_dict: List[Dict],
26 |         opt_fn: Callable,
27 |         eval_metric: str = "f1",
28 |     ):
29 |         """Initialize the optimizer.
30 | 
31 |         Args:
32 |             optimizable: The object to optimize (Cache or Router)
33 |             test_dict: List of test cases
34 |             eval_fn: Function to evaluate performance
35 |             opt_fn: Function to perform optimization
36 |         """
37 |         self.test_data = _validate_test_dict(test_dict)
38 |         self.optimizable = optimizable
39 |         self.eval_metric = EvalMetric(eval_metric)
40 |         self.opt_fn = opt_fn
41 | 
42 |     @abstractmethod
43 |     def optimize(self, **kwargs: Any):
44 |         """Optimize thresholds using the provided optimization function."""
45 |         pass
46 | 


--------------------------------------------------------------------------------
/redisvl/utils/optimize/cache.py:
--------------------------------------------------------------------------------
  1 | from typing import TYPE_CHECKING, Any, Callable, Dict, List
  2 | 
  3 | from redisvl.utils.utils import lazy_import
  4 | 
  5 | if TYPE_CHECKING:
  6 |     from ranx import Qrels, Run, evaluate
  7 | else:
  8 |     Qrels = lazy_import("ranx.Qrels")
  9 |     Run = lazy_import("ranx.Run")
 10 |     evaluate = lazy_import("ranx.evaluate")
 11 | 
 12 | np = lazy_import("numpy")
 13 | 
 14 | from redisvl.extensions.cache.llm.semantic import SemanticCache
 15 | from redisvl.query import RangeQuery
 16 | from redisvl.utils.optimize.base import BaseThresholdOptimizer, EvalMetric
 17 | from redisvl.utils.optimize.schema import LabeledData
 18 | from redisvl.utils.optimize.utils import NULL_RESPONSE_KEY, _format_qrels
 19 | 
 20 | 
 21 | def _generate_run_cache(test_data: List[LabeledData], threshold: float) -> "Run":
 22 |     """Format observed data for evaluation with ranx"""
 23 |     run_dict: Dict[str, Dict[str, int]] = {}
 24 | 
 25 |     for td in test_data:
 26 |         run_dict[td.id] = {}
 27 |         for res in td.response:
 28 |             if float(res["vector_distance"]) < threshold:
 29 |                 # value of 1 is irrelevant checks only on match for f1
 30 |                 run_dict[td.id][res["id"]] = 1
 31 | 
 32 |         if not run_dict[td.id]:
 33 |             # ranx is a little odd in that if there are no matches it errors
 34 |             # if however there are no keys that match you get the correct score
 35 |             run_dict[td.id][NULL_RESPONSE_KEY] = 1
 36 | 
 37 |     return Run(run_dict)
 38 | 
 39 | 
 40 | def _eval_cache(
 41 |     test_data: List[LabeledData], threshold: float, qrels: "Qrels", metric: str
 42 | ) -> float:
 43 |     """Formats run data and evaluates supported metric"""
 44 |     run = _generate_run_cache(test_data, threshold)
 45 |     return evaluate(qrels, run, metric, make_comparable=True)
 46 | 
 47 | 
 48 | def _get_best_threshold(metrics: dict) -> float:
 49 |     """
 50 |     Returns the threshold with the highest F1 score.
 51 |     If multiple thresholds have the same F1 score, returns the lowest threshold.
 52 |     """
 53 |     return max(metrics.items(), key=lambda x: (x[1]["score"], -x[0]))[0]
 54 | 
 55 | 
 56 | def _grid_search_opt_cache(
 57 |     cache: SemanticCache, test_data: List[LabeledData], eval_metric: EvalMetric
 58 | ):
 59 |     """Evaluates all thresholds in linspace for cache to determine optimal"""
 60 |     thresholds = np.linspace(0.01, 0.8, 60)
 61 |     metrics = {}
 62 | 
 63 |     for td in test_data:
 64 |         vec = cache._vectorizer.embed(td.query)
 65 |         query = RangeQuery(
 66 |             vec, vector_field_name="prompt_vector", distance_threshold=1.0
 67 |         )
 68 |         res = cache.index.query(query)
 69 |         td.response = res
 70 | 
 71 |     qrels = _format_qrels(test_data)
 72 | 
 73 |     for threshold in thresholds:
 74 |         score = _eval_cache(test_data, threshold, qrels, eval_metric.value)
 75 |         metrics[threshold] = {"score": score}
 76 | 
 77 |     best_threshold = _get_best_threshold(metrics)
 78 |     cache.set_threshold(best_threshold)
 79 | 
 80 | 
 81 | class CacheThresholdOptimizer(BaseThresholdOptimizer):
 82 |     """
 83 |     Class for optimizing thresholds for a SemanticCache.
 84 | 
 85 |     .. code-block:: python
 86 | 
 87 |         from redisvl.extensions.cache.llm import SemanticCache
 88 |         from redisvl.utils.optimize import CacheThresholdOptimizer
 89 | 
 90 |         sem_cache = SemanticCache(
 91 |             name="sem_cache",                    # underlying search index name
 92 |             redis_url="redis://localhost:6379",  # redis connection url string
 93 |             distance_threshold=0.5               # semantic cache distance threshold
 94 |         )
 95 | 
 96 |         paris_key = sem_cache.store(prompt="what is the capital of france?", response="paris")
 97 |         rabat_key = sem_cache.store(prompt="what is the capital of morocco?", response="rabat")
 98 | 
 99 |         test_data = [
100 |             {
101 |                 "query": "What's the capital of Britain?",
102 |                 "query_match": ""
103 |             },
104 |             {
105 |                 "query": "What's the capital of France??",
106 |                 "query_match": paris_key
107 |             },
108 |             {
109 |                 "query": "What's the capital city of Morocco?",
110 |                 "query_match": rabat_key
111 |             },
112 |         ]
113 | 
114 |         optimizer = CacheThresholdOptimizer(sem_cache, test_data)
115 |         optimizer.optimize()
116 |     """
117 | 
118 |     def __init__(
119 |         self,
120 |         cache: SemanticCache,
121 |         test_dict: List[Dict[str, Any]],
122 |         opt_fn: Callable = _grid_search_opt_cache,
123 |         eval_metric: str = "f1",
124 |     ):
125 |         """Initialize the cache optimizer.
126 | 
127 |         Args:
128 |             cache (SemanticCache): The RedisVL SemanticCache instance to optimize.
129 |             test_dict (List[Dict[str, Any]]): List of test cases.
130 |             opt_fn (Callable): Function to perform optimization. Defaults to
131 |                 grid search.
132 |             eval_metric (str): Evaluation metric for threshold optimization.
133 |                 Defaults to "f1" score.
134 | 
135 |         Raises:
136 |             ValueError: If the test_dict not in LabeledData format.
137 |         """
138 |         super().__init__(cache, test_dict, opt_fn, eval_metric)
139 | 
140 |     def optimize(self, **kwargs: Any):
141 |         """Optimize thresholds using the provided optimization function for cache case."""
142 |         self.opt_fn(self.optimizable, self.test_data, self.eval_metric, **kwargs)
143 | 


--------------------------------------------------------------------------------
/redisvl/utils/optimize/router.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | from typing import TYPE_CHECKING, Any, Callable, Dict, List
  3 | 
  4 | from redisvl.utils.utils import lazy_import
  5 | 
  6 | if TYPE_CHECKING:
  7 |     from ranx import Qrels, Run, evaluate
  8 | else:
  9 |     Qrels = lazy_import("ranx.Qrels")
 10 |     Run = lazy_import("ranx.Run")
 11 |     evaluate = lazy_import("ranx.evaluate")
 12 | 
 13 | np = lazy_import("numpy")
 14 | 
 15 | from redisvl.extensions.router.semantic import SemanticRouter
 16 | from redisvl.utils.optimize.base import BaseThresholdOptimizer, EvalMetric
 17 | from redisvl.utils.optimize.schema import LabeledData
 18 | from redisvl.utils.optimize.utils import NULL_RESPONSE_KEY, _format_qrels
 19 | 
 20 | 
 21 | def _generate_run_router(test_data: List[LabeledData], router: SemanticRouter) -> "Run":
 22 |     """Format router results into format for ranx Run"""
 23 |     run_dict: Dict[Any, Any] = {}
 24 | 
 25 |     for td in test_data:
 26 |         run_dict[td.id] = {}
 27 |         route_match = router(td.query)
 28 |         if route_match and route_match.name == td.query_match:
 29 |             run_dict[td.id][td.query_match] = np.int64(1)
 30 |         else:
 31 |             run_dict[td.id][NULL_RESPONSE_KEY] = np.int64(1)
 32 | 
 33 |     return Run(run_dict)
 34 | 
 35 | 
 36 | def _eval_router(
 37 |     router: SemanticRouter,
 38 |     test_data: List[LabeledData],
 39 |     qrels: "Qrels",
 40 |     eval_metric: str,
 41 | ) -> float:
 42 |     """Evaluate acceptable metric given run and qrels data"""
 43 |     run = _generate_run_router(test_data, router)
 44 |     return evaluate(qrels, run, eval_metric, make_comparable=True)
 45 | 
 46 | 
 47 | def _router_random_search(
 48 |     route_names: List[str], route_thresholds: dict, search_step=0.10
 49 | ):
 50 |     """Performs random search for many thresholds to many routes"""
 51 |     score_threshold_values = []
 52 |     for route in route_names:
 53 |         score_threshold_values.append(
 54 |             np.linspace(
 55 |                 start=max(route_thresholds[route] - search_step, 0),
 56 |                 stop=route_thresholds[route] + search_step,
 57 |                 num=100,
 58 |             )
 59 |         )
 60 | 
 61 |     return {
 62 |         route: float(random.choice(score_threshold_values[i]))
 63 |         for i, route in enumerate(route_names)
 64 |     }
 65 | 
 66 | 
 67 | def _random_search_opt_router(
 68 |     router: SemanticRouter,
 69 |     test_data: List[LabeledData],
 70 |     qrels: "Qrels",
 71 |     eval_metric: EvalMetric,
 72 |     **kwargs: Any,
 73 | ):
 74 |     """Performs complete optimization for router cases provide acceptable metric"""
 75 | 
 76 |     start_score = _eval_router(router, test_data, qrels, eval_metric.value)
 77 |     best_score = start_score
 78 |     best_thresholds = router.route_thresholds
 79 | 
 80 |     max_iterations = kwargs.get("max_iterations", 20)
 81 |     search_step = kwargs.get("search_step", 0.10)
 82 | 
 83 |     for _ in range(max_iterations):
 84 |         route_names = router.route_names
 85 |         route_thresholds = router.route_thresholds
 86 |         thresholds = _router_random_search(
 87 |             route_names=route_names,
 88 |             route_thresholds=route_thresholds,
 89 |             search_step=search_step,
 90 |         )
 91 |         router.update_route_thresholds(thresholds)
 92 |         score = _eval_router(router, test_data, qrels, eval_metric.value)
 93 |         if score > best_score:
 94 |             best_score = score
 95 |             best_thresholds = thresholds
 96 | 
 97 |     print(
 98 |         f"Eval metric {eval_metric.value.upper()}: start {round(start_score, 3)}, end {round(best_score, 3)} \nEnding thresholds: {router.route_thresholds}"
 99 |     )
100 |     router.update_route_thresholds(best_thresholds)
101 | 
102 | 
103 | class RouterThresholdOptimizer(BaseThresholdOptimizer):
104 |     """
105 |     Class for optimizing thresholds for a SemanticRouter.
106 | 
107 |     .. code-block:: python
108 | 
109 |         from redisvl.extensions.router import Route, SemanticRouter
110 |         from redisvl.utils.vectorize import HFTextVectorizer
111 |         from redisvl.utils.optimize import RouterThresholdOptimizer
112 | 
113 |         routes = [
114 |                 Route(
115 |                     name="greeting",
116 |                     references=["hello", "hi"],
117 |                     metadata={"type": "greeting"},
118 |                     distance_threshold=0.5,
119 |                 ),
120 |                 Route(
121 |                     name="farewell",
122 |                     references=["bye", "goodbye"],
123 |                     metadata={"type": "farewell"},
124 |                     distance_threshold=0.5,
125 |                 ),
126 |             ]
127 | 
128 |         router = SemanticRouter(
129 |             name="greeting-router",
130 |             vectorizer=HFTextVectorizer(),
131 |             routes=routes,
132 |             redis_url="redis://localhost:6379",
133 |             overwrite=True # Blow away any other routing index with this name
134 |         )
135 | 
136 |         test_data = [
137 |             {"query": "hello", "query_match": "greeting"},
138 |             {"query": "goodbye", "query_match": "farewell"},
139 |             ...
140 |         ]
141 | 
142 |         optimizer = RouterThresholdOptimizer(router, test_data)
143 |         optimizer.optimize()
144 |     """
145 | 
146 |     def __init__(
147 |         self,
148 |         router: SemanticRouter,
149 |         test_dict: List[Dict[str, Any]],
150 |         opt_fn: Callable = _random_search_opt_router,
151 |         eval_metric: str = "f1",
152 |     ):
153 |         """Initialize the router optimizer.
154 | 
155 |         Args:
156 |             router (SemanticRouter): The RedisVL SemanticRouter instance to optimize.
157 |             test_dict (List[Dict[str, Any]]): List of test cases.
158 |             opt_fn (Callable): Function to perform optimization. Defaults to
159 |                 grid search.
160 |             eval_metric (str): Evaluation metric for threshold optimization.
161 |                 Defaults to "f1" score.
162 |         Raises:
163 |             ValueError: If the test_dict not in LabeledData format.
164 |         """
165 |         super().__init__(router, test_dict, opt_fn, eval_metric)
166 | 
167 |     def optimize(self, **kwargs: Any):
168 |         """Optimize kicks off the optimization process for router"""
169 |         qrels = _format_qrels(self.test_data)
170 |         self.opt_fn(self.optimizable, self.test_data, qrels, self.eval_metric, **kwargs)
171 | 


--------------------------------------------------------------------------------
/redisvl/utils/optimize/schema.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Optional
 2 | 
 3 | from pydantic import BaseModel, Field
 4 | from ulid import ULID
 5 | 
 6 | 
 7 | class LabeledData(BaseModel):
 8 |     id: str = Field(default_factory=lambda: str(ULID()))
 9 |     query: str
10 |     query_match: Optional[str]
11 |     response: List[dict] = []
12 | 


--------------------------------------------------------------------------------
/redisvl/utils/optimize/utils.py:
--------------------------------------------------------------------------------
 1 | from typing import TYPE_CHECKING, List
 2 | 
 3 | from redisvl.utils.utils import lazy_import
 4 | 
 5 | if TYPE_CHECKING:
 6 |     from ranx import Qrels
 7 | else:
 8 |     Qrels = lazy_import("ranx.Qrels")
 9 | 
10 | np = lazy_import("numpy")
11 | 
12 | from redisvl.utils.optimize.schema import LabeledData
13 | 
14 | NULL_RESPONSE_KEY = "no_match"
15 | 
16 | 
17 | def _format_qrels(test_data: List[LabeledData]) -> "Qrels":
18 |     """Utility function for creating qrels for evaluation with ranx"""
19 |     qrels_dict = {}
20 | 
21 |     for td in test_data:
22 |         if td.query_match:
23 |             qrels_dict[td.id] = {td.query_match: np.int64(1)}
24 |         else:
25 |             # This is for capturing true negatives from test set
26 |             qrels_dict[td.id] = {NULL_RESPONSE_KEY: np.int64(1)}
27 | 
28 |     return Qrels(qrels_dict)
29 | 
30 | 
31 | def _validate_test_dict(test_dict: List[dict]) -> List[LabeledData]:
32 |     """Convert/validate test_dict for use in optimizer"""
33 |     return [LabeledData(**d) for d in test_dict]
34 | 


--------------------------------------------------------------------------------
/redisvl/utils/rerank/__init__.py:
--------------------------------------------------------------------------------
 1 | from redisvl.utils.rerank.base import BaseReranker
 2 | from redisvl.utils.rerank.cohere import CohereReranker
 3 | from redisvl.utils.rerank.hf_cross_encoder import HFCrossEncoderReranker
 4 | from redisvl.utils.rerank.voyageai import VoyageAIReranker
 5 | 
 6 | __all__ = [
 7 |     "BaseReranker",
 8 |     "CohereReranker",
 9 |     "HFCrossEncoderReranker",
10 |     "VoyageAIReranker",
11 | ]
12 | 


--------------------------------------------------------------------------------
/redisvl/utils/rerank/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Any, Dict, List, Optional, Tuple, Union
 3 | 
 4 | from pydantic import BaseModel, field_validator
 5 | 
 6 | 
 7 | class BaseReranker(BaseModel, ABC):
 8 |     model: str
 9 |     rank_by: Optional[List[str]] = None
10 |     limit: int
11 |     return_score: bool
12 | 
13 |     @field_validator("limit")
14 |     @classmethod
15 |     def check_limit(cls, value):
16 |         """Ensures the limit is a positive integer."""
17 |         if value <= 0:
18 |             raise ValueError("Limit must be a positive integer.")
19 |         return value
20 | 
21 |     @field_validator("rank_by")
22 |     @classmethod
23 |     def check_rank_by(cls, value):
24 |         """Ensures that rank_by is a list of strings if provided."""
25 |         if value is not None and (
26 |             not isinstance(value, list)
27 |             or any(not isinstance(item, str) for item in value)
28 |         ):
29 |             raise ValueError("rank_by must be a list of strings.")
30 |         return value
31 | 
32 |     @abstractmethod
33 |     def rank(
34 |         self, query: str, docs: Union[List[Dict[str, Any]], List[str]], **kwargs
35 |     ) -> Union[Tuple[List[Dict[str, Any]], List[float]], List[Dict[str, Any]]]:
36 |         """
37 |         Synchronously rerank the docs based on the provided query.
38 |         """
39 |         pass
40 | 
41 |     @abstractmethod
42 |     async def arank(
43 |         self, query: str, docs: Union[List[Dict[str, Any]], List[str]], **kwargs
44 |     ) -> Union[Tuple[List[Dict[str, Any]], List[float]], List[Dict[str, Any]]]:
45 |         """
46 |         Asynchronously rerank the docs based on the provided query.
47 |         """
48 |         pass
49 | 


--------------------------------------------------------------------------------
/redisvl/utils/rerank/hf_cross_encoder.py:
--------------------------------------------------------------------------------
  1 | from typing import Any, Dict, List, Optional, Tuple, Union
  2 | 
  3 | from pydantic import PrivateAttr
  4 | 
  5 | from redisvl.utils.rerank.base import BaseReranker
  6 | 
  7 | 
  8 | class HFCrossEncoderReranker(BaseReranker):
  9 |     """
 10 |     The HFCrossEncoderReranker class uses a cross-encoder models from Hugging Face
 11 |     to rerank documents based on an input query.
 12 | 
 13 |     This reranker loads a cross-encoder model using the `CrossEncoder` class
 14 |     from the `sentence_transformers` library. It requires the
 15 |     `sentence_transformers` library to be installed.
 16 | 
 17 |     .. code-block:: python
 18 | 
 19 |         from redisvl.utils.rerank import HFCrossEncoderReranker
 20 | 
 21 |         # set up the HFCrossEncoderReranker with a specific model
 22 |         reranker = HFCrossEncoderReranker(model_name="cross-encoder/ms-marco-MiniLM-L-6-v2", limit=3)
 23 |         # rerank raw search results based on user input/query
 24 |         results = reranker.rank(
 25 |             query="your input query text here",
 26 |             docs=[
 27 |                 {"content": "document 1"},
 28 |                 {"content": "document 2"},
 29 |                 {"content": "document 3"}
 30 |             ]
 31 |         )
 32 |     """
 33 | 
 34 |     _client: Any = PrivateAttr()
 35 | 
 36 |     def __init__(
 37 |         self,
 38 |         model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2",
 39 |         limit: int = 3,
 40 |         return_score: bool = True,
 41 |         **kwargs,
 42 |     ):
 43 |         """
 44 |         Initialize the HFCrossEncoderReranker with a specified model and ranking criteria.
 45 | 
 46 |         Parameters:
 47 |             model (str): The name or path of the cross-encoder model to use for reranking.
 48 |                 Defaults to 'cross-encoder/ms-marco-MiniLM-L-6-v2'.
 49 |             limit (int): The maximum number of results to return after reranking. Must be a positive integer.
 50 |             return_score (bool): Whether to return scores alongside the reranked results.
 51 |         """
 52 |         model = model or kwargs.pop("model_name", None)
 53 |         super().__init__(
 54 |             model=model, rank_by=None, limit=limit, return_score=return_score
 55 |         )
 56 |         self._initialize_client(**kwargs)
 57 | 
 58 |     def _initialize_client(self, **kwargs):
 59 |         """
 60 |         Setup the huggingface cross-encoder client using optional kwargs.
 61 |         """
 62 |         # Dynamic import of the sentence-transformers module
 63 |         try:
 64 |             from sentence_transformers import CrossEncoder
 65 |         except ImportError:
 66 |             raise ImportError(
 67 |                 "HFCrossEncoder reranker requires the sentence-transformers library. \
 68 |                     Please install with `pip install sentence-transformers`"
 69 |             )
 70 | 
 71 |         self._client = CrossEncoder(self.model, **kwargs)
 72 | 
 73 |     def rank(
 74 |         self, query: str, docs: Union[List[Dict[str, Any]], List[str]], **kwargs
 75 |     ) -> Union[Tuple[List[Dict[str, Any]], List[float]], List[Dict[str, Any]]]:
 76 |         """
 77 |         Rerank documents based on the provided query using the loaded cross-encoder model.
 78 | 
 79 |         This method processes the user's query and the provided documents to rerank them
 80 |         in a manner that is potentially more relevant to the query's context.
 81 | 
 82 |         Parameters:
 83 |             query (str): The user's search query.
 84 |             docs (Union[List[Dict[str, Any]], List[str]]): The list of documents to be ranked,
 85 |                 either as dictionaries or strings.
 86 | 
 87 |         Returns:
 88 |             Union[Tuple[List[Dict[str, Any]], List[float]], List[Dict[str, Any]]]:
 89 |                 The reranked list of documents and optionally associated scores.
 90 |         """
 91 |         limit = kwargs.get("limit", self.limit)
 92 |         return_score = kwargs.get("return_score", self.return_score)
 93 | 
 94 |         if not query:
 95 |             raise ValueError("query cannot be empty")
 96 | 
 97 |         if not isinstance(query, str):
 98 |             raise TypeError("query must be a string")
 99 | 
100 |         if not isinstance(docs, list):
101 |             raise TypeError("docs must be a list")
102 | 
103 |         if not docs:
104 |             return [] if not return_score else ([], [])
105 | 
106 |         if all(isinstance(doc, dict) for doc in docs):
107 |             texts = [
108 |                 str(doc["content"])
109 |                 for doc in docs
110 |                 if isinstance(doc, dict) and "content" in doc
111 |             ]
112 |             doc_subset = [
113 |                 doc for doc in docs if isinstance(doc, dict) and "content" in doc
114 |             ]
115 |         else:
116 |             texts = [str(doc) for doc in docs]
117 |             doc_subset = [{"content": doc} for doc in docs]
118 | 
119 |         scores = self._client.predict([(query, text) for text in texts])
120 |         scores = [float(score) for score in scores]
121 |         docs_with_scores = list(zip(doc_subset, scores))
122 |         docs_with_scores.sort(key=lambda x: x[1], reverse=True)
123 |         reranked_docs = [doc for doc, _ in docs_with_scores[:limit]]
124 |         scores = scores[:limit]
125 | 
126 |         if return_score:
127 |             return reranked_docs, scores  # type: ignore
128 |         return reranked_docs
129 | 
130 |     async def arank(
131 |         self, query: str, docs: Union[List[Dict[str, Any]], List[str]], **kwargs
132 |     ) -> Union[Tuple[List[Dict[str, Any]], List[float]], List[Dict[str, Any]]]:
133 |         """
134 |         Asynchronously rerank documents based on the provided query using the loaded cross-encoder model.
135 | 
136 |         This method processes the user's query and the provided documents to rerank them
137 |         in a manner that is potentially more relevant to the query's context.
138 | 
139 |         Parameters:
140 |             query (str): The user's search query.
141 |             docs (Union[List[Dict[str, Any]], List[str]]): The list of documents to be ranked,
142 |                 either as dictionaries or strings.
143 | 
144 |         Returns:
145 |             Union[Tuple[List[Dict[str, Any]], List[float]], List[Dict[str, Any]]]:
146 |                 The reranked list of documents and optionally associated scores.
147 |         """
148 |         return self.rank(query, docs, **kwargs)
149 | 


--------------------------------------------------------------------------------
/redisvl/utils/token_escaper.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import Optional, Pattern
 3 | 
 4 | 
 5 | class TokenEscaper:
 6 |     """Escape punctuation within an input string.
 7 | 
 8 |     Adapted from RedisOM Python.
 9 |     """
10 | 
11 |     # Characters that RediSearch requires us to escape during queries.
12 |     # Source: https://redis.io/docs/stack/search/reference/escaping/#the-rules-of-text-field-tokenization
13 |     DEFAULT_ESCAPED_CHARS = r"[,.<>{}\[\]\\\"\':;!@#$%^&*()\-+=~\/ ]"
14 | 
15 |     def __init__(self, escape_chars_re: Optional[Pattern] = None):
16 |         if escape_chars_re:
17 |             self.escaped_chars_re = escape_chars_re
18 |         else:
19 |             self.escaped_chars_re = re.compile(self.DEFAULT_ESCAPED_CHARS)
20 | 
21 |     def escape(self, value: str) -> str:
22 |         if not isinstance(value, str):
23 |             raise TypeError(
24 |                 f"Value must be a string object for token escaping, got type {type(value)}"
25 |             )
26 | 
27 |         def escape_symbol(match):
28 |             value = match.group(0)
29 |             return f"\\{value}"
30 | 
31 |         return self.escaped_chars_re.sub(escape_symbol, value)
32 | 


--------------------------------------------------------------------------------
/redisvl/utils/vectorize/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import Optional
 3 | 
 4 | from redisvl.extensions.cache.embeddings import EmbeddingsCache
 5 | from redisvl.utils.vectorize.base import BaseVectorizer, Vectorizers
 6 | from redisvl.utils.vectorize.text.azureopenai import AzureOpenAITextVectorizer
 7 | from redisvl.utils.vectorize.text.bedrock import BedrockTextVectorizer
 8 | from redisvl.utils.vectorize.text.cohere import CohereTextVectorizer
 9 | from redisvl.utils.vectorize.text.custom import CustomTextVectorizer
10 | from redisvl.utils.vectorize.text.huggingface import HFTextVectorizer
11 | from redisvl.utils.vectorize.text.mistral import MistralAITextVectorizer
12 | from redisvl.utils.vectorize.text.openai import OpenAITextVectorizer
13 | from redisvl.utils.vectorize.text.vertexai import VertexAITextVectorizer
14 | from redisvl.utils.vectorize.text.voyageai import VoyageAITextVectorizer
15 | 
16 | __all__ = [
17 |     "BaseVectorizer",
18 |     "CohereTextVectorizer",
19 |     "HFTextVectorizer",
20 |     "OpenAITextVectorizer",
21 |     "VertexAITextVectorizer",
22 |     "AzureOpenAITextVectorizer",
23 |     "MistralAITextVectorizer",
24 |     "CustomTextVectorizer",
25 |     "BedrockTextVectorizer",
26 |     "VoyageAITextVectorizer",
27 | ]
28 | 
29 | 
30 | def vectorizer_from_dict(
31 |     vectorizer: dict,
32 |     cache: dict = {},
33 |     cache_folder=os.getenv("SENTENCE_TRANSFORMERS_HOME"),
34 | ) -> BaseVectorizer:
35 |     vectorizer_type = Vectorizers(vectorizer["type"])
36 |     model = vectorizer["model"]
37 | 
38 |     args = {"model": model}
39 |     if cache:
40 |         emb_cache = EmbeddingsCache(**cache)
41 |         args["cache"] = emb_cache
42 | 
43 |     if vectorizer_type == Vectorizers.cohere:
44 |         return CohereTextVectorizer(**args)
45 |     elif vectorizer_type == Vectorizers.openai:
46 |         return OpenAITextVectorizer(**args)
47 |     elif vectorizer_type == Vectorizers.azure_openai:
48 |         return AzureOpenAITextVectorizer(**args)
49 |     elif vectorizer_type == Vectorizers.hf:
50 |         return HFTextVectorizer(**args)
51 |     elif vectorizer_type == Vectorizers.mistral:
52 |         return MistralAITextVectorizer(**args)
53 |     elif vectorizer_type == Vectorizers.vertexai:
54 |         return VertexAITextVectorizer(**args)
55 |     elif vectorizer_type == Vectorizers.voyageai:
56 |         return VoyageAITextVectorizer(**args)
57 |     else:
58 |         raise ValueError(f"Unsupported vectorizer type: {vectorizer_type}")
59 | 


--------------------------------------------------------------------------------
/redisvl/utils/vectorize/text/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/redis/redis-vl-python/defa62a3f291fd8093baf0c652c7d71e6528bfcb/redisvl/utils/vectorize/text/__init__.py


--------------------------------------------------------------------------------
/redisvl/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.7.0"
2 | 


--------------------------------------------------------------------------------
/schemas/schema.yaml:
--------------------------------------------------------------------------------
 1 | version: '0.1.0'
 2 | 
 3 | index:
 4 |   name: user-idx
 5 |   prefix: user
 6 |   storage_type: json
 7 | 
 8 | fields:
 9 |   - name: user
10 |     type: tag
11 |   - name: credit_score
12 |     type: tag
13 |   - name: embedding
14 |     type: vector
15 |     attrs:
16 |       algorithm: flat
17 |       dims: 4
18 |       distance_metric: cosine
19 |       datatype: float32
20 | 


--------------------------------------------------------------------------------
/schemas/semantic_router.yaml:
--------------------------------------------------------------------------------
 1 | name: test-router
 2 | routes:
 3 | - name: greeting
 4 |   references:
 5 |   - hello
 6 |   - hi
 7 |   metadata:
 8 |     type: greeting
 9 |   distance_threshold: 0.3
10 | - name: farewell
11 |   references:
12 |   - bye
13 |   - goodbye
14 |   metadata:
15 |     type: farewell
16 |   distance_threshold: 0.2
17 | vectorizer:
18 |   type: hf
19 |   model: sentence-transformers/all-mpnet-base-v2
20 | routing_config:
21 |   max_k: 2
22 |   aggregation_method: avg
23 | 


--------------------------------------------------------------------------------
/schemas/test_hash_schema.yaml:
--------------------------------------------------------------------------------
 1 | version: '0.1.0'
 2 | 
 3 | index:
 4 |     name: hash-test
 5 |     prefix: hash
 6 |     storage_type: hash
 7 | 
 8 | fields:
 9 |   - name: sentence
10 |     type: text
11 |   - name: embedding
12 |     type: vector
13 |     attrs:
14 |       dims: 768
15 |       algorithm: flat
16 |       distance_metric: cosine


--------------------------------------------------------------------------------
/schemas/test_json_schema.yaml:
--------------------------------------------------------------------------------
 1 | version: '0.1.0'
 2 | 
 3 | index:
 4 |     name: json-test
 5 |     prefix: json
 6 |     storage_type: json
 7 | 
 8 | fields:
 9 |   - name: sentence
10 |     type: text
11 |   - name: embedding
12 |     type: vector
13 |     attrs:
14 |       dims: 768
15 |       algorithm: flat
16 |       distance_metric: cosine


--------------------------------------------------------------------------------
/scripts.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import sys
 3 | 
 4 | 
 5 | def format():
 6 |     subprocess.run(["isort", "./redisvl", "./tests/", "--profile", "black"], check=True)
 7 |     subprocess.run(["black", "./redisvl", "./tests/"], check=True)
 8 | 
 9 | 
10 | def check_format():
11 |     subprocess.run(["black", "--check", "./redisvl"], check=True)
12 | 
13 | 
14 | def sort_imports():
15 |     subprocess.run(["isort", "./redisvl", "./tests/", "--profile", "black"], check=True)
16 | 
17 | 
18 | def check_sort_imports():
19 |     subprocess.run(
20 |         ["isort", "./redisvl", "--check-only", "--profile", "black"], check=True
21 |     )
22 | 
23 | 
24 | def check_lint():
25 |     subprocess.run(["pylint", "--rcfile=.pylintrc", "./redisvl"], check=True)
26 | 
27 | 
28 | def check_mypy():
29 |     subprocess.run(["python", "-m", "mypy", "./redisvl"], check=True)
30 | 
31 | 
32 | def test():
33 |     test_cmd = ["python", "-m", "pytest", "-n", "auto", "--log-level=CRITICAL"]
34 |     # Get any extra arguments passed to the script
35 |     extra_args = sys.argv[1:]
36 |     if extra_args:
37 |         test_cmd.extend(extra_args)
38 |     subprocess.run(test_cmd, check=True)
39 | 
40 | 
41 | def test_verbose():
42 |     test_cmd = [
43 |         "python",
44 |         "-m",
45 |         "pytest",
46 |         "-n",
47 |         "auto",
48 |         "-vv",
49 |         "-s",
50 |         "--log-level=CRITICAL",
51 |     ]
52 |     # Get any extra arguments passed to the script
53 |     extra_args = sys.argv[1:]
54 |     if extra_args:
55 |         test_cmd.extend(extra_args)
56 |     subprocess.run(test_cmd, check=True)
57 | 
58 | 
59 | def test_notebooks():
60 |     test_cmd = [
61 |         "python",
62 |         "-m",
63 |         "pytest",
64 |         "--nbval-lax",
65 |         "./docs/user_guide",
66 |         "-vvv",
67 |     ]
68 |     extra_args = sys.argv[1:]
69 |     if extra_args:
70 |         test_cmd.extend(extra_args)
71 | 
72 |     subprocess.run(
73 |         test_cmd,
74 |         check=True,
75 |     )
76 | 
77 | 
78 | def build_docs():
79 |     subprocess.run("cd docs/ && make html", shell=True)
80 | 
81 | 
82 | def serve_docs():
83 |     subprocess.run("cd docs/_build/html && python -m http.server", shell=True)
84 | 


--------------------------------------------------------------------------------
/tests/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   redis:
 3 |     image: "${REDIS_IMAGE:-redis/redis-stack-server:latest}"
 4 |     ports:
 5 |       - "6379"
 6 |     environment:
 7 |       - "REDIS_ARGS=--save '' --appendonly no"
 8 |     deploy:
 9 |       replicas: 1
10 |       restart_policy:
11 |         condition: on-failure
12 |     labels:
13 |       - "com.docker.compose.publishers=redis,6379,6379"


--------------------------------------------------------------------------------
/tests/integration/test_cross_encoder_reranker.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from redisvl.utils.rerank.hf_cross_encoder import HFCrossEncoderReranker
 4 | 
 5 | 
 6 | @pytest.fixture(scope="session")
 7 | def reranker():
 8 |     return HFCrossEncoderReranker()
 9 | 
10 | 
11 | def test_rank_documents(reranker):
12 |     docs = ["document one", "document two", "document three"]
13 |     query = "search query"
14 | 
15 |     reranked_docs, scores = reranker.rank(query, docs)
16 | 
17 |     assert isinstance(reranked_docs, list)
18 |     assert len(reranked_docs) == reranker.limit
19 |     assert all(isinstance(score, float) for score in scores)
20 | 
21 | 
22 | @pytest.mark.asyncio
23 | async def test_async_rank_documents(reranker):
24 |     docs = ["document one", "document two", "document three"]
25 |     query = "search query"
26 | 
27 |     reranked_docs, scores = await reranker.arank(query, docs)
28 | 
29 |     assert isinstance(reranked_docs, list)
30 |     assert len(reranked_docs) == reranker.limit
31 |     assert all(isinstance(score, float) for score in scores)
32 | 
33 | 
34 | def test_bad_input(reranker):
35 |     with pytest.raises(ValueError):
36 |         reranker.rank("", [])  # Empty query
37 | 
38 |     with pytest.raises(TypeError):
39 |         reranker.rank(123, ["valid document"])  # Invalid type for query
40 | 
41 |     with pytest.raises(TypeError):
42 |         reranker.rank("valid query", "not a list")  # Invalid type for documents
43 | 
44 | 
45 | def test_rerank_empty(reranker):
46 |     docs = []
47 |     query = "search query"
48 | 
49 |     reranked_docs = reranker.rank(query, docs, return_score=False)
50 | 
51 |     assert isinstance(reranked_docs, list)
52 |     assert len(reranked_docs) == 0
53 | 


--------------------------------------------------------------------------------
/tests/integration/test_flow.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | from redisvl.index import SearchIndex
  4 | from redisvl.query import VectorQuery
  5 | from redisvl.redis.utils import array_to_buffer
  6 | from redisvl.schema import StorageType
  7 | 
  8 | fields_spec = [
  9 |     {"name": "credit_score", "type": "tag"},
 10 |     {"name": "user", "type": "tag"},
 11 |     {"name": "job", "type": "text"},
 12 |     {"name": "age", "type": "numeric"},
 13 |     {
 14 |         "name": "user_embedding",
 15 |         "type": "vector",
 16 |         "attrs": {
 17 |             "dims": 3,
 18 |             "distance_metric": "cosine",
 19 |             "algorithm": "flat",
 20 |             "datatype": "float32",
 21 |         },
 22 |     },
 23 | ]
 24 | 
 25 | hash_schema = {
 26 |     "index": {
 27 |         "name": "user_index_hash",
 28 |         "prefix": "users_hash",
 29 |         "storage_type": "hash",
 30 |     },
 31 |     "fields": fields_spec,
 32 | }
 33 | 
 34 | json_schema = {
 35 |     "index": {
 36 |         "name": "user_index_json",
 37 |         "prefix": "users_json",
 38 |         "storage_type": "json",
 39 |     },
 40 |     "fields": fields_spec,
 41 | }
 42 | 
 43 | 
 44 | @pytest.mark.parametrize("schema", [hash_schema, json_schema])
 45 | def test_simple(client, schema, sample_data, worker_id):
 46 |     # Update schema with worker_id
 47 |     schema = schema.copy()
 48 |     schema["index"] = schema["index"].copy()
 49 |     schema["index"]["name"] = f"{schema['index']['name']}_{worker_id}"
 50 |     schema["index"]["prefix"] = f"{schema['index']['prefix']}_{worker_id}"
 51 |     index = SearchIndex.from_dict(schema, redis_client=client)
 52 |     # create the index
 53 |     index.create(overwrite=True, drop=True)
 54 | 
 55 |     # Prepare and load the data based on storage type
 56 |     def hash_preprocess(item: dict) -> dict:
 57 |         return {
 58 |             **item,
 59 |             "user_embedding": array_to_buffer(item["user_embedding"], "float32"),
 60 |         }
 61 | 
 62 |     if index.storage_type == StorageType.HASH:
 63 |         index.load(sample_data, preprocess=hash_preprocess, id_field="user")
 64 |     else:
 65 |         index.load(sample_data, id_field="user")
 66 | 
 67 |     assert index.fetch("john")
 68 | 
 69 |     return_fields = ["user", "age", "job", "credit_score"]
 70 |     query = VectorQuery(
 71 |         vector=[0.1, 0.1, 0.5],
 72 |         vector_field_name="user_embedding",
 73 |         return_fields=return_fields,
 74 |         num_results=3,
 75 |     )
 76 | 
 77 |     results = index.search(query.query, query_params=query.params)
 78 |     results_2 = index.query(query)
 79 |     assert len(results.docs) == len(results_2)
 80 | 
 81 |     # make sure correct users returned
 82 |     # users = list(results.docs)
 83 |     # print(len(users))
 84 |     users = [doc for doc in results.docs]
 85 |     assert users[0].user in ["john", "mary"]
 86 |     assert users[1].user in ["john", "mary"]
 87 | 
 88 |     # make sure vector scores are correct
 89 |     # query vector and first two are the same vector.
 90 |     # third is different (hence should be positive difference)
 91 |     assert float(users[0].vector_distance) == 0.0
 92 |     assert float(users[1].vector_distance) == 0.0
 93 |     assert float(users[2].vector_distance) > 0
 94 | 
 95 |     for doc1, doc2 in zip(results.docs, results_2):
 96 |         for field in return_fields:
 97 |             assert getattr(doc1, field) == doc2[field]
 98 | 
 99 |     count_deleted_keys = index.clear()
100 |     assert count_deleted_keys == len(sample_data)
101 | 
102 |     assert index.exists() == True
103 | 
104 |     index.delete()
105 | 
106 |     assert index.exists() == False
107 | 


--------------------------------------------------------------------------------
/tests/integration/test_flow_async.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import time
  3 | 
  4 | import pytest
  5 | 
  6 | from redisvl.index import AsyncSearchIndex
  7 | from redisvl.query import VectorQuery
  8 | from redisvl.redis.utils import array_to_buffer
  9 | from redisvl.schema import StorageType
 10 | 
 11 | fields_spec = [
 12 |     {"name": "credit_score", "type": "tag"},
 13 |     {"name": "user", "type": "tag"},
 14 |     {"name": "job", "type": "text"},
 15 |     {"name": "age", "type": "numeric"},
 16 |     {
 17 |         "name": "user_embedding",
 18 |         "type": "vector",
 19 |         "attrs": {
 20 |             "dims": 3,
 21 |             "distance_metric": "cosine",
 22 |             "algorithm": "flat",
 23 |             "datatype": "float32",
 24 |         },
 25 |     },
 26 | ]
 27 | 
 28 | hash_schema = {
 29 |     "index": {
 30 |         "name": "user_index_hash",
 31 |         "prefix": "users_hash",
 32 |         "storage_type": "hash",
 33 |     },
 34 |     "fields": fields_spec,
 35 | }
 36 | 
 37 | json_schema = {
 38 |     "index": {
 39 |         "name": "user_index_json",
 40 |         "prefix": "users_json",
 41 |         "storage_type": "json",
 42 |     },
 43 |     "fields": fields_spec,
 44 | }
 45 | 
 46 | 
 47 | @pytest.mark.asyncio
 48 | @pytest.mark.parametrize("schema", [hash_schema, json_schema])
 49 | async def test_simple(async_client, schema, sample_data, worker_id):
 50 |     # Update schema with worker_id
 51 |     schema = schema.copy()
 52 |     schema["index"] = schema["index"].copy()
 53 |     schema["index"]["name"] = f"{schema['index']['name']}_{worker_id}"
 54 |     schema["index"]["prefix"] = f"{schema['index']['prefix']}_{worker_id}"
 55 |     index = AsyncSearchIndex.from_dict(schema, redis_client=async_client)
 56 |     # create the index
 57 |     await index.create(overwrite=True, drop=True)
 58 | 
 59 |     # Prepare and load the data based on storage type
 60 |     def hash_preprocess(item: dict) -> dict:
 61 |         return {
 62 |             **item,
 63 |             "user_embedding": array_to_buffer(item["user_embedding"], "float32"),
 64 |         }
 65 | 
 66 |     if index.storage_type == StorageType.HASH:
 67 |         await index.load(sample_data, preprocess=hash_preprocess, id_field="user")
 68 |     else:
 69 |         await index.load(sample_data, id_field="user")
 70 | 
 71 |     assert await index.fetch("john")
 72 | 
 73 |     # wait for async index to create
 74 |     await asyncio.sleep(1)
 75 | 
 76 |     return_fields = ["user", "age", "job", "credit_score"]
 77 |     query = VectorQuery(
 78 |         vector=[0.1, 0.1, 0.5],
 79 |         vector_field_name="user_embedding",
 80 |         return_fields=return_fields,
 81 |         num_results=3,
 82 |     )
 83 | 
 84 |     results = await index.search(query.query, query_params=query.params)
 85 |     results_2 = await index.query(query)
 86 |     assert len(results.docs) == len(results_2)
 87 | 
 88 |     # make sure correct users returned
 89 |     users = [doc for doc in results.docs]
 90 |     assert users[0].user in ["john", "mary"]
 91 |     assert users[1].user in ["john", "mary"]
 92 | 
 93 |     # make sure vector scores are correct
 94 |     assert float(users[0].vector_distance) == 0.0
 95 |     assert float(users[1].vector_distance) == 0.0
 96 |     assert float(users[2].vector_distance) > 0
 97 | 
 98 |     for doc1, doc2 in zip(results.docs, results_2):
 99 |         for field in return_fields:
100 |             assert getattr(doc1, field) == doc2[field]
101 | 
102 |     count_deleted_keys = await index.clear()
103 |     assert count_deleted_keys == len(sample_data)
104 | 
105 |     assert await index.exists() == True
106 | 
107 |     await index.delete()
108 | 
109 |     assert await index.exists() == False
110 | 


--------------------------------------------------------------------------------
/tests/integration/test_rerankers.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import pytest
  4 | 
  5 | from redisvl.utils.rerank import (
  6 |     CohereReranker,
  7 |     HFCrossEncoderReranker,
  8 |     VoyageAIReranker,
  9 | )
 10 | 
 11 | 
 12 | # Fixture for the reranker instance
 13 | @pytest.fixture(
 14 |     params=[
 15 |         CohereReranker,
 16 |         VoyageAIReranker,
 17 |     ]
 18 | )
 19 | def reranker(request):
 20 |     if request.param == CohereReranker:
 21 |         return request.param()
 22 |     elif request.param == VoyageAIReranker:
 23 |         return request.param(model="rerank-lite-1")
 24 | 
 25 | 
 26 | @pytest.fixture
 27 | def hfCrossEncoderReranker():
 28 |     return HFCrossEncoderReranker()
 29 | 
 30 | 
 31 | @pytest.fixture
 32 | def hfCrossEncoderRerankerWithCustomModel():
 33 |     return HFCrossEncoderReranker("cross-encoder/stsb-distilroberta-base")
 34 | 
 35 | 
 36 | @pytest.mark.requires_api_keys
 37 | def test_rank_documents(reranker):
 38 |     docs = ["document one", "document two", "document three"]
 39 |     query = "search query"
 40 | 
 41 |     reranked_docs, scores = reranker.rank(query, docs)
 42 | 
 43 |     assert isinstance(reranked_docs, list)
 44 |     assert len(reranked_docs) == len(docs)  # Ensure we get back as many docs as we sent
 45 |     assert all(isinstance(score, float) for score in scores)  # Scores should be floats
 46 | 
 47 | 
 48 | @pytest.mark.requires_api_keys
 49 | @pytest.mark.asyncio
 50 | async def test_async_rank_documents(reranker):
 51 |     docs = ["document one", "document two", "document three"]
 52 |     query = "search query"
 53 | 
 54 |     reranked_docs, scores = await reranker.arank(query, docs)
 55 | 
 56 |     assert isinstance(reranked_docs, list)
 57 |     assert len(reranked_docs) == len(docs)  # Ensure we get back as many docs as we sent
 58 |     assert all(isinstance(score, float) for score in scores)  # Scores should be floats
 59 | 
 60 | 
 61 | @pytest.mark.requires_api_keys
 62 | def test_bad_input(reranker):
 63 |     with pytest.raises(Exception):
 64 |         reranker.rank("", [])  # Empty query or documents
 65 | 
 66 |     with pytest.raises(Exception):
 67 |         reranker.rank(123, ["valid document"])  # Invalid type for query
 68 | 
 69 |     with pytest.raises(Exception):
 70 |         reranker.rank("valid query", "not a list")  # Invalid type for documents
 71 | 
 72 |     if isinstance(reranker, CohereReranker):
 73 |         with pytest.raises(Exception):
 74 |             reranker.rank(
 75 |                 "valid query", [{"field": "valid document"}], rank_by=["invalid_field"]
 76 |             )  # Invalid rank_by field
 77 | 
 78 | 
 79 | def test_rank_documents_cross_encoder(hfCrossEncoderReranker):
 80 |     query = "I love you"
 81 |     texts = ["I love you", "I like you", "I don't like you", "I hate you"]
 82 |     reranked_docs, scores = hfCrossEncoderReranker.rank(query, texts)
 83 | 
 84 |     for i in range(min(len(texts), hfCrossEncoderReranker.limit) - 1):
 85 |         assert scores[i] > scores[i + 1]
 86 | 
 87 | 
 88 | def test_rank_documents_cross_encoder_custom_model(
 89 |     hfCrossEncoderRerankerWithCustomModel,
 90 | ):
 91 |     query = "I love you"
 92 |     texts = ["I love you", "I like you", "I don't like you", "I hate you"]
 93 |     reranked_docs, scores = hfCrossEncoderRerankerWithCustomModel.rank(query, texts)
 94 | 
 95 |     for i in range(min(len(texts), hfCrossEncoderRerankerWithCustomModel.limit) - 1):
 96 |         assert scores[i] > scores[i + 1]
 97 | 
 98 | 
 99 | @pytest.mark.asyncio
100 | async def test_async_rank_cross_encoder(hfCrossEncoderReranker):
101 |     docs = ["document one", "document two", "document three"]
102 |     query = "search query"
103 | 
104 |     reranked_docs, scores = await hfCrossEncoderReranker.arank(query, docs)
105 | 
106 |     assert isinstance(reranked_docs, list)
107 |     assert len(reranked_docs) == len(docs)  # Ensure we get back as many docs as we sent
108 |     assert all(isinstance(score, float) for score in scores)  # Scores should be floats
109 | 


--------------------------------------------------------------------------------
/tests/integration/test_search_results.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import pytest
 4 | 
 5 | from redisvl.index import SearchIndex
 6 | from redisvl.query import FilterQuery
 7 | from redisvl.query.filter import Tag
 8 | 
 9 | 
10 | @pytest.fixture
11 | def filter_query():
12 |     return FilterQuery(
13 |         return_fields=None,
14 |         filter_expression=Tag("credit_score") == "high",
15 |     )
16 | 
17 | 
18 | @pytest.fixture
19 | def index(sample_data, redis_url, worker_id):
20 |     fields_spec = [
21 |         {"name": "credit_score", "type": "tag"},
22 |         {"name": "user", "type": "tag"},
23 |         {"name": "job", "type": "text"},
24 |         {"name": "age", "type": "numeric"},
25 |         {
26 |             "name": "user_embedding",
27 |             "type": "vector",
28 |             "attrs": {
29 |                 "dims": 3,
30 |                 "distance_metric": "cosine",
31 |                 "algorithm": "flat",
32 |                 "datatype": "float32",
33 |             },
34 |         },
35 |     ]
36 | 
37 |     json_schema = {
38 |         "index": {
39 |             "name": f"user_index_json_{worker_id}",
40 |             "prefix": f"users_json_{worker_id}",
41 |             "storage_type": "json",
42 |         },
43 |         "fields": fields_spec,
44 |     }
45 | 
46 |     # construct a search index from the schema
47 |     index = SearchIndex.from_dict(json_schema, redis_url=redis_url)
48 | 
49 |     # create the index (no data yet)
50 |     index.create(overwrite=True)
51 | 
52 |     # Prepare and load the data
53 |     index.load(sample_data)
54 | 
55 |     # run the test
56 |     yield index
57 | 
58 |     # clean up
59 |     index.delete(drop=True)
60 | 
61 | 
62 | def test_process_results_unpacks_json_properly(index, filter_query):
63 |     results = index.query(filter_query)
64 |     assert len(results) == 4
65 | 


--------------------------------------------------------------------------------
/tests/unit/logger_interference_checker.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import sys
 3 | 
 4 | # Set up custom logging
 5 | handler = logging.StreamHandler(sys.stdout)
 6 | handler.setFormatter(
 7 |     logging.Formatter(
 8 |         "%(asctime)s %(levelname)s [%(name)s] [%(filename)s:%(lineno)s] %(message)s"
 9 |     )
10 | )
11 | 
12 | # Configure root logger
13 | root_logger = logging.getLogger()
14 | root_logger.handlers = [handler]
15 | root_logger.setLevel(logging.INFO)
16 | 
17 | # Log before import
18 | app_logger = logging.getLogger("app")
19 | app_logger.info("PRE_IMPORT_FORMAT")
20 | 
21 | # Import RedisVL
22 | from redisvl.query.filter import Text  # noqa: E402, F401
23 | 
24 | # Log after import
25 | app_logger.info("POST_IMPORT_FORMAT")
26 | 


--------------------------------------------------------------------------------
/tests/unit/test_aggregation_types.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | from redis.commands.search.aggregation import AggregateRequest
  3 | from redis.commands.search.query import Query
  4 | from redis.commands.search.result import Result
  5 | 
  6 | from redisvl.index.index import process_results
  7 | from redisvl.query.aggregate import HybridQuery
  8 | from redisvl.query.filter import Tag
  9 | 
 10 | # Sample data for testing
 11 | sample_vector = [0.1, 0.2, 0.3, 0.4]
 12 | sample_text = "the toon squad play basketball against a gang of aliens"
 13 | 
 14 | 
 15 | # Test Cases
 16 | def test_aggregate_hybrid_query():
 17 |     text_field_name = "description"
 18 |     vector_field_name = "embedding"
 19 | 
 20 |     hybrid_query = HybridQuery(
 21 |         text=sample_text,
 22 |         text_field_name=text_field_name,
 23 |         vector=sample_vector,
 24 |         vector_field_name=vector_field_name,
 25 |     )
 26 | 
 27 |     assert isinstance(hybrid_query, AggregateRequest)
 28 | 
 29 |     # Check defaut properties
 30 |     assert hybrid_query._text == sample_text
 31 |     assert hybrid_query._text_field == text_field_name
 32 |     assert hybrid_query._vector == sample_vector
 33 |     assert hybrid_query._vector_field == vector_field_name
 34 |     assert hybrid_query._scorer == "BM25STD"
 35 |     assert hybrid_query._filter_expression == None
 36 |     assert hybrid_query._alpha == 0.7
 37 |     assert hybrid_query._num_results == 10
 38 |     assert hybrid_query._loadfields == []
 39 |     assert hybrid_query._dialect == 2
 40 | 
 41 |     # Check specifying properties
 42 |     scorer = "TFIDF"
 43 |     filter_expression = Tag("genre") == "comedy"
 44 |     alpha = 0.5
 45 |     num_results = 8
 46 |     return_fields = ["title", "genre", "rating"]
 47 |     stopwords = []
 48 |     dialect = 2
 49 | 
 50 |     hybrid_query = HybridQuery(
 51 |         text=sample_text,
 52 |         text_field_name=text_field_name,
 53 |         vector=sample_vector,
 54 |         vector_field_name=vector_field_name,
 55 |         text_scorer=scorer,
 56 |         filter_expression=filter_expression,
 57 |         alpha=alpha,
 58 |         num_results=num_results,
 59 |         return_fields=return_fields,
 60 |         stopwords=stopwords,
 61 |         dialect=dialect,
 62 |     )
 63 | 
 64 |     assert hybrid_query._text == sample_text
 65 |     assert hybrid_query._text_field == text_field_name
 66 |     assert hybrid_query._vector == sample_vector
 67 |     assert hybrid_query._vector_field == vector_field_name
 68 |     assert hybrid_query._scorer == scorer
 69 |     assert hybrid_query._filter_expression == filter_expression
 70 |     assert hybrid_query._alpha == 0.5
 71 |     assert hybrid_query._num_results == 8
 72 |     assert hybrid_query._loadfields == return_fields
 73 |     assert hybrid_query._dialect == 2
 74 |     assert hybrid_query.stopwords == set()
 75 | 
 76 |     # Test stopwords are configurable
 77 |     hybrid_query = HybridQuery(
 78 |         sample_text, text_field_name, sample_vector, vector_field_name, stopwords=None
 79 |     )
 80 |     assert hybrid_query.stopwords == set([])
 81 | 
 82 |     hybrid_query = HybridQuery(
 83 |         sample_text,
 84 |         text_field_name,
 85 |         sample_vector,
 86 |         vector_field_name,
 87 |         stopwords=["the", "a", "of"],
 88 |     )
 89 |     assert hybrid_query.stopwords == set(["the", "a", "of"])
 90 |     hybrid_query = HybridQuery(
 91 |         sample_text,
 92 |         text_field_name,
 93 |         sample_vector,
 94 |         vector_field_name,
 95 |         stopwords="german",
 96 |     )
 97 |     assert hybrid_query.stopwords != set([])
 98 | 
 99 |     with pytest.raises(ValueError):
100 |         hybrid_query = HybridQuery(
101 |             sample_text,
102 |             text_field_name,
103 |             sample_vector,
104 |             vector_field_name,
105 |             stopwords="gibberish",
106 |         )
107 | 
108 |     with pytest.raises(TypeError):
109 |         hybrid_query = HybridQuery(
110 |             sample_text,
111 |             text_field_name,
112 |             sample_vector,
113 |             vector_field_name,
114 |             stopwords=[1, 2, 3],
115 |         )
116 | 


--------------------------------------------------------------------------------
/tests/unit/test_base_vectorizer.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | from redisvl.utils.vectorize.base import BaseVectorizer
 4 | 
 5 | 
 6 | def test_base_vectorizer_defaults():
 7 |     """
 8 |     Test that the base vectorizer defaults are set correctly, with
 9 |     a default for dtype. Versions before 0.3.8 did not have this field.
10 | 
11 |     A regression test for langchain-redis/#48
12 |     """
13 | 
14 |     class SimpleVectorizer(BaseVectorizer):
15 |         model: str = "simple"
16 |         dims: int = 10
17 | 
18 |         def embed(self, text: str, **kwargs) -> List[float]:
19 |             return [0.0] * self.dims
20 | 
21 |         async def aembed(self, text: str, **kwargs) -> List[float]:
22 |             return [0.0] * self.dims
23 | 
24 |         async def aembed_many(self, texts: List[str], **kwargs) -> List[List[float]]:
25 |             return [[0.0] * self.dims] * len(texts)
26 | 
27 |         def embed_many(self, texts: List[str], **kwargs) -> List[List[float]]:
28 |             return [[0.0] * self.dims] * len(texts)
29 | 
30 |     vectorizer = SimpleVectorizer()
31 |     assert vectorizer.model == "simple"
32 |     assert vectorizer.dims == 10
33 |     assert vectorizer.dtype == "float32"
34 | 


--------------------------------------------------------------------------------
/tests/unit/test_embedcache_schema.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | 
  3 | import pytest
  4 | from pydantic import ValidationError
  5 | 
  6 | from redisvl.extensions.cache.embeddings.schema import CacheEntry
  7 | from redisvl.redis.utils import hashify
  8 | 
  9 | 
 10 | def test_valid_cache_entry_creation():
 11 |     # Generate an entry_id first
 12 |     entry_id = hashify(f"What is AI?:text-embedding-ada-002")
 13 |     entry = CacheEntry(
 14 |         entry_id=entry_id,
 15 |         text="What is AI?",
 16 |         model_name="text-embedding-ada-002",
 17 |         embedding=[0.1, 0.2, 0.3],
 18 |     )
 19 |     assert entry.entry_id == entry_id
 20 |     assert entry.text == "What is AI?"
 21 |     assert entry.model_name == "text-embedding-ada-002"
 22 |     assert entry.embedding == [0.1, 0.2, 0.3]
 23 | 
 24 | 
 25 | def test_cache_entry_with_given_entry_id():
 26 |     entry = CacheEntry(
 27 |         entry_id="custom_id",
 28 |         text="What is AI?",
 29 |         model_name="text-embedding-ada-002",
 30 |         embedding=[0.1, 0.2, 0.3],
 31 |     )
 32 |     assert entry.entry_id == "custom_id"
 33 | 
 34 | 
 35 | def test_cache_entry_with_invalid_metadata():
 36 |     with pytest.raises(ValidationError):
 37 |         CacheEntry(
 38 |             entry_id="test_id",
 39 |             text="What is AI?",
 40 |             model_name="text-embedding-ada-002",
 41 |             embedding=[0.1, 0.2, 0.3],
 42 |             metadata="invalid_metadata",
 43 |         )
 44 | 
 45 | 
 46 | def test_cache_entry_to_dict():
 47 |     entry_id = hashify(f"What is AI?:text-embedding-ada-002")
 48 |     entry = CacheEntry(
 49 |         entry_id=entry_id,
 50 |         text="What is AI?",
 51 |         model_name="text-embedding-ada-002",
 52 |         embedding=[0.1, 0.2, 0.3],
 53 |         metadata={"author": "John"},
 54 |     )
 55 |     result = entry.to_dict()
 56 |     assert result["entry_id"] == entry_id
 57 |     assert result["text"] == "What is AI?"
 58 |     assert result["model_name"] == "text-embedding-ada-002"
 59 |     assert isinstance("embedding", str)
 60 |     assert isinstance("metadata", str)
 61 |     assert result["metadata"] == json.dumps({"author": "John"})
 62 | 
 63 | 
 64 | def test_cache_entry_deserialization():
 65 |     """Test that a CacheEntry properly deserializes data from Redis format."""
 66 |     serialized_data = {
 67 |         "entry_id": "test_id",
 68 |         "text": "What is AI?",
 69 |         "model_name": "text-embedding-ada-002",
 70 |         "embedding": json.dumps([0.1, 0.2, 0.3]),  # Serialized embedding
 71 |         "metadata": json.dumps({"source": "user_query"}),  # Serialized metadata
 72 |         "inserted_at": 1625819123.123,
 73 |     }
 74 | 
 75 |     entry = CacheEntry(**serialized_data)
 76 |     assert entry.entry_id == "test_id"
 77 |     assert entry.text == "What is AI?"
 78 |     assert entry.model_name == "text-embedding-ada-002"
 79 |     assert entry.embedding == [0.1, 0.2, 0.3]  # Should be deserialized
 80 |     assert entry.metadata == {"source": "user_query"}  # Should be deserialized
 81 |     assert entry.inserted_at == 1625819123.123
 82 | 
 83 | 
 84 | def test_cache_entry_with_empty_optional_fields():
 85 |     entry = CacheEntry(
 86 |         entry_id="test_id",
 87 |         text="What is AI?",
 88 |         model_name="text-embedding-ada-002",
 89 |         embedding=[0.1, 0.2, 0.3],
 90 |     )
 91 |     result = entry.to_dict()
 92 |     assert "metadata" not in result  # Empty metadata should be excluded
 93 | 
 94 | 
 95 | def test_cache_entry_timestamp_generation():
 96 |     """Test that inserted_at timestamp is automatically generated."""
 97 |     entry = CacheEntry(
 98 |         entry_id="test_id",
 99 |         text="What is AI?",
100 |         model_name="text-embedding-ada-002",
101 |         embedding=[0.1, 0.2, 0.3],
102 |     )
103 |     assert hasattr(entry, "inserted_at")
104 |     assert isinstance(entry.inserted_at, float)
105 | 
106 |     # The timestamp should be included in the dict representation
107 |     result = entry.to_dict()
108 |     assert "inserted_at" in result
109 |     assert isinstance(result["inserted_at"], float)
110 | 


--------------------------------------------------------------------------------
/tests/unit/test_llmcache_schema.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | 
  3 | import pytest
  4 | from pydantic import ValidationError
  5 | 
  6 | from redisvl.extensions.cache.llm import CacheEntry, CacheHit
  7 | from redisvl.redis.utils import array_to_buffer, hashify
  8 | 
  9 | 
 10 | def test_valid_cache_entry_creation():
 11 |     entry = CacheEntry(
 12 |         prompt="What is AI?",
 13 |         response="AI is artificial intelligence.",
 14 |         prompt_vector=[0.1, 0.2, 0.3],
 15 |     )
 16 |     assert entry.entry_id == hashify("What is AI?")
 17 |     assert entry.prompt == "What is AI?"
 18 |     assert entry.response == "AI is artificial intelligence."
 19 |     assert entry.prompt_vector == [0.1, 0.2, 0.3]
 20 | 
 21 | 
 22 | def test_cache_entry_with_given_entry_id():
 23 |     entry = CacheEntry(
 24 |         entry_id="custom_id",
 25 |         prompt="What is AI?",
 26 |         response="AI is artificial intelligence.",
 27 |         prompt_vector=[0.1, 0.2, 0.3],
 28 |     )
 29 |     assert entry.entry_id == "custom_id"
 30 | 
 31 | 
 32 | def test_cache_entry_with_invalid_metadata():
 33 |     with pytest.raises(ValidationError):
 34 |         CacheEntry(
 35 |             prompt="What is AI?",
 36 |             response="AI is artificial intelligence.",
 37 |             prompt_vector=[0.1, 0.2, 0.3],
 38 |             metadata="invalid_metadata",
 39 |         )
 40 | 
 41 | 
 42 | def test_cache_entry_to_dict():
 43 |     entry = CacheEntry(
 44 |         prompt="What is AI?",
 45 |         response="AI is artificial intelligence.",
 46 |         prompt_vector=[0.1, 0.2, 0.3],
 47 |         metadata={"author": "John"},
 48 |         filters={"category": "technology"},
 49 |     )
 50 |     result = entry.to_dict(dtype="float32")
 51 |     assert result["entry_id"] == hashify("What is AI?", {"category": "technology"})
 52 |     assert result["metadata"] == json.dumps({"author": "John"})
 53 |     assert result["prompt_vector"] == array_to_buffer([0.1, 0.2, 0.3], "float32")
 54 |     assert result["category"] == "technology"
 55 |     assert "filters" not in result
 56 | 
 57 | 
 58 | def test_valid_cache_hit_creation():
 59 |     hit = CacheHit(
 60 |         entry_id="entry_1",
 61 |         prompt="What is AI?",
 62 |         response="AI is artificial intelligence.",
 63 |         vector_distance=0.1,
 64 |         inserted_at=1625819123.123,
 65 |         updated_at=1625819123.123,
 66 |     )
 67 |     assert hit.entry_id == "entry_1"
 68 |     assert hit.prompt == "What is AI?"
 69 |     assert hit.response == "AI is artificial intelligence."
 70 |     assert hit.vector_distance == 0.1
 71 |     assert hit.inserted_at == hit.updated_at == 1625819123.123
 72 | 
 73 | 
 74 | def test_cache_hit_with_serialized_metadata():
 75 |     hit = CacheHit(
 76 |         entry_id="entry_1",
 77 |         prompt="What is AI?",
 78 |         response="AI is artificial intelligence.",
 79 |         vector_distance=0.1,
 80 |         inserted_at=1625819123.123,
 81 |         updated_at=1625819123.123,
 82 |         metadata=json.dumps({"author": "John"}),
 83 |     )
 84 |     assert hit.metadata == {"author": "John"}
 85 | 
 86 | 
 87 | def test_cache_hit_to_dict():
 88 |     hit = CacheHit(
 89 |         entry_id="entry_1",
 90 |         prompt="What is AI?",
 91 |         response="AI is artificial intelligence.",
 92 |         vector_distance=0.1,
 93 |         inserted_at=1625819123.123,
 94 |         updated_at=1625819123.123,
 95 |         filters={"category": "technology"},
 96 |     )
 97 |     result = hit.to_dict()
 98 |     assert result["entry_id"] == "entry_1"
 99 |     assert result["prompt"] == "What is AI?"
100 |     assert result["response"] == "AI is artificial intelligence."
101 |     assert result["vector_distance"] == 0.1
102 |     assert result["category"] == "technology"
103 |     assert "filters" not in result
104 | 
105 | 
106 | def test_cache_entry_with_empty_optional_fields():
107 |     entry = CacheEntry(
108 |         prompt="What is AI?",
109 |         response="AI is artificial intelligence.",
110 |         prompt_vector=[0.1, 0.2, 0.3],
111 |     )
112 |     result = entry.to_dict(dtype="float32")
113 |     assert "metadata" not in result
114 |     assert "filters" not in result
115 | 
116 | 
117 | def test_cache_hit_with_empty_optional_fields():
118 |     hit = CacheHit(
119 |         entry_id="entry_1",
120 |         prompt="What is AI?",
121 |         response="AI is artificial intelligence.",
122 |         vector_distance=0.1,
123 |         inserted_at=1625819123.123,
124 |         updated_at=1625819123.123,
125 |     )
126 |     result = hit.to_dict()
127 |     assert "metadata" not in result
128 |     assert "filters" not in result
129 | 


--------------------------------------------------------------------------------
/tests/unit/test_message_history_schema.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | from pydantic import ValidationError
  3 | 
  4 | from redisvl.extensions.message_history.schema import ChatMessage
  5 | from redisvl.redis.utils import array_to_buffer
  6 | from redisvl.utils.utils import create_ulid, current_timestamp
  7 | 
  8 | 
  9 | def test_chat_message_creation():
 10 |     session_tag = create_ulid()
 11 |     timestamp = current_timestamp()
 12 |     content = "Hello, world!"
 13 | 
 14 |     chat_message = ChatMessage(
 15 |         entry_id=f"{session_tag}:{timestamp}",
 16 |         role="user",
 17 |         content=content,
 18 |         session_tag=session_tag,
 19 |         timestamp=timestamp,
 20 |     )
 21 | 
 22 |     assert chat_message.entry_id == f"{session_tag}:{timestamp}"
 23 |     assert chat_message.role == "user"
 24 |     assert chat_message.content == content
 25 |     assert chat_message.session_tag == session_tag
 26 |     assert chat_message.timestamp == timestamp
 27 |     assert chat_message.tool_call_id is None
 28 |     assert chat_message.vector_field is None
 29 | 
 30 | 
 31 | def test_chat_message_default_id_generation():
 32 |     session_tag = create_ulid()
 33 |     timestamp = current_timestamp()
 34 |     content = "Hello, world!"
 35 | 
 36 |     chat_message = ChatMessage(
 37 |         role="user",
 38 |         content=content,
 39 |         session_tag=session_tag,
 40 |         timestamp=timestamp,
 41 |     )
 42 | 
 43 |     assert chat_message.entry_id == f"{session_tag}:{timestamp}"
 44 | 
 45 | 
 46 | def test_chat_message_with_tool_call_id():
 47 |     session_tag = create_ulid()
 48 |     timestamp = current_timestamp()
 49 |     content = "Hello, world!"
 50 |     tool_call_id = create_ulid()
 51 | 
 52 |     chat_message = ChatMessage(
 53 |         entry_id=f"{session_tag}:{timestamp}",
 54 |         role="user",
 55 |         content=content,
 56 |         session_tag=session_tag,
 57 |         timestamp=timestamp,
 58 |         tool_call_id=tool_call_id,
 59 |     )
 60 | 
 61 |     assert chat_message.tool_call_id == tool_call_id
 62 | 
 63 | 
 64 | def test_chat_message_with_vector_field():
 65 |     session_tag = create_ulid()
 66 |     timestamp = current_timestamp()
 67 |     content = "Hello, world!"
 68 |     vector_field = [0.1, 0.2, 0.3]
 69 | 
 70 |     chat_message = ChatMessage(
 71 |         entry_id=f"{session_tag}:{timestamp}",
 72 |         role="user",
 73 |         content=content,
 74 |         session_tag=session_tag,
 75 |         timestamp=timestamp,
 76 |         vector_field=vector_field,
 77 |     )
 78 | 
 79 |     assert chat_message.vector_field == vector_field
 80 | 
 81 | 
 82 | def test_chat_message_to_dict():
 83 |     session_tag = create_ulid()
 84 |     timestamp = current_timestamp()
 85 |     content = "Hello, world!"
 86 |     vector_field = [0.1, 0.2, 0.3]
 87 | 
 88 |     chat_message = ChatMessage(
 89 |         entry_id=f"{session_tag}:{timestamp}",
 90 |         role="user",
 91 |         content=content,
 92 |         session_tag=session_tag,
 93 |         timestamp=timestamp,
 94 |         vector_field=vector_field,
 95 |     )
 96 | 
 97 |     data = chat_message.to_dict(dtype="float32")
 98 | 
 99 |     assert data["entry_id"] == f"{session_tag}:{timestamp}"
100 |     assert data["role"] == "user"
101 |     assert data["content"] == content
102 |     assert data["session_tag"] == session_tag
103 |     assert data["timestamp"] == timestamp
104 |     assert data["vector_field"] == array_to_buffer(vector_field, "float32")
105 | 
106 | 
107 | def test_chat_message_missing_fields():
108 |     session_tag = create_ulid()
109 |     timestamp = current_timestamp()
110 |     content = "Hello, world!"
111 | 
112 |     with pytest.raises(ValidationError):
113 |         ChatMessage(
114 |             content=content,
115 |             session_tag=session_tag,
116 |             timestamp=timestamp,
117 |         )
118 | 
119 | 
120 | def test_chat_message_invalid_role():
121 |     session_tag = create_ulid()
122 |     timestamp = current_timestamp()
123 |     content = "Hello, world!"
124 | 
125 |     with pytest.raises(ValidationError):
126 |         ChatMessage(
127 |             entry_id=f"{session_tag}:{timestamp}",
128 |             role=[1, 2, 3],  # Invalid role type
129 |             content=content,
130 |             session_tag=session_tag,
131 |             timestamp=timestamp,
132 |         )
133 | 


--------------------------------------------------------------------------------
/tests/unit/test_route_schema.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | from pydantic import ValidationError
  3 | 
  4 | from redisvl.extensions.router.schema import (
  5 |     DistanceAggregationMethod,
  6 |     Route,
  7 |     RouteMatch,
  8 |     RoutingConfig,
  9 | )
 10 | 
 11 | 
 12 | def test_route_valid():
 13 |     route = Route(
 14 |         name="Test Route",
 15 |         references=["reference1", "reference2"],
 16 |         metadata={"key": "value"},
 17 |         distance_threshold=0.3,
 18 |     )
 19 |     assert route.name == "Test Route"
 20 |     assert route.references == ["reference1", "reference2"]
 21 |     assert route.metadata == {"key": "value"}
 22 |     assert route.distance_threshold == 0.3
 23 | 
 24 | 
 25 | def test_route_empty_name():
 26 |     with pytest.raises(ValidationError) as excinfo:
 27 |         Route(
 28 |             name="",
 29 |             references=["reference1", "reference2"],
 30 |             metadata={"key": "value"},
 31 |             distance_threshold=0.3,
 32 |         )
 33 |     assert "Route name must not be empty" in str(excinfo.value)
 34 | 
 35 | 
 36 | def test_route_empty_references():
 37 |     with pytest.raises(ValidationError) as excinfo:
 38 |         Route(
 39 |             name="Test Route",
 40 |             references=[],
 41 |             metadata={"key": "value"},
 42 |             distance_threshold=0.3,
 43 |         )
 44 |     assert "References must not be empty" in str(excinfo.value)
 45 | 
 46 | 
 47 | def test_route_non_empty_references():
 48 |     with pytest.raises(ValidationError) as excinfo:
 49 |         Route(
 50 |             name="Test Route",
 51 |             references=["reference1", ""],
 52 |             metadata={"key": "value"},
 53 |             distance_threshold=0.3,
 54 |         )
 55 |     assert "All references must be non-empty strings" in str(excinfo.value)
 56 | 
 57 | 
 58 | def test_route_valid_no_threshold():
 59 |     route = Route(
 60 |         name="Test Route",
 61 |         references=["reference1", "reference2"],
 62 |         metadata={"key": "value"},
 63 |     )
 64 |     assert route.name == "Test Route"
 65 |     assert route.references == ["reference1", "reference2"]
 66 |     assert route.metadata == {"key": "value"}
 67 | 
 68 | 
 69 | def test_route_invalid_threshold_zero():
 70 |     with pytest.raises(ValidationError) as excinfo:
 71 |         Route(
 72 |             name="Test Route",
 73 |             references=["reference1", "reference2"],
 74 |             metadata={"key": "value"},
 75 |             distance_threshold=0,
 76 |         )
 77 |     assert "Input should be greater than 0" in str(excinfo.value)
 78 | 
 79 | 
 80 | def test_route_invalid_threshold_negative():
 81 |     with pytest.raises(ValidationError) as excinfo:
 82 |         Route(
 83 |             name="Test Route",
 84 |             references=["reference1", "reference2"],
 85 |             metadata={"key": "value"},
 86 |             distance_threshold=-0.1,
 87 |         )
 88 |     assert "Input should be greater than 0" in str(excinfo.value)
 89 | 
 90 | 
 91 | def test_route_match():
 92 |     route_match = RouteMatch(name="test", distance=0.25)
 93 |     assert route_match.name == "test"
 94 |     assert route_match.distance == 0.25
 95 | 
 96 | 
 97 | def test_route_match_no_route():
 98 |     route_match = RouteMatch()
 99 |     assert route_match.name is None
100 |     assert route_match.distance is None
101 | 
102 | 
103 | def test_distance_aggregation_method():
104 |     assert DistanceAggregationMethod.avg == DistanceAggregationMethod("avg")
105 |     assert DistanceAggregationMethod.min == DistanceAggregationMethod("min")
106 |     assert DistanceAggregationMethod.sum == DistanceAggregationMethod("sum")
107 | 
108 | 
109 | def test_routing_config_valid():
110 |     config = RoutingConfig(aggregation_method=DistanceAggregationMethod.min, max_k=5)
111 |     assert config.aggregation_method == DistanceAggregationMethod("min")
112 |     assert config.max_k == 5
113 | 
114 | 
115 | def test_routing_config_invalid_max_k():
116 |     with pytest.raises(ValidationError) as excinfo:
117 |         RoutingConfig(max_k=0)
118 |     assert "Input should be greater than 0" in str(excinfo.value)
119 | 


--------------------------------------------------------------------------------
/tests/unit/test_storage.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | from pydantic import ValidationError
  3 | 
  4 | from redisvl.exceptions import SchemaValidationError
  5 | from redisvl.index.storage import BaseStorage, HashStorage, JsonStorage
  6 | from redisvl.schema import IndexSchema
  7 | 
  8 | 
  9 | @pytest.fixture
 10 | def sample_hash_schema():
 11 |     """Create a sample schema with HASH storage for testing."""
 12 |     schema_dict = {
 13 |         "index": {
 14 |             "name": "test-hash-index",
 15 |             "prefix": "test",
 16 |             "key_separator": ":",
 17 |             "storage_type": "hash",
 18 |         },
 19 |         "fields": [
 20 |             {"name": "test_id", "type": "tag"},
 21 |             {"name": "title", "type": "text"},
 22 |             {"name": "user", "type": "tag"},
 23 |             {"name": "rating", "type": "numeric"},
 24 |             {"name": "location", "type": "geo"},
 25 |             {
 26 |                 "name": "embedding",
 27 |                 "type": "vector",
 28 |                 "attrs": {
 29 |                     "algorithm": "flat",
 30 |                     "dims": 4,
 31 |                     "datatype": "float32",
 32 |                     "distance_metric": "cosine",
 33 |                 },
 34 |             },
 35 |             {
 36 |                 "name": "int_vector",
 37 |                 "type": "vector",
 38 |                 "attrs": {
 39 |                     "algorithm": "flat",
 40 |                     "dims": 3,
 41 |                     "datatype": "int8",
 42 |                     "distance_metric": "l2",
 43 |                 },
 44 |             },
 45 |         ],
 46 |     }
 47 |     return IndexSchema.from_dict(schema_dict)
 48 | 
 49 | 
 50 | @pytest.fixture
 51 | def sample_json_schema():
 52 |     """Create a sample schema with JSON storage for testing."""
 53 |     schema_dict = {
 54 |         "index": {
 55 |             "name": "test-json-index",
 56 |             "prefix": "test",
 57 |             "key_separator": ":",
 58 |             "storage_type": "json",
 59 |         },
 60 |         "fields": [
 61 |             {"name": "test_id", "type": "tag"},
 62 |             {"name": "user", "type": "tag"},
 63 |             {"name": "title", "type": "text"},
 64 |             {"name": "rating", "type": "numeric"},
 65 |             {"name": "location", "type": "geo"},
 66 |             {
 67 |                 "name": "embedding",
 68 |                 "type": "vector",
 69 |                 "attrs": {
 70 |                     "algorithm": "flat",
 71 |                     "dims": 4,
 72 |                     "datatype": "float32",
 73 |                     "distance_metric": "cosine",
 74 |                 },
 75 |             },
 76 |             {
 77 |                 "name": "int_vector",
 78 |                 "type": "vector",
 79 |                 "attrs": {
 80 |                     "algorithm": "flat",
 81 |                     "dims": 3,
 82 |                     "datatype": "int8",
 83 |                     "distance_metric": "l2",
 84 |                 },
 85 |             },
 86 |         ],
 87 |     }
 88 |     return IndexSchema.from_dict(schema_dict)
 89 | 
 90 | 
 91 | @pytest.fixture(params=[JsonStorage, HashStorage])
 92 | def storage_instance(request, sample_hash_schema, sample_json_schema):
 93 |     StorageClass = request.param
 94 |     if isinstance(StorageClass, JsonStorage):
 95 |         return StorageClass(index_schema=sample_json_schema)
 96 |     return StorageClass(index_schema=sample_hash_schema)
 97 | 
 98 | 
 99 | def test_key_formatting(storage_instance):
100 |     key = "1234"
101 |     generated_key = storage_instance._key(key, "", "")
102 |     assert generated_key == key, "The generated key does not match the expected format."
103 |     generated_key = storage_instance._key(key, "", ":")
104 |     assert generated_key == key, "The generated key does not match the expected format."
105 |     generated_key = storage_instance._key(key, "test", ":")
106 |     assert (
107 |         generated_key == f"test:{key}"
108 |     ), "The generated key does not match the expected format."
109 | 
110 | 
111 | def test_create_key(storage_instance):
112 |     id_field = "id"
113 |     obj = {id_field: "1234"}
114 |     expected_key = f"{storage_instance.index_schema.index.prefix}{storage_instance.index_schema.index.key_separator}{obj[id_field]}"
115 |     generated_key = storage_instance._create_key(obj, id_field)
116 |     assert (
117 |         generated_key == expected_key
118 |     ), "The generated key does not match the expected format."
119 | 
120 | 
121 | def test_validate_success(storage_instance):
122 |     try:
123 |         storage_instance._validate(
124 |             {"test_id": "1234", "rating": 5, "user": "john", "title": "engineer"}
125 |         )
126 |     except Exception as e:
127 |         pytest.fail(f"_validate should not raise an exception here, but raised {e}")
128 | 
129 | 
130 | def test_validate_failure(storage_instance):
131 |     data = {"title": 5}
132 |     with pytest.raises(ValidationError):
133 |         storage_instance._validate(data)
134 | 
135 |     data = {"user": [1]}
136 |     with pytest.raises(ValidationError):
137 |         storage_instance._validate(data)
138 | 
139 | 
140 | def test_validate_preprocess_and_validate_failure(storage_instance):
141 |     data = {"title": 5}
142 |     data == storage_instance._preprocess_and_validate_objects(
143 |         objects=[data], validate=False
144 |     )
145 |     with pytest.raises(SchemaValidationError):
146 |         storage_instance._preprocess_and_validate_objects(objects=[data], validate=True)
147 | 
148 |     data = {"user": [1]}
149 |     data == storage_instance._preprocess_and_validate_objects(
150 |         objects=[data], validate=False
151 |     )
152 |     with pytest.raises(SchemaValidationError):
153 |         storage_instance._preprocess_and_validate_objects(objects=[data], validate=True)
154 | 
155 | 
156 | def test_preprocess(storage_instance):
157 |     data = {"key": "value"}
158 |     preprocessed_data = storage_instance._preprocess(obj=data, preprocess=None)
159 |     assert preprocessed_data == data
160 | 
161 |     def fn(d):
162 |         d["foo"] = "bar"
163 |         return d
164 | 
165 |     preprocessed_data = storage_instance._preprocess(obj=data, preprocess=fn)
166 |     assert "foo" in preprocessed_data
167 |     assert preprocessed_data["foo"] == "bar"
168 | 


--------------------------------------------------------------------------------
/tests/unit/test_threshold_optimizer_utility.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import pytest
 4 | 
 5 | if sys.version_info.major == 3 and sys.version_info.minor < 10:
 6 |     pytest.skip("Test requires Python 3.10 or higher", allow_module_level=True)
 7 | 
 8 | from redisvl.utils.utils import lazy_import
 9 | 
10 | evaluate = lazy_import("ranx.evaluate")
11 | 
12 | from redisvl.utils.optimize import LabeledData
13 | from redisvl.utils.optimize.cache import _generate_run_cache
14 | from redisvl.utils.optimize.utils import _format_qrels
15 | 
16 | # Note: these tests are not intended to test ranx but to test that our data formatting for the package is correct
17 | 
18 | 
19 | def test_known_precision_case():
20 |     """
21 |     Test case with known precision value.
22 | 
23 |     Setup:
24 |     - 2 queries
25 |     - Query 1 expects doc1, gets doc1 and doc2 (precision 0.5)
26 |     - Query 2 expects doc3, gets doc3 (precision 1.0)
27 |     Expected overall precision: 0.75
28 |     """
29 |     # Setup test data
30 |     test_data = [
31 |         LabeledData(
32 |             query="test query 1",
33 |             query_match="doc1",
34 |             response=[
35 |                 {"id": "doc1", "vector_distance": 0.2},
36 |                 {"id": "doc2", "vector_distance": 0.3},
37 |             ],
38 |         ),
39 |         LabeledData(
40 |             query="test query 2",
41 |             query_match="doc3",
42 |             response=[
43 |                 {"id": "doc3", "vector_distance": 0.2},
44 |                 {"id": "doc4", "vector_distance": 0.8},
45 |             ],
46 |         ),
47 |     ]
48 | 
49 |     # Create qrels (ground truth)
50 |     qrels = _format_qrels(test_data)
51 | 
52 |     threshold = 0.4
53 |     run = _generate_run_cache(test_data, threshold)
54 | 
55 |     # Calculate precision using ranx
56 |     precision = evaluate(qrels, run, "precision")
57 |     assert precision == 0.75  # (0.5 + 1.0) / 2
58 | 
59 | 
60 | def test_known_precision_with_no_matches():
61 |     """Test case where some queries have no matches."""
62 |     test_data = [
63 |         LabeledData(
64 |             query="test query 2",
65 |             query_match="",  # Expecting no match
66 |             response=[],
67 |         ),
68 |     ]
69 | 
70 |     # Create qrels
71 |     qrels = _format_qrels(test_data)
72 | 
73 |     # Generate run with threshold that excludes all docs for first query
74 |     threshold = 0.3
75 |     run = _generate_run_cache(test_data, threshold)
76 | 
77 |     # Calculate precision
78 |     precision = evaluate(qrels, run, "precision")
79 |     assert precision == 1.0  # (0.0 + 1.0) / 2
80 | 


--------------------------------------------------------------------------------
/tests/unit/test_token_escaper.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | from redisvl.utils.token_escaper import TokenEscaper
  4 | 
  5 | 
  6 | @pytest.fixture
  7 | def escaper():
  8 |     return TokenEscaper()
  9 | 
 10 | 
 11 | @pytest.mark.parametrize(
 12 |     ("test_input,expected"),
 13 |     [
 14 |         (r"a [big] test.", r"a\ \[big\]\ test\."),
 15 |         (r"hello, world!", r"hello\,\ world\!"),
 16 |         (
 17 |             r'special "quotes" (and parentheses)',
 18 |             r"special\ \"quotes\"\ \(and\ parentheses\)",
 19 |         ),
 20 |         (
 21 |             r"& symbols, like * and ?",
 22 |             r"\&\ symbols\,\ like\ \*\ and\ ?",
 23 |         ),  # TODO: question marks are not caught?
 24 |         # underscores are ignored
 25 |         (r"-dashes_and_underscores-", r"\-dashes_and_underscores\-"),
 26 |     ],
 27 |     ids=["brackets", "commas", "quotes", "symbols", "underscores"],
 28 | )
 29 | def test_escape_text_chars(escaper, test_input, expected):
 30 |     assert escaper.escape(test_input) == expected
 31 | 
 32 | 
 33 | @pytest.mark.parametrize(
 34 |     ("test_input,expected"),
 35 |     [
 36 |         # Simple tags
 37 |         ("user:name", r"user\:name"),
 38 |         ("123#comment", r"123\#comment"),
 39 |         ("hyphen-separated", r"hyphen\-separated"),
 40 |         # Tags with special characters
 41 |         ("price$", r"price\$"),
 42 |         ("super*star", r"super\*star"),
 43 |         ("tag&value", r"tag\&value"),
 44 |         ("@username", r"\@username"),
 45 |         # Space-containing tags often used in search scenarios
 46 |         ("San Francisco", r"San\ Francisco"),
 47 |         ("New Zealand", r"New\ Zealand"),
 48 |         # Multi-special-character tags
 49 |         ("complex/tag:value", r"complex\/tag\:value"),
 50 |         ("$special$tag$", r"\$special\$tag\$"),
 51 |         ("tag-with-hyphen", r"tag\-with\-hyphen"),
 52 |         # Tags with less common, but legal characters
 53 |         ("_underscore_", r"_underscore_"),
 54 |         ("dot.tag", r"dot\.tag"),
 55 |         # ("pipe|tag", r"pipe\|tag"), #TODO - pipes are not caught?
 56 |         # More edge cases with special characters
 57 |         ("(parentheses)", r"\(parentheses\)"),
 58 |         ("[brackets]", r"\[brackets\]"),
 59 |         ("{braces}", r"\{braces\}"),
 60 |         # ("question?mark", r"question\?mark"),  #TODO - question marks are not caught?
 61 |         # Unicode characters in tags
 62 |         ("你好", r"你好"),  # Assuming non-Latin characters don't need escaping
 63 |         ("emoji:😊", r"emoji\:😊"),
 64 |         # ...other cases as needed...
 65 |     ],
 66 |     ids=[
 67 |         ":",
 68 |         "#",
 69 |         "-",
 70 |         "$",
 71 |         "*",
 72 |         "&",
 73 |         "@",
 74 |         "space",
 75 |         "space-2",
 76 |         "complex",
 77 |         "special",
 78 |         "hyphen",
 79 |         "underscore",
 80 |         "dot",
 81 |         "parentheses",
 82 |         "brackets",
 83 |         "braces",
 84 |         "non-latin",
 85 |         "emoji",
 86 |     ],
 87 | )
 88 | def test_escape_tag_like_values(escaper, test_input, expected):
 89 |     assert escaper.escape(test_input) == expected
 90 | 
 91 | 
 92 | @pytest.mark.parametrize("test_input", [123, 45.67, None, [], {}])
 93 | def test_escape_non_string_input(escaper, test_input):
 94 |     with pytest.raises(TypeError):
 95 |         escaper.escape(test_input)
 96 | 
 97 | 
 98 | @pytest.mark.parametrize(
 99 |     "test_input,expected",
100 |     [
101 |         # ('你好，世界！', r'你好\，世界\！'), # TODO - non latin chars?
102 |         ("😊 ❤️ 👍", r"😊\ ❤️\ 👍"),
103 |         # ...other cases as needed...
104 |     ],
105 |     ids=["emoji"],
106 | )
107 | def test_escape_unicode_characters(escaper, test_input, expected):
108 |     assert escaper.escape(test_input) == expected
109 | 
110 | 
111 | def test_escape_empty_string(escaper):
112 |     assert escaper.escape("") == ""
113 | 
114 | 
115 | def test_escape_long_string(escaper):
116 |     # Construct a very long string
117 |     long_str = "a," * 1000  # This creates a string "a,a,a,a,...a,"
118 |     expected = r"a\," * 1000  # Expected escaped string
119 | 
120 |     # Use pytest's benchmark fixture to check performance
121 |     escaped = escaper.escape(long_str)
122 |     assert escaped == expected
123 | 


--------------------------------------------------------------------------------