├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── llm_ollama ├── __init__.py ├── auth.py └── cache.py ├── pyproject.toml └── tests ├── conftest.py ├── test_auth.py ├── test_cache.py ├── test_ollama_integration.py └── test_ollama_unit.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | cache: pip 23 | cache-dependency-path: setup.py 24 | - name: Install dependencies 25 | run: | 26 | pip install -e '.[test]' 27 | - name: Run tests 28 | run: | 29 | python -m pytest 30 | deploy: 31 | runs-on: ubuntu-latest 32 | environment: release 33 | permissions: 34 | id-token: write 35 | needs: [test] 36 | steps: 37 | - uses: actions/checkout@v4 38 | - name: Set up Python 39 | uses: actions/setup-python@v5 40 | with: 41 | python-version: "3.13" 42 | cache: pip 43 | cache-dependency-path: setup.py 44 | - name: Install dependencies 45 | run: | 46 | pip install setuptools wheel build 47 | - name: Build 48 | run: | 49 | python -m build 50 | - name: Publish package distributions to PyPI 51 | uses: pypa/gh-action-pypi-publish@release/v1 52 | with: 53 | packages-dir: ./dist 54 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | permissions: 6 | contents: read 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | cache: pip 21 | cache-dependency-path: setup.py 22 | - name: Install dependencies 23 | run: | 24 | pip install -e '.[lint,test]' 25 | - name: Check code formatting 26 | run: | 27 | python -m black --check --diff . 28 | - name: Run tests 29 | run: | 30 | python -m pytest 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # Distribution / packaging 6 | *.egg-info/ 7 | 8 | # Unit test / coverage reports 9 | .pytest_cache/ 10 | 11 | # Environments 12 | venv/ 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # llm-ollama 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/llm-ollama.svg)](https://pypi.org/project/llm-ollama/) 4 | [![Changelog](https://img.shields.io/github/v/release/taketwo/llm-ollama?include_prereleases&label=changelog)](https://github.com/taketwo/llm-ollama/releases) 5 | [![Tests](https://github.com/taketwo/llm-ollama/actions/workflows/test.yml/badge.svg)](https://github.com/taketwo/llm-ollama/actions/workflows/test.yml) 6 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/taketwo/llm-ollama/blob/main/LICENSE) 7 | 8 | [LLM](https://llm.datasette.io/) plugin providing access to models running on an [Ollama](https://ollama.ai) server. 9 | 10 | ## Installation 11 | 12 | Install this plugin in the same environment as [LLM](https://llm.datasette.io/). 13 | 14 | ```bash 15 | llm install llm-ollama 16 | ``` 17 | 18 | ## Usage 19 | 20 | First, ensure that the Ollama server is running and that you have pulled some models. You can use `ollama list` to check what is locally available. 21 | 22 | The plugin will query the Ollama server for the list of models. You can use `llm ollama list-models` to see the list; it should match the output of `ollama list`. All these models will be automatically registered with LLM and made available for prompting, chatting, and embedding. 23 | 24 | Assuming you have `llama2:latest` available, you can run a prompt using: 25 | 26 | ```bash 27 | llm -m llama2:latest 'How much is 2+2?' 28 | ``` 29 | 30 | The plugin automatically creates a short alias for models that have `:latest` in the name, so the previous command is equivalent to running: 31 | 32 | ```bash 33 | llm -m llama2 'How much is 2+2?' 34 | ``` 35 | 36 | To start an interactive chat session: 37 | 38 | ```bash 39 | llm chat -m llama2 40 | ``` 41 | ``` 42 | Chatting with llama2:latest 43 | Type 'exit' or 'quit' to exit 44 | Type '!multi' to enter multiple lines, then '!end' to finish 45 | > 46 | ``` 47 | 48 | ### Image attachments 49 | 50 | Multi-modal Ollama models can accept image attachments using the [LLM attachments](https://llm.datasette.io/en/stable/usage.html#attachments) option: 51 | 52 | ```bash 53 | llm -m llava "Describe this image" -a https://static.simonwillison.net/static/2024/pelicans.jpg 54 | ``` 55 | 56 | ### Tools 57 | 58 | Some models, such as [Qwen 3](https://ollama.com/library/qwen3), support [tools](https://llm.datasette.io/en/stable/tools.html). Ollama maintains a [list of tool-supporting models](https://ollama.com/search?c=tools). 59 | ```bash 60 | ollama pull qwen3:4b 61 | 62 | llm -m qwen3:4b -T llm_time 'What is the time?' --td 63 | ``` 64 | 65 | ### Embeddings 66 | 67 | The plugin supports [LLM embeddings](https://llm.datasette.io/en/stable/embeddings/cli.html). 
Both regular and specialized embedding models (such as `mxbai-embed-large`) can be used: 68 | 69 | ```bash 70 | llm embed -m mxbai-embed-large -i README.md 71 | ``` 72 | 73 | By default, the input will be truncated from the end to fit within the context length. This behavior can be changed by setting the `OLLAMA_EMBED_TRUNCATE=no` environment variable. In that case, the embedding operation will fail if the context length is exceeded. 74 | 75 | ### JSON schemas 76 | 77 | Ollama's built-in support for [structured outputs](https://ollama.com/blog/structured-outputs) can be accessed through [LLM schemas](https://llm.datasette.io/en/stable/schemas.html), for example: 78 | 79 | ```bash 80 | llm -m llama3.2 --schema "name, age int, one_sentence_bio" "invent a cool dog" 81 | ``` 82 | 83 | ### Async models 84 | 85 | The plugin registers [async LLM models](https://llm.datasette.io/en/stable/python-api.html#async-models) suitable for use with Python [asyncio](https://docs.python.org/3/library/asyncio.html). 86 | 87 | To use an async model, retrieve it with the `llm.get_async_model()` function instead of `llm.get_model()` and then await the response: 88 | 89 | ```python 90 | import asyncio, llm 91 | 92 | async def run(): 93 | model = llm.get_async_model("llama3.2:latest") 94 | response = model.prompt("A short poem about tea") 95 | print(await response.text()) 96 | 97 | asyncio.run(run()) 98 | ``` 99 | 100 | ## Model aliases 101 | 102 | The same Ollama model may be referred to by several names with different tags. For example, in the following list, there is a single unique model with three different names: 103 | 104 | ```bash 105 | ollama list 106 | NAME ID SIZE MODIFIED 107 | stable-code:3b aa5ab8afb862 1.6 GB 9 hours ago 108 | stable-code:code aa5ab8afb862 1.6 GB 9 seconds ago 109 | stable-code:latest aa5ab8afb862 1.6 GB 14 seconds ago 110 | ``` 111 | 112 | In such cases, the plugin will register a single model and create additional aliases. Continuing the previous example, this is what LLM will have: 113 | 114 | ```bash 115 | llm models 116 | ... 117 | 118 | Ollama: stable-code:3b (aliases: stable-code:code, stable-code:latest, stable-code) 119 | ``` 120 | 121 | ## Model options 122 | 123 | All models accept [Ollama modelfile parameters](https://github.com/ollama/ollama/blob/main/docs/modelfile.md#parameter) as options. Use the `-o name value` syntax to specify them, for example: 124 | 125 | - `-o temperature 0.8`: set the temperature of the model 126 | - `-o num_ctx 256000`: set the size of the context window used to generate the next token 127 | 128 | See the referenced page for the complete list with descriptions and default values. 129 | 130 | Additionally, the `-o json_object 1` option can be used to force the model to reply with a valid JSON object. Note that your prompt must mention JSON for this to work. 131 | 132 | ## Ollama server address 133 | 134 | `llm-ollama` will try to connect to a server at the default `localhost:11434` address. 
If your Ollama server is remote or runs on a non-default port, you can use the `OLLAMA_HOST` environment variable to point the plugin to it, e.g.: 135 | 136 | ```bash 137 | export OLLAMA_HOST=https://192.168.1.13:11434 138 | ``` 139 | 140 | ### Authentication 141 | 142 | If your Ollama server is protected with Basic Authentication, you can include the credentials directly in the `OLLAMA_HOST` environment variable: 143 | 144 | ```bash 145 | export OLLAMA_HOST=https://username:password@192.168.1.13:11434 146 | ``` 147 | 148 | The plugin will parse the credentials and use them for authentication. Special characters in usernames or passwords should be URL-encoded: 149 | 150 | ```bash 151 | # For username "user@domain" and password "p@ssw0rd" 152 | export OLLAMA_HOST=https://user%40domain:p%40ssw0rd@192.168.1.13:11434 153 | ``` 154 | 155 | ## Development 156 | 157 | ### Setup 158 | 159 | To set up this plugin locally, first check out the code. Then create a new virtual environment and install the dependencies. If you are using `uv`: 160 | 161 | ```bash 162 | cd llm-ollama 163 | uv venv 164 | uv pip install -e '.[test,lint]' 165 | ``` 166 | 167 | Otherwise, if you prefer using standard tools: 168 | 169 | ```bash 170 | cd llm-ollama 171 | python3 -m venv .venv && source .venv/bin/activate 172 | pip install -e '.[test,lint]' 173 | ``` 174 | 175 | ### Testing and linting 176 | 177 | To test or lint the code, first activate the environment: 178 | 179 | ```bash 180 | source .venv/bin/activate 181 | ``` 182 | 183 | To run unit and integration tests: 184 | 185 | ```bash 186 | python -m pytest 187 | ``` 188 | 189 | Integration tests require a running Ollama server and will be: 190 | - Enabled automatically if an Ollama server is available; 191 | - Skipped if the Ollama server is unavailable; 192 | - Force-enabled with `--integration` (but will fail if the Ollama server is unavailable); 193 | - Force-disabled with `--no-integration`. 194 | 195 | To format the code: 196 | 197 | ```bash 198 | python -m black . 
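# Optionally, check formatting without modifying files, as the CI workflow does:
python -m black --check --diff .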
199 | ``` 200 | -------------------------------------------------------------------------------- /llm_ollama/__init__.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import os 3 | import warnings 4 | from collections import defaultdict 5 | from dataclasses import dataclass 6 | from typing import List, Optional, Tuple 7 | 8 | import click 9 | import llm 10 | import ollama 11 | from llm_ollama.auth import get_async_client, get_client 12 | 13 | from llm_ollama.cache import Cache 14 | 15 | from pydantic import Field, TypeAdapter, ValidationError 16 | 17 | cache = Cache(llm.user_dir() / "llm-ollama" / "cache") 18 | 19 | 20 | @llm.hookimpl 21 | def register_commands(cli): 22 | @cli.group(name="ollama") 23 | def ollama_group(): 24 | "Commands for working with models hosted on Ollama" 25 | 26 | @ollama_group.command(name="list-models") 27 | def list_models(): 28 | """List models that are available locally on Ollama server.""" 29 | for model in _get_ollama_models(): 30 | click.echo(model["model"]) 31 | 32 | 33 | @llm.hookimpl 34 | def register_models(register): 35 | models = defaultdict(list) 36 | for model in _get_ollama_models(): 37 | name, digest = model["model"], model["digest"] 38 | models[digest].append(name) 39 | if name.endswith(":latest"): 40 | models[digest].append(name[: -len(":latest")]) 41 | for digest, names in models.items(): 42 | name, aliases = _pick_primary_name(names) 43 | chat_completion, supports_tools = _ollama_model_capabilities(digest, name) 44 | if not chat_completion: 45 | continue 46 | register( 47 | Ollama(name, supports_tools=supports_tools), 48 | AsyncOllama(name, supports_tools=supports_tools), 49 | aliases=aliases, 50 | ) 51 | 52 | 53 | @llm.hookimpl 54 | def register_embedding_models(register): 55 | models = defaultdict(list) 56 | for model in _get_ollama_models(): 57 | models[model["digest"]].append(model["model"]) 58 | if model["model"].endswith(":latest"): 59 | models[model["digest"]].append(model["model"][: -len(":latest")]) 60 | for names in models.values(): 61 | name, aliases = _pick_primary_name(names) 62 | register(OllamaEmbed(name), aliases=aliases) 63 | 64 | 65 | class _SharedOllama: 66 | can_stream: bool = True 67 | supports_schema: bool = True 68 | supports_tools: bool = True 69 | attachment_types = { 70 | "image/png", 71 | "image/jpeg", 72 | "image/webp", 73 | "image/gif", 74 | } 75 | 76 | class Options(llm.Options): 77 | """Parameters that can be set when the model is run by Ollama. 78 | 79 | See: https://github.com/ollama/ollama/blob/main/docs/modelfile.md#parameter 80 | """ 81 | 82 | mirostat: Optional[int] = Field( 83 | default=None, 84 | description=("Enable Mirostat sampling for controlling perplexity."), 85 | ) 86 | mirostat_eta: Optional[float] = Field( 87 | default=None, 88 | description=( 89 | "Influences how quickly the algorithm responds to feedback from the generated text." 90 | ), 91 | ) 92 | mirostat_tau: Optional[float] = Field( 93 | default=None, 94 | description=( 95 | "Controls the balance between coherence and diversity of the output." 96 | ), 97 | ) 98 | num_ctx: Optional[int] = Field( 99 | default=None, 100 | description="The size of the context window used to generate the next token.", 101 | ) 102 | temperature: Optional[float] = Field( 103 | default=None, 104 | description=( 105 | "The temperature of the model. Increasing the temperature will make the model answer more creatively." 
106 | ), 107 | ) 108 | seed: Optional[int] = Field( 109 | default=None, 110 | description=( 111 | "Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt." 112 | ), 113 | ) 114 | stop: Optional[List[str]] = Field( 115 | default=None, 116 | description=( 117 | "Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return." 118 | ), 119 | ) 120 | tfs_z: Optional[float] = Field( 121 | default=None, 122 | description=( 123 | "Tail free sampling is used to reduce the impact of less probable tokens from the output." 124 | ), 125 | ) 126 | num_predict: Optional[int] = Field( 127 | default=None, 128 | description=("Maximum number of tokens to predict when generating text."), 129 | ) 130 | top_k: Optional[int] = Field( 131 | default=None, 132 | description=("Reduces the probability of generating nonsense."), 133 | ) 134 | top_p: Optional[float] = Field( 135 | default=None, 136 | description=( 137 | "Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text." 138 | ), 139 | ) 140 | json_object: Optional[bool] = Field( 141 | default=None, 142 | description="Output a valid JSON object {...}. Prompt must mention JSON.", 143 | ) 144 | 145 | def __init__( 146 | self, 147 | model_id: str, 148 | supports_tools: bool = True, 149 | ) -> None: 150 | self.model_id = model_id 151 | self.supports_tools = supports_tools 152 | 153 | def __str__(self) -> str: 154 | return f"Ollama: {self.model_id}" 155 | 156 | def build_messages(self, prompt, conversation): 157 | messages = [] 158 | if not conversation: 159 | if prompt.system: 160 | messages.append({"role": "system", "content": prompt.system}) 161 | messages.append({"role": "user", "content": prompt.prompt}) 162 | if prompt.attachments: 163 | messages[-1]["images"] = [ 164 | attachment.base64_content() for attachment in prompt.attachments 165 | ] 166 | return messages 167 | 168 | current_system = None 169 | for prev_response in conversation.responses: 170 | if ( 171 | prev_response.prompt.system 172 | and prev_response.prompt.system != current_system 173 | ): 174 | messages.append( 175 | {"role": "system", "content": prev_response.prompt.system}, 176 | ) 177 | current_system = prev_response.prompt.system 178 | messages.append({"role": "user", "content": prev_response.prompt.prompt}) 179 | if prev_response.attachments: 180 | messages[-1]["images"] = [ 181 | attachment.base64_content() 182 | for attachment in prev_response.attachments 183 | ] 184 | 185 | messages.append( 186 | {"role": "assistant", "content": prev_response.text_or_raise()} 187 | ) 188 | if prompt.system and prompt.system != current_system: 189 | messages.append({"role": "system", "content": prompt.system}) 190 | messages.append({"role": "user", "content": prompt.prompt}) 191 | for tool_result in prompt.tool_results: 192 | messages.append( 193 | { 194 | "role": "tool", 195 | "content": tool_result.output, 196 | "name": tool_result.name, 197 | } 198 | ) 199 | 200 | return messages 201 | 202 | def set_usage(self, response, usage): 203 | if not usage: 204 | return 205 | input_tokens = usage.pop("prompt_tokens") 206 | output_tokens = usage.pop("completion_tokens") 207 | response.set_usage(input=input_tokens, output=output_tokens) 208 | 209 | 210 | class Ollama(_SharedOllama, llm.Model): 211 | def execute( 212 | self, 213 | prompt: llm.Prompt, 214 | stream: 
bool, 215 | response: llm.Response, 216 | conversation=None, 217 | ): 218 | messages = self.build_messages(prompt, conversation) 219 | response._prompt_json = {"messages": messages} 220 | options = prompt.options.model_dump(exclude_none=True) 221 | json_object = options.pop("json_object", None) 222 | kwargs = {} 223 | usage = None 224 | if json_object: 225 | kwargs["format"] = "json" 226 | elif prompt.schema: 227 | kwargs["format"] = prompt.schema 228 | if prompt.tools: 229 | kwargs["tools"] = [ 230 | tool.implementation for tool in prompt.tools if tool.implementation 231 | ] 232 | if stream: 233 | response_stream = get_client().chat( 234 | model=self.model_id, 235 | messages=messages, 236 | stream=True, 237 | options=options, 238 | **kwargs, 239 | ) 240 | for chunk in response_stream: 241 | if chunk.message.tool_calls: 242 | for tool_call in chunk.message.tool_calls: 243 | response.add_tool_call( 244 | llm.ToolCall( 245 | name=tool_call.function.name, 246 | arguments=tool_call.function.arguments, 247 | ) 248 | ) 249 | with contextlib.suppress(KeyError): 250 | if chunk["done"]: 251 | usage = { 252 | "prompt_tokens": chunk["prompt_eval_count"], 253 | "completion_tokens": chunk["eval_count"], 254 | } 255 | yield chunk["message"]["content"] 256 | else: 257 | ollama_response = get_client().chat( 258 | model=self.model_id, 259 | messages=messages, 260 | options=options, 261 | **kwargs, 262 | ) 263 | response.response_json = ollama_response.dict() 264 | usage = { 265 | "prompt_tokens": response.response_json["prompt_eval_count"], 266 | "completion_tokens": response.response_json["eval_count"], 267 | } 268 | yield response.response_json["message"]["content"] 269 | if ollama_response.message.tool_calls: 270 | for tool_call in ollama_response.message.tool_calls: 271 | response.add_tool_call( 272 | llm.ToolCall( 273 | name=tool_call.function.name, 274 | arguments=tool_call.function.arguments, 275 | ) 276 | ) 277 | self.set_usage(response, usage) 278 | 279 | 280 | class AsyncOllama(_SharedOllama, llm.AsyncModel): 281 | async def execute( 282 | self, 283 | prompt: llm.Prompt, 284 | stream: bool, 285 | response: llm.Response, 286 | conversation=None, 287 | ): 288 | """ 289 | Executes the Ollama model asynchronously. 290 | 291 | Args: 292 | prompt (llm.Prompt): The prompt for the model. 293 | stream (bool): Whether to stream the response. 294 | response (llm.Response): The response object to populate. 295 | conversation (Optional): The conversation context. 
296 | """ 297 | messages = self.build_messages(prompt, conversation) 298 | response._prompt_json = {"messages": messages} 299 | 300 | options = prompt.options.model_dump(exclude_none=True) 301 | json_object = options.pop("json_object", None) 302 | kwargs = {} 303 | usage = None 304 | if json_object: 305 | kwargs["format"] = "json" 306 | elif prompt.schema: 307 | kwargs["format"] = prompt.schema 308 | 309 | try: 310 | if stream: 311 | response_stream = await get_async_client().chat( 312 | model=self.model_id, 313 | messages=messages, 314 | stream=True, 315 | options=options, 316 | **kwargs, 317 | ) 318 | async for chunk in response_stream: 319 | with contextlib.suppress(KeyError): 320 | yield chunk["message"]["content"] 321 | if chunk["done"]: 322 | usage = { 323 | "prompt_tokens": chunk["prompt_eval_count"], 324 | "completion_tokens": chunk["eval_count"], 325 | } 326 | else: 327 | ollama_response = await get_async_client().chat( 328 | model=self.model_id, 329 | messages=messages, 330 | options=options, 331 | **kwargs, 332 | ) 333 | response.response_json = ollama_response.dict() 334 | usage = { 335 | "prompt_tokens": response.response_json["prompt_eval_count"], 336 | "completion_tokens": response.response_json["eval_count"], 337 | } 338 | yield response.response_json["message"]["content"] 339 | self.set_usage(response, usage) 340 | except Exception as e: 341 | raise RuntimeError(f"Async execution failed: {e}") from e 342 | 343 | 344 | class OllamaEmbed(llm.EmbeddingModel): 345 | supports_text = True 346 | supports_binary = False 347 | batch_size = 8 348 | 349 | def __init__(self, model_id): 350 | self.model_id = model_id 351 | self.truncate = True 352 | 353 | # Read OLLAMA_EMBED_TRUNCATE environment variable to decide if truncation 354 | # is enabled. If truncation is disabled and the input is too long, ollama.embed 355 | # call will fail. 356 | if (truncate := os.getenv("OLLAMA_EMBED_TRUNCATE")) is not None: 357 | try: 358 | self.truncate = TypeAdapter(bool).validate_python(truncate) 359 | except ValidationError: 360 | warnings.warn( 361 | f"OLLAMA_EMBED_TRUNCATE is set to '{truncate}', which is not a valid boolean value; defaulting to True", 362 | ) 363 | 364 | def __str__(self) -> str: 365 | return f"Ollama: {self.model_id}" 366 | 367 | def embed_batch(self, items): 368 | result = get_client().embed( 369 | model=self.model_id, 370 | input=items, 371 | truncate=self.truncate, 372 | ) 373 | yield from result["embeddings"] 374 | 375 | 376 | def _pick_primary_name(names: List[str]) -> Tuple[str, Tuple[str, ...]]: 377 | """Pick the primary model name from a list of names. 378 | 379 | The picking algorithm prefers names with the most specific tag, e.g. "llama2:7b-q4_K_M" 380 | over "llama2:7b" over "llama2:latest" over "llama2". 381 | 382 | Parameters 383 | ---------- 384 | names : list[str] 385 | A non-empty list of model names. 386 | 387 | Returns 388 | ------- 389 | tuple[str, tuple[str, ...]] 390 | The primary model name and a tuple with the secondary names. 
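Examples
--------
An illustrative sketch of the preference order (the most specific tag wins):

>>> _pick_primary_name(["llama2:7b", "llama2:7b-q4_K_M", "llama2:latest", "llama2"])
('llama2:7b-q4_K_M', ('llama2:7b', 'llama2:latest', 'llama2'))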
391 | 392 | """ 393 | if len(names) == 1: 394 | return names[0], () 395 | sorted_names = sorted( 396 | names, 397 | key=lambda name: ( 398 | ":" not in name, # Prefer names with a colon 399 | name.endswith(":latest"), # Non-latest tags preferred over latest 400 | -len(name), # Prefer longer names (likely more specific/quantized) 401 | name, # Finally sort by name itself 402 | ), 403 | ) 404 | return sorted_names[0], tuple(sorted_names[1:]) 405 | 406 | 407 | def _get_ollama_models() -> List[dict]: 408 | """Get a list of models available on Ollama. 409 | 410 | Returns 411 | ------- 412 | list[dict] 413 | A list of models available on Ollama. If the Ollama server is down, an empty 414 | list is returned. 415 | 416 | """ 417 | try: 418 | return get_client().list()["models"] 419 | except Exception: 420 | return [] 421 | 422 | 423 | @cache("model_capabilities", key="digest") 424 | def _ollama_model_capabilities(digest: str, model: str) -> Tuple[bool, bool]: 425 | """Check the capabilities of a model. 426 | 427 | chat_completion: bool 428 | 429 | Indicates whether the model can be used for chat, as opposed to being an 430 | embedding-only model. 431 | 432 | The check mirrors the one in the Ollama server: 433 | https://github.com/ollama/ollama/blob/8a9bb0d000ae8201445ef1a590d7136df0a16f8b/server/images.go#L100 434 | It works by checking whether the model has a pooling_type key in the model_info, 435 | which marks the model as embedding-only and thus incapable of completion. 436 | pooling_type is found in 'model_info' as '{model_architecture}.pooling_type', 437 | where model_architecture is stored in 'model_info' under 'general.architecture'. 438 | Note: when present, the key appears to be set to '1', but the reference code does 439 | not check the value. 440 | 441 | tools: bool 442 | 443 | Looks for "tool" in the model prompt template, which is a reasonable heuristic. 444 | 445 | Parameters 446 | ---------- 447 | model : str 448 | The model name. 449 | 450 | Returns 451 | ------- 452 | tuple[bool, bool] 453 | - chat_completion: True if the model can be used for chats, False otherwise. 454 | - supports_tools: True if the model supports tools, False otherwise. 
455 | 456 | """ 457 | is_embedding_model = False 458 | try: 459 | model_data = get_client().show(model) 460 | 461 | model_info = model_data["modelinfo"] 462 | model_arch = model_info["general.architecture"] 463 | 464 | supports_tools = "tool" in model_data.get("template", "").lower() 465 | 466 | is_embedding_model = f"{model_arch}.pooling_type" in model_info 467 | except ollama.ResponseError: 468 | # if ollama.show fails, model name is not present in Ollama server, return False 469 | return False, False 470 | # except ConnectionError: 471 | 472 | return not is_embedding_model, supports_tools 473 | -------------------------------------------------------------------------------- /llm_ollama/auth.py: -------------------------------------------------------------------------------- 1 | """Authentication functionality for Ollama clients.""" 2 | 3 | import os 4 | from typing import Optional, Tuple 5 | from urllib.parse import unquote, urlparse 6 | 7 | import httpx 8 | import ollama 9 | 10 | 11 | def get_client() -> ollama.Client: 12 | """Create an Ollama client with host and authentication set based on OLLAMA_HOST.""" 13 | return _create_client(ollama.Client) 14 | 15 | 16 | def get_async_client() -> ollama.AsyncClient: 17 | """Create an asynchronous Ollama client with host and authentication set based on OLLAMA_HOST.""" 18 | return _create_client(ollama.AsyncClient) 19 | 20 | 21 | def _parse_auth_from_url(url: str) -> Tuple[str, Optional[httpx.BasicAuth]]: 22 | """Parse URL and extract credentials if present. 23 | 24 | Parameters 25 | ---------- 26 | url : str 27 | The URL to parse in the format http://username:password@host:port. 28 | 29 | Returns 30 | ------- 31 | Tuple[str, Optional[httpx.BasicAuth]] 32 | A tuple containing the clean URL without credentials and an httpx.BasicAuth 33 | object if credentials were found, or None if no credentials were present. 34 | 35 | """ 36 | parsed = urlparse(url) 37 | auth = None 38 | if parsed.username and parsed.password: 39 | auth = httpx.BasicAuth( 40 | username=unquote(parsed.username), 41 | password=unquote(parsed.password), 42 | ) 43 | netloc = parsed.hostname 44 | if parsed.port: 45 | netloc = f"{netloc}:{parsed.port}" 46 | return parsed._replace(netloc=netloc).geturl(), auth 47 | 48 | 49 | def _parse_auth_from_env() -> Tuple[Optional[str], Optional[httpx.BasicAuth]]: 50 | """Parse OLLAMA_HOST environment variable and extract credentials if present.""" 51 | host = os.getenv("OLLAMA_HOST") 52 | if not host: 53 | return None, None 54 | return _parse_auth_from_url(host) 55 | 56 | 57 | def _create_client(client_class): 58 | """Create a client with host and authentication set based on OLLAMA_HOST.""" 59 | host, auth = _parse_auth_from_env() 60 | kwargs = {} 61 | if host: 62 | kwargs["host"] = host 63 | if auth: 64 | kwargs["auth"] = auth 65 | return client_class(**kwargs) 66 | -------------------------------------------------------------------------------- /llm_ollama/cache.py: -------------------------------------------------------------------------------- 1 | """Caching for expensive function calls.""" 2 | 3 | import functools 4 | import inspect 5 | from pathlib import Path 6 | from typing import Any, Dict 7 | 8 | import yaml 9 | 10 | 11 | class Cache: 12 | """A generic caching mechanism for expensive function calls. 13 | 14 | This class provides a decorator that can be used to cache the results of function 15 | calls based on specific parameter values. The cache is stored in YAML files in a 16 | specified directory. 
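A minimal usage sketch (illustrative; the cache name and function are made up):

    cache = Cache(Path("/tmp/demo-cache"))

    @cache("results", key="name")
    def lookup(name):
        ...  # expensive call; the result is cached per distinct ``name`` value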
17 | 18 | Attributes 19 | ---------- 20 | CACHE_VERSION : int 21 | The current cache version. This will be incremented if the cache format changes 22 | or there is a need to invalidate existing caches for any reason. 23 | cache_dir : Path 24 | The directory where cache files are stored. 25 | 26 | """ 27 | 28 | CACHE_VERSION = 2 29 | 30 | def __init__(self, cache_dir: Path) -> None: 31 | """Initialize a cache with the specified directory. 32 | 33 | Parameters 34 | ---------- 35 | cache_dir : Path 36 | Path to the directory where cache files will be stored. The directory will 37 | be created if it does not exist. 38 | 39 | """ 40 | self.cache_dir = Path(cache_dir) 41 | self.cache_dir.mkdir(parents=True, exist_ok=True) 42 | 43 | def __call__(self, cache_name: str, key: str): 44 | """Decorate function to cache results based on parameter values. 45 | 46 | Parameters 47 | ---------- 48 | cache_name : str 49 | The name of the cache file (without extension). This will be used to create 50 | a YAML file in the cache directory. 51 | key : str 52 | Parameter name to use as the cache key. This must be a string that matches 53 | one of the parameter names in the decorated function. 54 | 55 | """ 56 | if not isinstance(key, str): 57 | raise TypeError("Key must be a string parameter name") 58 | 59 | cache_file = self.cache_dir / f"{cache_name}.yaml" 60 | 61 | def decorator(func): 62 | sig = inspect.signature(func) 63 | param_names = list(sig.parameters.keys()) 64 | 65 | @functools.wraps(func) 66 | def wrapper(*args, **kwargs): 67 | if key in kwargs: 68 | cache_key = kwargs[key] 69 | elif key in param_names and param_names.index(key) < len(args): 70 | cache_key = args[param_names.index(key)] 71 | else: 72 | raise ValueError(f"Parameter '{key}' not provided to function") 73 | 74 | # Make sure cache_key is serializable for YAML 75 | try: 76 | cache_key = str(cache_key) 77 | except TypeError as e: 78 | raise ValueError( 79 | f"Parameter '{key}' is not serializable for YAML", 80 | ) from e 81 | 82 | cache: Dict[str, Any] = {} 83 | try: 84 | with cache_file.open("r") as f: 85 | loaded_cache = yaml.safe_load(f) 86 | if loaded_cache is not None: 87 | cache = loaded_cache 88 | except (FileNotFoundError, yaml.scanner.ScannerError): 89 | pass 90 | 91 | # Invalidate cache if version is not present or has changed 92 | if "version" not in cache or cache["version"] != self.CACHE_VERSION: 93 | cache = {"version": self.CACHE_VERSION, "data": {}} 94 | 95 | if cache_key in cache["data"]: 96 | return cache["data"][cache_key] 97 | 98 | result = func(*args, **kwargs) 99 | 100 | cache["data"][cache_key] = result 101 | with cache_file.open("w") as f: 102 | yaml.safe_dump(cache, f) 103 | 104 | return result 105 | 106 | return wrapper 107 | 108 | return decorator 109 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "llm-ollama" 3 | version = "0.11" 4 | description = "LLM plugin providing access to local Ollama models" 5 | readme = "README.md" 6 | authors = [{ name = "Sergey Alexandrov" }] 7 | license = { text = "Apache-2.0" } 8 | classifiers = ["License :: OSI Approved :: Apache Software License"] 9 | dependencies = ["llm>=0.26a0", "ollama>=0.4", "pydantic>=2"] 10 | requires-python = ">=3.9" 11 | 12 | [project.urls] 13 | Homepage = "https://github.com/taketwo/llm-ollama" 14 | Changelog = "https://github.com/taketwo/llm-ollama/releases" 15 | Issues = 
"https://github.com/taketwo/llm-ollama/issues" 16 | CI = "https://github.com/taketwo/llm-ollama/actions" 17 | 18 | [project.entry-points.llm] 19 | ollama = "llm_ollama" 20 | 21 | [project.optional-dependencies] 22 | test = ["pytest", "pytest-asyncio", "pytest-mock"] 23 | lint = ["black"] 24 | 25 | [tool.setuptools.packages.find] 26 | include = ["llm_ollama*"] 27 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from _pytest.fixtures import SubRequest 3 | 4 | 5 | def pytest_addoption(parser: pytest.Parser) -> None: 6 | """Add command-line options for integration test control. 7 | 8 | Creates a separate option group for integration testing with two mutually exclusive 9 | flags: 10 | 11 | --integration Force run integration tests, fail if Ollama server unavailable 12 | --no-integration Skip integration tests regardless of Ollama server status 13 | """ 14 | group = parser.getgroup("integration") 15 | group.addoption( 16 | "--integration", 17 | action="store_true", 18 | help="force enable integration tests", 19 | ) 20 | group.addoption( 21 | "--no-integration", 22 | action="store_true", 23 | help="force disable integration tests", 24 | ) 25 | 26 | 27 | def pytest_configure(config: pytest.Config) -> None: 28 | """Configure integration testing setup. 29 | 30 | Performs two tasks: 31 | 1. Registers the 'integration' marker to avoid pytest warnings about unknown markers 32 | 2. Validates CLI options, ensuring --integration and --no-integration are not used 33 | together 34 | """ 35 | config.addinivalue_line( 36 | "markers", 37 | "integration: mark test as requiring Ollama server", 38 | ) 39 | 40 | if config.getoption("--integration") and config.getoption("--no-integration"): 41 | raise pytest.UsageError( 42 | "--integration and --no-integration are mutually exclusive", 43 | ) 44 | 45 | 46 | @pytest.fixture(autouse=True) 47 | def _check_ollama(request: SubRequest) -> None: 48 | """Automatically check Ollama server availability for integration tests. 49 | 50 | This fixture runs automatically for any test marked with @pytest.mark.integration. 
51 | It implements the following logic: 52 | * If --no-integration specified: skip test 53 | * If --integration specified: fail if Ollama server unavailable 54 | * Otherwise: skip if Ollama server unavailable 55 | """ 56 | if not request.node.get_closest_marker("integration"): 57 | return 58 | 59 | if request.config.getoption("--no-integration"): 60 | pytest.skip("Integration tests disabled with --no-integration") 61 | 62 | try: 63 | __import__("ollama").list() 64 | except Exception as e: 65 | if request.config.getoption("--integration"): 66 | raise RuntimeError( 67 | "--integration specified but Ollama server not available", 68 | ) from e 69 | pytest.skip("Ollama server not available") 70 | -------------------------------------------------------------------------------- /tests/test_auth.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock, patch 2 | 3 | import pytest 4 | 5 | from llm_ollama.auth import get_async_client, get_client 6 | 7 | 8 | @pytest.fixture 9 | def mock_basic_auth(): 10 | with patch("llm_ollama.auth.httpx.BasicAuth") as mock: 11 | yield mock 12 | 13 | 14 | @pytest.fixture 15 | def mock_ollama_client(): 16 | with patch("llm_ollama.auth.ollama.Client") as mock: 17 | yield mock 18 | 19 | 20 | @pytest.fixture 21 | def mock_ollama_async_client(): 22 | with patch("llm_ollama.auth.ollama.AsyncClient") as mock: 23 | yield mock 24 | 25 | 26 | def parametrize_clients(): 27 | """Decorator to apply client parameterization to test methods.""" 28 | return pytest.mark.parametrize( 29 | ("get_client_func", "mock_fixture"), 30 | [ 31 | (get_client, "mock_ollama_client"), 32 | (get_async_client, "mock_ollama_async_client"), 33 | ], 34 | ) 35 | 36 | 37 | class TestAuthentication: 38 | """Tests for Ollama client authentication.""" 39 | 40 | @parametrize_clients() 41 | def test_no_environment_variable( 42 | self, 43 | get_client_func, 44 | mock_fixture, 45 | request, 46 | monkeypatch, 47 | ): 48 | """Test client creation when OLLAMA_HOST is not set.""" 49 | monkeypatch.delenv("OLLAMA_HOST", raising=False) 50 | mock_client_class = request.getfixturevalue(mock_fixture) 51 | 52 | get_client_func() 53 | 54 | mock_client_class.assert_called_once_with() 55 | 56 | @parametrize_clients() 57 | def test_host_without_auth( 58 | self, 59 | get_client_func, 60 | mock_fixture, 61 | request, 62 | monkeypatch, 63 | ): 64 | """Test client creation with host but without authentication.""" 65 | monkeypatch.setenv("OLLAMA_HOST", "http://localhost:11434") 66 | mock_client_class = request.getfixturevalue(mock_fixture) 67 | 68 | get_client_func() 69 | 70 | mock_client_class.assert_called_once_with(host="http://localhost:11434") 71 | 72 | @parametrize_clients() 73 | def test_host_with_auth( 74 | self, 75 | get_client_func, 76 | mock_fixture, 77 | request, 78 | mock_basic_auth, 79 | monkeypatch, 80 | ): 81 | """Test client creation with host and authentication.""" 82 | monkeypatch.setenv("OLLAMA_HOST", "http://user:pass@example.com:8080") 83 | mock_client_class = request.getfixturevalue(mock_fixture) 84 | mock_auth_instance = Mock() 85 | mock_basic_auth.return_value = mock_auth_instance 86 | 87 | get_client_func() 88 | 89 | mock_basic_auth.assert_called_once_with(username="user", password="pass") 90 | mock_client_class.assert_called_once_with( 91 | host="http://example.com:8080", 92 | auth=mock_auth_instance, 93 | ) 94 | 95 | 96 | @pytest.mark.parametrize( 97 | ("host_url", "expected_host", "expected_user", "expected_pass"), 98 | [ 99 | 
("http://user:pass@localhost:11434", "http://localhost:11434", "user", "pass"), 100 | ( 101 | "https://admin:secret@secure.example.com", 102 | "https://secure.example.com", 103 | "admin", 104 | "secret", 105 | ), 106 | ( 107 | "http://user%40domain:p%40ssw0rd@example.com:8080", 108 | "http://example.com:8080", 109 | "user@domain", 110 | "p@ssw0rd", 111 | ), 112 | ], 113 | ) 114 | def test_various_auth_formats( 115 | host_url, 116 | expected_host, 117 | expected_user, 118 | expected_pass, 119 | mock_basic_auth, 120 | mock_ollama_client, 121 | monkeypatch, 122 | ): 123 | """Test parsing various URL formats with authentication.""" 124 | monkeypatch.setenv("OLLAMA_HOST", host_url) 125 | mock_auth_instance = Mock() 126 | mock_basic_auth.return_value = mock_auth_instance 127 | 128 | get_client() 129 | 130 | mock_basic_auth.assert_called_once_with( 131 | username=expected_user, 132 | password=expected_pass, 133 | ) 134 | mock_ollama_client.assert_called_once_with( 135 | host=expected_host, 136 | auth=mock_auth_instance, 137 | ) 138 | -------------------------------------------------------------------------------- /tests/test_cache.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock, patch 2 | 3 | import pytest 4 | import yaml 5 | 6 | from llm_ollama.cache import Cache 7 | 8 | 9 | @pytest.fixture 10 | def cache_dir(tmp_path): 11 | """Fixture providing a temporary directory for cache files.""" 12 | return tmp_path / "cache" 13 | 14 | 15 | @pytest.fixture 16 | def cache(cache_dir): 17 | """Fixture providing a Cache instance for testing.""" 18 | return Cache(cache_dir) 19 | 20 | 21 | def func(value): 22 | """A sample function to be used in tests.""" 23 | return f"result-{value}" 24 | 25 | 26 | @pytest.fixture 27 | def func_instrumented(): 28 | """Fixture providing a instrumented version of the sample function.""" 29 | return Mock(side_effect=func) 30 | 31 | 32 | @pytest.fixture 33 | def func_decorated(cache, func_instrumented): 34 | """Fixture providing a the sample function decorated for caching.""" 35 | return cache("sample", key="value")(func_instrumented) 36 | 37 | 38 | class TestCacheInitialization: 39 | """Tests for Cache initialization.""" 40 | 41 | def test_init_creates_directory(self, cache_dir): 42 | """Test that the cache creates its directory if it does not exist.""" 43 | assert not cache_dir.exists() 44 | Cache(cache_dir) 45 | assert cache_dir.exists() 46 | assert cache_dir.is_dir() 47 | 48 | 49 | class TestCacheDecorator: 50 | """Tests for the @cache decorator.""" 51 | 52 | def test_basic_caching(self, cache, cache_dir, func_instrumented): 53 | """Test basic function caching functionality.""" 54 | 55 | func_decorated = cache("sample", key="value")( 56 | lambda value: func_instrumented(value), 57 | ) 58 | 59 | assert func_decorated("foo") == func("foo") 60 | assert func_instrumented.call_count == 1 61 | 62 | assert func_decorated("foo") == func("foo") 63 | assert func_instrumented.call_count == 1 64 | 65 | assert func_decorated("bar") == func("bar") 66 | assert func_instrumented.call_count == 2 67 | 68 | assert func_decorated("foo") == func("foo") 69 | assert func_instrumented.call_count == 2 70 | 71 | cache_file = cache_dir / "sample.yaml" 72 | assert cache_file.exists() 73 | 74 | with cache_file.open("r") as f: 75 | cache_data = yaml.safe_load(f) 76 | 77 | assert cache_data["version"] == Cache.CACHE_VERSION 78 | assert "data" in cache_data 79 | assert cache_data["data"]["foo"] == func("foo") 80 | assert 
cache_data["data"]["bar"] == func("bar") 81 | 82 | def test_arg_passing_styles(self, cache, func_instrumented): 83 | """Test caching with different argument passing styles.""" 84 | 85 | @cache("sample", key="value") 86 | def func_with_multiple_args(value, other=None): 87 | return func_instrumented(value) 88 | 89 | # First call 90 | result1 = func_with_multiple_args("foobar") 91 | assert result1 == func("foobar") 92 | assert func_instrumented.call_count == 1 93 | 94 | result2 = func_with_multiple_args(value="foobar") 95 | assert result2 == func("foobar") 96 | assert func_instrumented.call_count == 1 97 | 98 | def test_key_not_provided(self, cache): 99 | """Test error handling when key parameter is not provided.""" 100 | 101 | @cache("sample", key="baz") 102 | def function_without_baz_parameter(value, other=None): 103 | return func(value) 104 | 105 | with pytest.raises(ValueError, match="Parameter 'baz' not provided"): 106 | function_without_baz_parameter("test") 107 | 108 | def test_non_string_key_name(self, cache): 109 | """Test error handling for non-string key names.""" 110 | with pytest.raises(TypeError, match="Key must be a string parameter name"): 111 | cache("sample", key=123)(func) 112 | 113 | 114 | class TestCacheInvalidation: 115 | """Tests for cache invalidation mechanisms.""" 116 | 117 | def test_version_invalidation(self, cache_dir, func_instrumented): 118 | """Test that changing cache version invalidates the cache.""" 119 | 120 | cache1 = Cache(cache_dir) 121 | func_decorated1 = cache1("sample", key="value")( 122 | lambda value: func_instrumented(value), 123 | ) 124 | 125 | func_decorated1("test") 126 | assert func_instrumented.call_count == 1 127 | func_decorated1("test") 128 | assert func_instrumented.call_count == 1 129 | 130 | # Create a new cache with a different version 131 | with patch.object(Cache, "CACHE_VERSION", 3): 132 | cache2 = Cache(cache_dir) 133 | func_decorated2 = cache2("sample", key="value")( 134 | lambda value: func_instrumented(value), 135 | ) 136 | 137 | # Should not use old cache due to version change 138 | func_decorated2("test") 139 | assert func_instrumented.call_count == 2 140 | 141 | @pytest.mark.parametrize( 142 | "content", 143 | [ 144 | "", # Empty file 145 | "# Just a comment", # Comment only 146 | "invalid: yaml: content:", # Invalid YAML 147 | ], 148 | ) 149 | def test_invalid_cache_file(self, cache, cache_dir, func_instrumented, content): 150 | """Test handling of invalid/corrupted cache files.""" 151 | 152 | cache_file = cache_dir / "invalid.yaml" 153 | cache_file.write_text(content) 154 | 155 | func_decorated = cache("invalid", key="value")( 156 | lambda value: func_instrumented(value), 157 | ) 158 | 159 | result = func_decorated("test") 160 | assert result == func("test") 161 | assert func_instrumented.call_count == 1 162 | 163 | # Check that the cache file was properly repaired/recreated 164 | with cache_file.open("r") as f: 165 | cache_data = yaml.safe_load(f) 166 | assert cache_data["version"] == Cache.CACHE_VERSION 167 | assert "test" in cache_data["data"] 168 | 169 | 170 | class TestCacheDecoratorWithMethods: 171 | """Tests for caching on methods and more complex scenarios.""" 172 | 173 | def test_class_method_caching(self, cache): 174 | """Test caching on class methods.""" 175 | func_instrumented = Mock(side_effect=lambda self_val, val: f"{self_val}-{val}") 176 | 177 | class TestClass: 178 | def __init__(self, instance_value="instance"): 179 | self.instance_value = instance_value 180 | 181 | @cache("methods", key="value") 182 | def 
183 |                 return func_instrumented(self.instance_value, value)
184 |
185 |         # Create two instances
186 |         instance1 = TestClass()
187 |         instance2 = TestClass("second")
188 |
189 |         # First call on first instance
190 |         result1 = instance1.test_method("test")
191 |         assert result1 == "instance-test"
192 |         assert func_instrumented.call_count == 1
193 |
194 |         # Call on second instance with same key - should use cache despite different
195 |         # instance state
196 |         result2 = instance2.test_method("test")
197 |         assert result2 == "instance-test"  # Not "second-test"
198 |         assert func_instrumented.call_count == 1
199 |
200 |         # Change instance state and call again - should still use cache
201 |         instance1.instance_value = "modified"
202 |         result3 = instance1.test_method("test")
203 |         assert result3 == "instance-test"  # Not "modified-test"
204 |         assert func_instrumented.call_count == 1
205 |
--------------------------------------------------------------------------------
/tests/test_ollama_integration.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | import pytest
4 |
5 | from llm import get_async_model, get_model, schema_dsl
6 |
7 |
8 | @pytest.mark.integration
9 | @pytest.mark.asyncio
10 | async def test_async_model_prompt():
11 |     """Test an actual model run; needs llama3.2."""
12 |     model = get_async_model("llama3.2:latest")
13 |     response = model.prompt("a short poem about tea")
14 |     response_text = await response.text()
15 |     assert len(response_text) > 0
16 |
17 |
18 | @pytest.mark.integration
19 | @pytest.mark.asyncio
20 | async def test_async_model_prompt_with_schema():
21 |     """Test an actual model run; needs llama3.2."""
22 |     model = get_async_model("llama3.2:latest")
23 |     response = model.prompt(
24 |         "Describe a nice dog with a surprising name",
25 |         schema=schema_dsl("name, age int, bio"),
26 |     )
27 |     response_text = await response.text()
28 |     assert len(response_text) > 0
29 |     json_response = json.loads(response_text)
30 |     assert "name" in json_response
31 |     assert "bio" in json_response
32 |     assert "age" in json_response
33 |     assert isinstance(json_response["age"], int)
34 |
35 |
36 | @pytest.mark.integration
37 | def test_tools():
38 |     """Test tool execution; needs llama3.2."""
Needs llama3.2""" 39 | 40 | def multiply(a: int, b: int): 41 | "Multiply two integers" 42 | return int(a) * int(b) 43 | 44 | model = get_model("llama3.2:latest") 45 | chain = model.chain("12345 * 4312", tools=[multiply]) 46 | result = chain.text() 47 | assert "53231640" in result or "53,231,640" in result 48 | -------------------------------------------------------------------------------- /tests/test_ollama_unit.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import AsyncMock, Mock 2 | 3 | import pytest 4 | 5 | from httpx import ConnectError 6 | 7 | from llm import ( 8 | get_async_model, 9 | get_embedding_models_with_aliases, 10 | get_models_with_aliases, 11 | ) 12 | from llm.plugins import load_plugins, pm 13 | 14 | from llm_ollama import Ollama, OllamaEmbed 15 | 16 | 17 | @pytest.fixture 18 | def mock_ollama_client(mocker): 19 | return_value = { 20 | "models": [ 21 | { 22 | "model": "stable-code:3b", 23 | "digest": "aa5ab8afb86208e1c097028d63074f0142ce6079420ea6f68f219933361fd869", 24 | "modelinfo": { 25 | "general.architecture": "stablelm", 26 | }, 27 | }, 28 | { 29 | "model": "llama2:7b", 30 | "digest": "78e26419b4469263f75331927a00a0284ef6544c1975b826b15abdaef17bb962", 31 | "modelinfo": { 32 | "general.architecture": "llama", 33 | }, 34 | }, 35 | { 36 | "model": "llama2:7b-q4_K_M", 37 | "digest": "78e26419b4469263f75331927a00a0284ef6544c1975b826b15abdaef17bb962", 38 | "modelinfo": { 39 | "general.architecture": "llama", 40 | }, 41 | }, 42 | { 43 | "model": "llama2:latest", 44 | "digest": "78e26419b4469263f75331927a00a0284ef6544c1975b826b15abdaef17bb962", 45 | "modelinfo": { 46 | "general.architecture": "llama", 47 | }, 48 | }, 49 | { 50 | "model": "phi3:latest", 51 | "digest": "e2fd6321a5fe6bb3ac8a4e6f1cf04477fd2dea2924cf53237a995387e152ee9c", 52 | "modelinfo": { 53 | "general.architecture": "phi3", 54 | }, 55 | "template": "This one supports tools", 56 | }, 57 | { 58 | "model": "mxbai-embed-large:latest", 59 | "digest": "468836162de7f81e041c43663fedbbba921dcea9b9fefea135685a39b2d83dd8", 60 | "modelinfo": { 61 | "general.architecture": "bert", 62 | "bert.pooling_type": 2, 63 | }, 64 | }, 65 | { 66 | "model": "deepseek-r1:70b", 67 | "digest": "0c1615a8ca32ef41e433aa420558b4685f9fc7f3fd74119860a8e2e389cd7942", 68 | "modelinfo": { 69 | "general.architecture": "llama", 70 | }, 71 | }, 72 | { 73 | "model": "deepseek-r1:70b-llama-distill-q4_K_M", 74 | "digest": "0c1615a8ca32ef41e433aa420558b4685f9fc7f3fd74119860a8e2e389cd7942", 75 | "modelinfo": { 76 | "general.architecture": "llama", 77 | }, 78 | }, 79 | ], 80 | } 81 | client = mocker.patch("llm_ollama.ollama.Client").return_value 82 | client.list.return_value = return_value 83 | client.show.side_effect = lambda name: next( 84 | m for m in return_value["models"] if m["model"] == name 85 | ) 86 | return client 87 | 88 | 89 | def test_plugin_is_installed(): 90 | load_plugins() 91 | names = [mod.__name__ for mod in pm.get_plugins()] 92 | assert "llm_ollama" in names 93 | 94 | 95 | def test_registered_chat_models(mock_ollama_client): 96 | expected = ( 97 | ("deepseek-r1:70b-llama-distill-q4_K_M", ["deepseek-r1:70b"]), 98 | ("llama2:7b-q4_K_M", ["llama2:7b", "llama2:latest", "llama2"]), 99 | ("phi3:latest", ["phi3"]), 100 | ("stable-code:3b", []), 101 | ) 102 | registered_ollama_models = sorted( 103 | [m for m in get_models_with_aliases() if isinstance(m.model, Ollama)], 104 | key=lambda m: m.model.model_id, 105 | ) 106 | assert len(registered_ollama_models) == len(expected) 107 | for 
108 |         assert model.model.model_id == name
109 |         assert model.aliases == aliases
110 |
111 |
112 | def test_registered_embedding_models(mock_ollama_client):
113 |     expected = (
114 |         ("deepseek-r1:70b-llama-distill-q4_K_M", ["deepseek-r1:70b"]),
115 |         ("llama2:7b-q4_K_M", ["llama2:7b", "llama2:latest", "llama2"]),
116 |         ("mxbai-embed-large:latest", ["mxbai-embed-large"]),
117 |         ("phi3:latest", ["phi3"]),
118 |         ("stable-code:3b", []),
119 |     )
120 |     registered_ollama_models = sorted(
121 |         [
122 |             m
123 |             for m in get_embedding_models_with_aliases()
124 |             if isinstance(m.model, OllamaEmbed)
125 |         ],
126 |         key=lambda m: m.model.model_id,
127 |     )
128 |     assert len(registered_ollama_models) == len(expected)
129 |     for model, (name, aliases) in zip(registered_ollama_models, expected):
130 |         assert model.model.model_id == name
131 |         assert model.aliases == aliases
132 |
133 |
134 | @pytest.mark.parametrize(
135 |     ("envvar_value", "expected_truncate_value"),
136 |     [
137 |         (None, True),
138 |         ("True", True),
139 |         ("true", True),
140 |         ("yes", True),
141 |         ("y", True),
142 |         ("on", True),
143 |         ("False", False),
144 |         ("false", False),
145 |         ("no", False),
146 |         ("n", False),
147 |         ("off", False),
148 |     ],
149 | )
150 | def test_model_embed(
151 |     mocker,
152 |     envvar_value,
153 |     expected_truncate_value,
154 |     monkeypatch,
155 | ):
156 |     expected = [0.1] * 1024
157 |
158 |     client = Mock()
159 |     client.embed.return_value = {"embeddings": [expected]}
160 |     mocker.patch("llm_ollama.ollama.Client", return_value=client)
161 |
162 |     if envvar_value is not None:
163 |         monkeypatch.setenv("OLLAMA_EMBED_TRUNCATE", envvar_value)
164 |     else:
165 |         monkeypatch.delenv("OLLAMA_EMBED_TRUNCATE", raising=False)
166 |
167 |     result = OllamaEmbed("mxbai-embed-large:latest").embed("string to embed")
168 |     assert result == expected
169 |
170 |     _, called_kwargs = client.embed.call_args
171 |     assert called_kwargs.get("truncate") is expected_truncate_value
172 |
173 |
174 | def test_registered_models_when_ollama_is_down(mocker):
175 |     client = Mock()
176 |     client.list.side_effect = ConnectError("[Errno 111] Connection refused")
177 |     mocker.patch("llm_ollama.ollama.Client", return_value=client)
178 |     assert not any(isinstance(m.model, Ollama) for m in get_models_with_aliases())
179 |
180 |
181 | @pytest.mark.asyncio
182 | async def test_async_ollama_call(mocker, mock_ollama_client):
183 |     # Mock the asynchronous chat method to return an async iterable
184 |     async def mock_chat(*args, **kwargs):
185 |         messages = [
186 |             {"message": {"content": "Test response 1"}},
187 |             {"message": {"content": "Test response 2"}},
188 |         ]
189 |         for msg in messages:
190 |             yield msg
191 |
192 |     client = AsyncMock()
193 |     client.chat.return_value = mock_chat()
194 |
195 |     mocker.patch("ollama.AsyncClient", return_value=client)
196 |
197 |     # Instantiate the model and send a prompt
198 |     model = get_async_model("llama2:7b")
199 |     response = model.prompt("Dummy Prompt")
200 |     response_text = await response.text()
201 |
202 |     assert response_text == "Test response 1Test response 2"
203 |     client.chat.assert_called_once()
204 |
--------------------------------------------------------------------------------