├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ ├── cog.yml │ ├── publish.yml │ ├── stable-docs.yml │ └── test.yml ├── .gitignore ├── .readthedocs.yaml ├── AGENTS.md ├── Justfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── .gitignore ├── Makefile ├── _templates │ └── base.html ├── aliases.md ├── changelog.md ├── conf.py ├── contributing.md ├── embeddings │ ├── cli.md │ ├── index.md │ ├── python-api.md │ ├── storage.md │ └── writing-plugins.md ├── fragments.md ├── help.md ├── index.md ├── logging.md ├── openai-models.md ├── other-models.md ├── plugins │ ├── advanced-model-plugins.md │ ├── directory.md │ ├── index.md │ ├── installing-plugins.md │ ├── llm-markov │ │ ├── llm_markov.py │ │ └── pyproject.toml │ ├── plugin-hooks.md │ ├── plugin-utilities.md │ └── tutorial-model-plugin.md ├── python-api.md ├── related-tools.md ├── requirements.txt ├── schemas.md ├── setup.md ├── templates.md ├── tools.md └── usage.md ├── llm ├── __init__.py ├── __main__.py ├── cli.py ├── default_plugins │ ├── __init__.py │ ├── default_tools.py │ └── openai_models.py ├── embeddings.py ├── embeddings_migrations.py ├── errors.py ├── hookspecs.py ├── migrations.py ├── models.py ├── plugins.py ├── py.typed ├── templates.py ├── tools.py └── utils.py ├── mypy.ini ├── pyproject.toml ├── pytest.ini ├── ruff.toml └── tests ├── cassettes └── test_tools │ ├── test_tool_use_basic.yaml │ └── test_tool_use_chain_of_two_calls.yaml ├── conftest.py ├── test-llm-load-plugins.sh ├── test_aliases.py ├── test_async.py ├── test_attachments.py ├── test_chat.py ├── test_cli_openai_models.py ├── test_cli_options.py ├── test_embed.py ├── test_embed_cli.py ├── test_encode_decode.py ├── test_fragments_cli.py ├── test_keys.py ├── test_llm.py ├── test_llm_logs.py ├── test_migrate.py ├── test_plugins.py ├── test_templates.py ├── test_tools.py └── test_utils.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [simonw] 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | groups: 8 | python-packages: 9 | patterns: 10 | - "*" 11 | -------------------------------------------------------------------------------- /.github/workflows/cog.yml: -------------------------------------------------------------------------------- 1 | name: Run Cog 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize] 6 | 7 | permissions: 8 | contents: write 9 | pull-requests: write 10 | 11 | jobs: 12 | run-cog: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | with: 18 | ref: ${{ github.head_ref }} 19 | 20 | - name: Set up Python 3.11 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: '3.11' 24 | 25 | - name: Install dependencies 26 | run: | 27 | pip install -e '.[test]' 28 | pip install -r docs/requirements.txt 29 | 30 | - name: Run cog 31 | run: | 32 | cog -r -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" docs/**/*.md docs/*.md README.md 33 | 34 | - name: Check for changes 35 | id: check-changes 36 | run: | 37 | if [ -n "$(git diff)" ]; then 38 | echo "changes=true" >> $GITHUB_OUTPUT 39 | else 40 | echo "changes=false" >> $GITHUB_OUTPUT 41 | fi 42 | 43 | - name: Commit and push if changed 44 | if: steps.check-changes.outputs.changes == 'true' 
45 | run: | 46 | git config --local user.email "github-actions[bot]@users.noreply.github.com" 47 | git config --local user.name "github-actions[bot]" 48 | git add -A 49 | git commit -m "Ran cog" 50 | git push 51 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | cache: pip 23 | cache-dependency-path: setup.py 24 | - name: Install dependencies 25 | run: | 26 | pip install '.[test]' 27 | - name: Run tests 28 | run: | 29 | pytest 30 | deploy: 31 | runs-on: ubuntu-latest 32 | environment: release 33 | permissions: 34 | id-token: write 35 | needs: [test] 36 | steps: 37 | - uses: actions/checkout@v4 38 | - name: Set up Python 39 | uses: actions/setup-python@v5 40 | with: 41 | python-version: '3.13' 42 | cache: pip 43 | cache-dependency-path: setup.py 44 | - name: Install dependencies 45 | run: | 46 | pip install setuptools wheel build 47 | - name: Build 48 | run: | 49 | python -m build 50 | - name: Publish 51 | uses: pypa/gh-action-pypi-publish@release/v1 52 | -------------------------------------------------------------------------------- /.github/workflows/stable-docs.yml: -------------------------------------------------------------------------------- 1 | name: Update Stable Docs 2 | 3 | on: 4 | release: 5 | types: [published] 6 | push: 7 | branches: 8 | - main 9 | 10 | permissions: 11 | contents: write 12 | 13 | jobs: 14 | update_stable_docs: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Checkout repository 18 | uses: actions/checkout@v3 19 | with: 20 | fetch-depth: 0 # We need all commits to find docs/ changes 21 | - name: Set up Git user 22 | run: | 23 | git config user.name "Automated" 24 | git config user.email "actions@users.noreply.github.com" 25 | - name: Create stable branch if it does not yet exist 26 | run: | 27 | if ! 
git ls-remote --heads origin stable | grep stable; then 28 | git checkout -b stable 29 | # If there are any releases, copy docs/ in from most recent 30 | LATEST_RELEASE=$(git tag | sort -Vr | head -n1) 31 | if [ -n "$LATEST_RELEASE" ]; then 32 | rm -rf docs/ 33 | git checkout $LATEST_RELEASE -- docs/ 34 | fi 35 | git commit -m "Populate docs/ from $LATEST_RELEASE" || echo "No changes" 36 | git push -u origin stable 37 | fi 38 | - name: Handle Release 39 | if: github.event_name == 'release' && !github.event.release.prerelease 40 | run: | 41 | git fetch --all 42 | git checkout stable 43 | git reset --hard ${GITHUB_REF#refs/tags/} 44 | git push origin stable --force 45 | - name: Handle Commit to Main 46 | if: contains(github.event.head_commit.message, '!stable-docs') 47 | run: | 48 | git fetch origin 49 | git checkout -b stable origin/stable 50 | # Get the list of modified files in docs/ from the current commit 51 | FILES=$(git diff-tree --no-commit-id --name-only -r ${{ github.sha }} -- docs/) 52 | # Check if the list of files is non-empty 53 | if [[ -n "$FILES" ]]; then 54 | # Checkout those files to the stable branch to over-write with their contents 55 | for FILE in $FILES; do 56 | git checkout ${{ github.sha }} -- $FILE 57 | done 58 | git add docs/ 59 | git commit -m "Doc changes from ${{ github.sha }}" 60 | git push origin stable 61 | else 62 | echo "No changes to docs/ in this commit." 63 | exit 0 64 | fi 65 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | permissions: 6 | contents: read 7 | 8 | jobs: 9 | test: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | os: [ubuntu-latest, macos-latest, windows-latest] 14 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | cache: pip 22 | cache-dependency-path: setup.py 23 | - name: Install dependencies 24 | run: | 25 | pip install -e '.[test]' 26 | - name: Run tests 27 | run: | 28 | python -m pytest -vv 29 | - name: Check if cog needs to be run 30 | if: matrix.os != 'windows-latest' 31 | run: | 32 | cog --check \ 33 | -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" \ 34 | docs/**/*.md docs/*.md 35 | - name: Run Black 36 | if: matrix.os != 'windows-latest' 37 | run: | 38 | black --check . 39 | - name: Run mypy 40 | if: matrix.os != 'windows-latest' 41 | run: | 42 | mypy llm 43 | - name: Run ruff 44 | if: matrix.os != 'windows-latest' 45 | run: | 46 | ruff check . 
47 | - name: Check it builds 48 | run: | 49 | python -m build 50 | - name: Run test-llm-load-plugins.sh 51 | if: matrix.os != 'windows-latest' 52 | run: | 53 | llm install llm-cluster llm-mistral 54 | ./tests/test-llm-load-plugins.sh 55 | - name: Upload artifact of builds 56 | if: matrix.python-version == '3.13' && matrix.os == 'ubuntu-latest' 57 | uses: actions/upload-artifact@v4 58 | with: 59 | name: dist-${{ matrix.os }}-${{ matrix.python-version }} 60 | path: dist/* 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | venv 6 | .eggs 7 | .pytest_cache 8 | *.egg-info 9 | .DS_Store 10 | .idea/ 11 | .vscode/ 12 | uv.lock -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.11" 7 | 8 | sphinx: 9 | configuration: docs/conf.py 10 | 11 | formats: 12 | - pdf 13 | - epub 14 | 15 | python: 16 | install: 17 | - requirements: docs/requirements.txt 18 | - method: pip 19 | path: . 20 | -------------------------------------------------------------------------------- /AGENTS.md: -------------------------------------------------------------------------------- 1 | # AGENTS.md 2 | 3 | This project uses a Python environment for development and tests. 4 | 5 | ## Setting up a development environment 6 | 7 | 1. Install the project with its test dependencies: 8 | ```bash 9 | pip install -e '.[test]' 10 | ``` 11 | 2. Run the tests: 12 | ```bash 13 | pytest 14 | ``` 15 | 16 | ## Building the documentation 17 | 18 | Run the following commands if you want to build the docs locally: 19 | 20 | ```bash 21 | cd docs 22 | pip install -r requirements.txt 23 | make html 24 | ``` 25 | -------------------------------------------------------------------------------- /Justfile: -------------------------------------------------------------------------------- 1 | # Run tests and linters 2 | @default: test lint 3 | 4 | # Install dependencies and test dependencies 5 | @init: 6 | pipenv run pip install -e '.[test]' 7 | 8 | # Run pytest with supplied options 9 | @test *options: 10 | pipenv run pytest {{options}} 11 | 12 | # Run linters 13 | @lint: 14 | echo "Linters..." 15 | echo " Black" 16 | pipenv run black . --check 17 | echo " cog" 18 | pipenv run cog --check \ 19 | -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" \ 20 | README.md docs/*.md 21 | echo " mypy" 22 | pipenv run mypy llm 23 | echo " ruff" 24 | pipenv run ruff check . 25 | 26 | # Run mypy 27 | @mypy: 28 | pipenv run mypy llm 29 | 30 | # Rebuild docs with cog 31 | @cog: 32 | pipenv run cog -r -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" docs/**/*.md docs/*.md README.md 33 | 34 | # Serve live docs on localhost:8000 35 | @docs: cog 36 | rm -rf docs/_build 37 | cd docs && pipenv run make livehtml 38 | 39 | # Apply Black 40 | @black: 41 | pipenv run black . 42 | 43 | # Run automatic fixes 44 | @fix: cog 45 | pipenv run ruff check . --fix 46 | pipenv run black . 
47 | 48 | # Push commit if tests pass 49 | @push: test lint 50 | git push 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-exclude tests/* 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = sqlite-utils 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | livehtml: 23 | sphinx-autobuild -b html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(0) 24 | -------------------------------------------------------------------------------- /docs/_templates/base.html: -------------------------------------------------------------------------------- 1 | {%- extends "!base.html" %} 2 | 3 | {%- block htmltitle -%} 4 | {% if not docstitle %} 5 | {{ title|striptags|e }} 6 | {% elif pagename == master_doc %} 7 | LLM: A CLI utility and Python library for interacting with Large Language Models 8 | {% else %} 9 | {{ title|striptags|e }} - {{ docstitle|striptags|e }} 10 | {% endif %} 11 | {%- endblock -%} 12 | 13 | {% block site_meta %} 14 | {{ super() }} 15 | 16 | {% endblock %} 17 | -------------------------------------------------------------------------------- /docs/aliases.md: -------------------------------------------------------------------------------- 1 | (aliases)= 2 | # Model aliases 3 | 4 | LLM supports model aliases, which allow you to refer to a model by a short name instead of its full ID. 
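For example, `4o` is a built-in alias for `gpt-4o` (you can see it in the listing below), so these two commands are equivalent:

```bash
llm -m gpt-4o 'Ten names for a pet pelican'
llm -m 4o 'Ten names for a pet pelican'
```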
5 | 6 | ## Listing aliases 7 | 8 | To list current aliases, run this: 9 | 10 | ```bash 11 | llm aliases 12 | ``` 13 | Example output: 14 | 15 | 21 | ``` 22 | 4o : gpt-4o 23 | chatgpt-4o : chatgpt-4o-latest 24 | 4o-mini : gpt-4o-mini 25 | 4.1 : gpt-4.1 26 | 4.1-mini : gpt-4.1-mini 27 | 4.1-nano : gpt-4.1-nano 28 | 3.5 : gpt-3.5-turbo 29 | chatgpt : gpt-3.5-turbo 30 | chatgpt-16k : gpt-3.5-turbo-16k 31 | 3.5-16k : gpt-3.5-turbo-16k 32 | 4 : gpt-4 33 | gpt4 : gpt-4 34 | 4-32k : gpt-4-32k 35 | gpt-4-turbo-preview : gpt-4-turbo 36 | 4-turbo : gpt-4-turbo 37 | 4t : gpt-4-turbo 38 | gpt-4.5 : gpt-4.5-preview 39 | 3.5-instruct : gpt-3.5-turbo-instruct 40 | chatgpt-instruct : gpt-3.5-turbo-instruct 41 | ada : text-embedding-ada-002 (embedding) 42 | ada-002 : text-embedding-ada-002 (embedding) 43 | 3-small : text-embedding-3-small (embedding) 44 | 3-large : text-embedding-3-large (embedding) 45 | 3-small-512 : text-embedding-3-small-512 (embedding) 46 | 3-large-256 : text-embedding-3-large-256 (embedding) 47 | 3-large-1024 : text-embedding-3-large-1024 (embedding) 48 | ``` 49 | 50 | 51 | Add `--json` to get that list back as JSON: 52 | 53 | ```bash 54 | llm aliases list --json 55 | ``` 56 | Example output: 57 | ```json 58 | { 59 | "3.5": "gpt-3.5-turbo", 60 | "chatgpt": "gpt-3.5-turbo", 61 | "4": "gpt-4", 62 | "gpt4": "gpt-4", 63 | "ada": "ada-002" 64 | } 65 | ``` 66 | 67 | ## Adding a new alias 68 | 69 | The `llm aliases set ` command can be used to add a new alias: 70 | 71 | ```bash 72 | llm aliases set mini gpt-4o-mini 73 | ``` 74 | You can also pass one or more `-q search` options to set an alias on the first model matching those search terms: 75 | ```bash 76 | llm aliases set mini -q 4o -q mini 77 | ``` 78 | Now you can run the `gpt-4o-mini` model using the `mini` alias like this: 79 | ```bash 80 | llm -m mini 'An epic Greek-style saga about a cheesecake that builds a SQL database from scratch' 81 | ``` 82 | Aliases can be set for both regular models and {ref}`embedding models ` using the same command. To set an alias of `oai` for the OpenAI `ada-002` embedding model use this: 83 | ```bash 84 | llm aliases set oai ada-002 85 | ``` 86 | Now you can embed a string using that model like so: 87 | ```bash 88 | llm embed -c 'hello world' -m oai 89 | ``` 90 | Output: 91 | ``` 92 | [-0.014945968054234982, 0.0014304015785455704, ...] 93 | ``` 94 | 95 | ## Removing an alias 96 | 97 | The `llm aliases remove ` command will remove the specified alias: 98 | 99 | ```bash 100 | llm aliases remove mini 101 | ``` 102 | 103 | ## Viewing the aliases file 104 | 105 | Aliases are stored in an `aliases.json` file in the LLM configuration directory. 106 | 107 | To see the path to that file, run this: 108 | 109 | ```bash 110 | llm aliases path 111 | ``` 112 | To view the content of that file, run this: 113 | 114 | ```bash 115 | cat "$(llm aliases path)" 116 | ``` -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from subprocess import PIPE, Popen 5 | 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 
14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | # import os 20 | # import sys 21 | # sys.path.insert(0, os.path.abspath('.')) 22 | 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | # 28 | # needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | "myst_parser", 35 | "sphinx_copybutton", 36 | "sphinx_markdown_builder", 37 | "sphinx.ext.autodoc", 38 | ] 39 | myst_enable_extensions = ["colon_fence"] 40 | 41 | markdown_http_base = "https://llm.datasette.io/en/stable" 42 | markdown_uri_doc_suffix = ".html" 43 | 44 | # Add any paths that contain templates here, relative to this directory. 45 | templates_path = ["_templates"] 46 | 47 | # The suffix(es) of source filenames. 48 | # You can specify multiple suffix as a list of string: 49 | # 50 | # source_suffix = ['.rst', '.md'] 51 | source_suffix = ".rst" 52 | 53 | # The master toctree document. 54 | master_doc = "index" 55 | 56 | # General information about the project. 57 | project = "LLM" 58 | copyright = "2025, Simon Willison" 59 | author = "Simon Willison" 60 | 61 | # The version info for the project you're documenting, acts as replacement for 62 | # |version| and |release|, also used in various other places throughout the 63 | # built documents. 64 | # 65 | # The short X.Y version. 66 | pipe = Popen("git describe --tags --always", stdout=PIPE, shell=True) 67 | git_version = pipe.stdout.read().decode("utf8") 68 | 69 | if git_version: 70 | version = git_version.rsplit("-", 1)[0] 71 | release = git_version 72 | else: 73 | version = "" 74 | release = "" 75 | 76 | # The language for content autogenerated by Sphinx. Refer to documentation 77 | # for a list of supported languages. 78 | # 79 | # This is also used if you do content translation via gettext catalogs. 80 | # Usually you set "language" from the command line for these cases. 81 | language = "en" 82 | 83 | # List of patterns, relative to source directory, that match files and 84 | # directories to ignore when looking for source files. 85 | # This patterns also effect to html_static_path and html_extra_path 86 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 87 | 88 | # The name of the Pygments (syntax highlighting) style to use. 89 | pygments_style = "sphinx" 90 | 91 | # If true, `todo` and `todoList` produce output, else they produce nothing. 92 | todo_include_todos = False 93 | 94 | 95 | # -- Options for HTML output ---------------------------------------------- 96 | 97 | # The theme to use for HTML and HTML Help pages. See the documentation for 98 | # a list of builtin themes. 99 | # 100 | html_theme = "furo" 101 | 102 | # Theme options are theme-specific and customize the look and feel of a theme 103 | # further. For a list of options available for each theme, see the 104 | # documentation. 105 | 106 | html_theme_options = {} 107 | html_title = "LLM" 108 | 109 | # Add any paths that contain custom static files (such as style sheets) here, 110 | # relative to this directory. 
They are copied after the builtin static files, 111 | # so a file named "default.css" will overwrite the builtin "default.css". 112 | html_static_path = [] 113 | 114 | 115 | # -- Options for HTMLHelp output ------------------------------------------ 116 | 117 | # Output file base name for HTML help builder. 118 | htmlhelp_basename = "llm-doc" 119 | 120 | 121 | # -- Options for LaTeX output --------------------------------------------- 122 | 123 | latex_elements = { 124 | # The paper size ('letterpaper' or 'a4paper'). 125 | # 126 | # 'papersize': 'letterpaper', 127 | # The font size ('10pt', '11pt' or '12pt'). 128 | # 129 | # 'pointsize': '10pt', 130 | # Additional stuff for the LaTeX preamble. 131 | # 132 | # 'preamble': '', 133 | # Latex figure (float) alignment 134 | # 135 | # 'figure_align': 'htbp', 136 | } 137 | 138 | # Grouping the document tree into LaTeX files. List of tuples 139 | # (source start file, target name, title, 140 | # author, documentclass [howto, manual, or own class]). 141 | latex_documents = [ 142 | ( 143 | master_doc, 144 | "llm.tex", 145 | "LLM documentation", 146 | "Simon Willison", 147 | "manual", 148 | ) 149 | ] 150 | 151 | 152 | # -- Options for manual page output --------------------------------------- 153 | 154 | # One entry per manual page. List of tuples 155 | # (source start file, name, description, authors, manual section). 156 | man_pages = [ 157 | ( 158 | master_doc, 159 | "llm", 160 | "LLM documentation", 161 | [author], 162 | 1, 163 | ) 164 | ] 165 | 166 | 167 | # -- Options for Texinfo output ------------------------------------------- 168 | 169 | # Grouping the document tree into Texinfo files. List of tuples 170 | # (source start file, target name, title, author, 171 | # dir menu entry, description, category) 172 | texinfo_documents = [ 173 | ( 174 | master_doc, 175 | "llm", 176 | "LLM documentation", 177 | author, 178 | "llm", 179 | " Access large language models from the command-line ", 180 | "Miscellaneous", 181 | ) 182 | ] 183 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | To contribute to this tool, first checkout the code. Then create a new virtual environment: 4 | ```bash 5 | cd llm 6 | python -m venv venv 7 | source venv/bin/activate 8 | ``` 9 | Or if you are using `pipenv`: 10 | ```bash 11 | pipenv shell 12 | ``` 13 | Now install the dependencies and test dependencies: 14 | ```bash 15 | pip install -e '.[test]' 16 | ``` 17 | To run the tests: 18 | ```bash 19 | pytest 20 | ``` 21 | 22 | ## Updating recorded HTTP API interactions and associated snapshots 23 | 24 | This project uses [pytest-recording](https://github.com/kiwicom/pytest-recording) to record OpenAI API responses for some of the tests, and [syrupy](https://github.com/syrupy-project/syrupy) to capture snapshots of their results. 25 | 26 | If you add a new test that calls the API you can capture the API response and snapshot like this: 27 | ```bash 28 | PYTEST_OPENAI_API_KEY="$(llm keys get openai)" pytest --record-mode once --snapshot-update 29 | ``` 30 | Then review the new snapshots in `tests/__snapshots__/` to make sure they look correct. 31 | 32 | ## Debugging tricks 33 | 34 | The default OpenAI plugin has a debugging mechanism for showing the exact requests and responses that were sent to the OpenAI API. 
35 | 36 | Set the `LLM_OPENAI_SHOW_RESPONSES` environment variable like this: 37 | ```bash 38 | LLM_OPENAI_SHOW_RESPONSES=1 llm -m chatgpt 'three word slogan for an otter-run bakery' 39 | ``` 40 | This will output details of the API requests and responses to the console. 41 | 42 | Use `--no-stream` to see a more readable version of the body that avoids streaming the response: 43 | 44 | ```bash 45 | LLM_OPENAI_SHOW_RESPONSES=1 llm -m chatgpt --no-stream \ 46 | 'three word slogan for an otter-run bakery' 47 | ``` 48 | 49 | ## Documentation 50 | 51 | Documentation for this project uses [MyST](https://myst-parser.readthedocs.io/) - it is written in Markdown and rendered using Sphinx. 52 | 53 | To build the documentation locally, run the following: 54 | ```bash 55 | cd docs 56 | pip install -r requirements.txt 57 | make livehtml 58 | ``` 59 | This will start a live preview server, using [sphinx-autobuild](https://pypi.org/project/sphinx-autobuild/). 60 | 61 | The CLI `--help` examples in the documentation are managed using [Cog](https://github.com/nedbat/cog). Update those files like this: 62 | ```bash 63 | just cog 64 | ``` 65 | You'll need [Just](https://github.com/casey/just) installed to run this command. 66 | 67 | ## Release process 68 | 69 | To release a new version: 70 | 71 | 1. Update `docs/changelog.md` with the new changes. 72 | 2. Update the version number in `pyproject.toml` 73 | 3. [Create a GitHub release](https://github.com/simonw/llm/releases/new) for the new version. 74 | 4. Wait for the package to push to PyPI and then... 75 | 5. Run the [regenerate.yaml](https://github.com/simonw/homebrew-llm/actions/workflows/regenerate.yaml) workflow to update the Homebrew tap to the latest version. 76 | -------------------------------------------------------------------------------- /docs/embeddings/index.md: -------------------------------------------------------------------------------- 1 | (embeddings)= 2 | # Embeddings 3 | 4 | Embedding models allow you to take a piece of text - a word, sentence, paragraph or even a whole article, and convert that into an array of floating point numbers. 5 | 6 | This floating point array is called an "embedding vector", and works as a numerical representation of the semantic meaning of the content in a multi-dimensional space. 7 | 8 | By calculating the distance between embedding vectors, we can identify which content is semantically "nearest" to other content. 9 | 10 | This can be used to build features like related article lookups. It can also be used to build semantic search, where a user can search for a phrase and get back results that are semantically similar to that phrase even if they do not share any exact keywords. 11 | 12 | Some embedding models like [CLIP](https://github.com/simonw/llm-clip) can even work against binary files such as images. These can be used to search for images that are similar to other images, or to search for images that are semantically similar to a piece of text. 13 | 14 | LLM supports multiple embedding models through {ref}`plugins `. Once installed, an embedding model can be used on the command-line or via the Python API to calculate and store embeddings for content, and then to perform similarity searches against those embeddings. 15 | 16 | See [LLM now provides tools for working with embeddings](https://simonwillison.net/2023/Sep/4/llm-embeddings/) for an extended explanation of embeddings, why they are useful and what you can do with them.
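As a rough sketch of how that "nearest" comparison works, here is one way to embed two strings and compare them using the Python API covered later in this documentation - this assumes the default OpenAI plugin with an API key configured, since it uses the `3-small` model:

```python
import llm

model = llm.get_embedding_model("3-small")
a = model.embed("a happy dog")
b = model.embed("a joyful hound")

# Cosine similarity: values closer to 1.0 mean the two texts are semantically closer
dot = sum(x * y for x, y in zip(a, b))
magnitude = (sum(x * x for x in a) ** 0.5) * (sum(y * y for y in b) ** 0.5)
print(dot / magnitude)
```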
17 | 18 | ```{toctree} 19 | --- 20 | maxdepth: 3 21 | --- 22 | cli 23 | python-api 24 | writing-plugins 25 | storage 26 | ``` 27 | -------------------------------------------------------------------------------- /docs/embeddings/python-api.md: -------------------------------------------------------------------------------- 1 | (embeddings-python-api)= 2 | # Using embeddings from Python 3 | 4 | You can load an embedding model using its model ID or alias like this: 5 | ```python 6 | import llm 7 | 8 | embedding_model = llm.get_embedding_model("3-small") 9 | ``` 10 | To embed a string, returning a Python list of floating point numbers, use the `.embed()` method: 11 | ```python 12 | vector = embedding_model.embed("my happy hound") 13 | ``` 14 | If the embedding model can handle binary input, you can call `.embed()` with a byte string instead. You can check the `supports_binary` property to see if this is supported: 15 | ```python 16 | if embedding_model.supports_binary: 17 | vector = embedding_model.embed(open("my-image.jpg", "rb").read()) 18 | ``` 19 | The `embedding_model.supports_text` property indicates if the model supports text input. 20 | 21 | Many embeddings models are more efficient when you embed multiple strings or binary strings at once. To embed multiple strings at once, use the `.embed_multi()` method: 22 | ```python 23 | vectors = list(embedding_model.embed_multi(["my happy hound", "my dissatisfied cat"])) 24 | ``` 25 | This returns a generator that yields one embedding vector per string. 26 | 27 | Embeddings are calculated in batches. By default all items will be processed in a single batch, unless the underlying embedding model has defined its own preferred batch size. You can pass a custom batch size using `batch_size=N`, for example: 28 | 29 | ```python 30 | vectors = list(embedding_model.embed_multi(lines_from_file, batch_size=20)) 31 | ``` 32 | 33 | (embeddings-python-collections)= 34 | ## Working with collections 35 | 36 | The `llm.Collection` class can be used to work with **collections** of embeddings from Python code. 37 | 38 | A collection is a named group of embedding vectors, each stored along with their IDs in a SQLite database table. 39 | 40 | To work with embeddings in this way you will need an instance of a [sqlite-utils Database](https://sqlite-utils.datasette.io/en/stable/python-api.html#connecting-to-or-creating-a-database) object. You can then pass that to the `llm.Collection` constructor along with the unique string name of the collection and the ID of the embedding model you will be using with that collection: 41 | 42 | ```python 43 | import sqlite_utils 44 | import llm 45 | 46 | # This collection will use an in-memory database that will be 47 | # discarded when the Python process exits 48 | collection = llm.Collection("entries", model_id="3-small") 49 | 50 | # Or you can persist the database to disk like this: 51 | db = sqlite_utils.Database("my-embeddings.db") 52 | collection = llm.Collection("entries", db, model_id="3-small") 53 | 54 | # You can pass a model directly using model= instead of model_id= 55 | embedding_model = llm.get_embedding_model("3-small") 56 | collection = llm.Collection("entries", db, model=embedding_model) 57 | ``` 58 | If the collection already exists in the database you can omit the `model` or `model_id` argument - the model ID will be read from the `collections` table. 
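For example, this sketch assumes an `entries` collection was previously created in `my-embeddings.db`:

```python
import sqlite_utils
import llm

db = sqlite_utils.Database("my-embeddings.db")
# No model_id needed - it is read from the collections table
collection = llm.Collection("entries", db)
```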
59 | 60 | To embed a single string and store it in the collection, use the `embed()` method: 61 | 62 | ```python 63 | collection.embed("hound", "my happy hound") 64 | ``` 65 | This stores the embedding for the string "my happy hound" in the `entries` collection under the key `hound`. 66 | 67 | Add `store=True` to store the text content itself in the database table along with the embedding vector. 68 | 69 | To attach additional metadata to an item, pass a JSON-compatible dictionary as the `metadata=` argument: 70 | 71 | ```python 72 | collection.embed("hound", "my happy hound", metadata={"name": "Hound"}, store=True) 73 | ``` 74 | This additional metadata will be stored as JSON in the `metadata` column of the embeddings database table. 75 | 76 | (embeddings-python-bulk)= 77 | ### Storing embeddings in bulk 78 | 79 | The `collection.embed_multi()` method can be used to store embeddings for multiple items at once. This can be more efficient for some embedding models. 80 | 81 | ```python 82 | collection.embed_multi( 83 | [ 84 | ("hound", "my happy hound"), 85 | ("cat", "my dissatisfied cat"), 86 | ], 87 | # Add this to store the strings in the content column: 88 | store=True, 89 | ) 90 | ``` 91 | To include metadata to be stored with each item, call `embed_multi_with_metadata()`: 92 | 93 | ```python 94 | collection.embed_multi_with_metadata( 95 | [ 96 | ("hound", "my happy hound", {"name": "Hound"}), 97 | ("cat", "my dissatisfied cat", {"name": "Cat"}), 98 | ], 99 | # This can also take the store=True argument: 100 | store=True, 101 | ) 102 | ``` 103 | The `batch_size=` argument defaults to 100, and will be used unless the embedding model itself defines a lower batch size. You can adjust this if you are having trouble with memory while embedding large collections: 104 | 105 | ```python 106 | collection.embed_multi( 107 | ( 108 | (i, line) 109 | for i, line in enumerate(lines_in_file) 110 | ), 111 | batch_size=10 112 | ) 113 | ``` 114 | 115 | (embeddings-python-collection-class)= 116 | ### Collection class reference 117 | 118 | A collection instance has the following properties and methods: 119 | 120 | - `id` - the integer ID of the collection in the database 121 | - `name` - the string name of the collection (unique in the database) 122 | - `model_id` - the string ID of the embedding model used for this collection 123 | - `model()` - returns the `EmbeddingModel` instance, based on that `model_id` 124 | - `count()` - returns the integer number of items in the collection 125 | - `embed(id: str, text: str, metadata: dict=None, store: bool=False)` - embeds the given string and stores it in the collection under the given ID. Can optionally include metadata (stored as JSON) and store the text content itself in the database table. 
126 | - `embed_multi(entries: Iterable, store: bool=False, batch_size: int=100)` - see above 127 | - `embed_multi_with_metadata(entries: Iterable, store: bool=False, batch_size: int=100)` - see above 128 | - `similar(query: str, number: int=10)` - returns a list of entries that are most similar to the embedding of the given query string 129 | - `similar_by_id(id: str, number: int=10)` - returns a list of entries that are most similar to the embedding of the item with the given ID 130 | - `similar_by_vector(vector: List[float], number: int=10, skip_id: str=None)` - returns a list of entries that are most similar to the given embedding vector, optionally skipping the entry with the given ID 131 | - `delete()` - deletes the collection and its embeddings from the database 132 | 133 | There is also a `Collection.exists(db, name)` class method which returns a boolean value and can be used to determine if a collection exists or not in a database: 134 | 135 | ```python 136 | if Collection.exists(db, "entries"): 137 | print("The entries collection exists") 138 | ``` 139 | 140 | (embeddings-python-similar)= 141 | ## Retrieving similar items 142 | 143 | Once you have populated a collection of embeddings you can retrieve the entries that are most similar to a given string using the `similar()` method. 144 | 145 | This method uses a brute force approach, calculating distance scores against every document. This is fine for small collections, but will not scale to large collections. See [issue 216](https://github.com/simonw/llm/issues/216) for plans to add a more scalable approach via vector indexes provided by plugins. 146 | 147 | ```python 148 | for entry in collection.similar("hound"): 149 | print(entry.id, entry.score) 150 | ``` 151 | The string will first be embedded using the model for the collection. 152 | 153 | Each returned `entry` object has the following properties: 154 | 155 | - `id` - the string ID of the item 156 | - `score` - the floating point similarity score between the item and the query string 157 | - `content` - the string text content of the item, if it was stored - or `None` 158 | - `metadata` - the dictionary (from JSON) metadata for the item, if it was stored - or `None` 159 | 160 | This defaults to returning the 10 most similar items. You can change this by passing a different `number=` argument: 161 | ```python 162 | for entry in collection.similar("hound", number=5): 163 | print(entry.id, entry.score) 164 | ``` 165 | The `similar_by_id()` method takes the ID of another item in the collection and returns the most similar items to that one, based on the embedding that has already been stored for it: 166 | 167 | ```python 168 | for entry in collection.similar_by_id("cat"): 169 | print(entry.id, entry.score) 170 | ``` 171 | The item itself is excluded from the results.
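If you have already calculated an embedding vector yourself you can use the `similar_by_vector()` method listed in the class reference above. A minimal sketch, assuming the `hound` entry from the earlier examples is present in the collection:

```python
# Embed the query using the collection's own model, then search by vector,
# skipping the "hound" entry itself
vector = collection.model().embed("my happy hound")
for entry in collection.similar_by_vector(vector, number=5, skip_id="hound"):
    print(entry.id, entry.score)
```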
172 | 173 | (embeddings-sql-schema)= 174 | ## SQL schema 175 | 176 | Here's the SQL schema used by the embeddings database: 177 | 178 | 193 | ```sql 194 | CREATE TABLE [collections] ( 195 | [id] INTEGER PRIMARY KEY, 196 | [name] TEXT, 197 | [model] TEXT 198 | ) 199 | CREATE TABLE "embeddings" ( 200 | [collection_id] INTEGER REFERENCES [collections]([id]), 201 | [id] TEXT, 202 | [embedding] BLOB, 203 | [content] TEXT, 204 | [content_blob] BLOB, 205 | [content_hash] BLOB, 206 | [metadata] TEXT, 207 | [updated] INTEGER, 208 | PRIMARY KEY ([collection_id], [id]) 209 | ) 210 | ``` 211 | 212 | -------------------------------------------------------------------------------- /docs/embeddings/storage.md: -------------------------------------------------------------------------------- 1 | (embeddings-storage)= 2 | # Embedding storage format 3 | 4 | The default output format of the `llm embed` command is a JSON array of floating point numbers. 5 | 6 | LLM stores embeddings in space-efficient format: a little-endian binary sequences of 32-bit floating point numbers, each represented using 4 bytes. 7 | 8 | These are stored in a `BLOB` column in a SQLite database. 9 | 10 | The following Python functions can be used to convert between this format and an array of floating point numbers: 11 | 12 | ```python 13 | import struct 14 | 15 | def encode(values): 16 | return struct.pack("<" + "f" * len(values), *values) 17 | 18 | def decode(binary): 19 | return struct.unpack("<" + "f" * (len(binary) // 4), binary) 20 | ``` 21 | 22 | These functions are available as `llm.encode()` and `llm.decode()`. 23 | 24 | If you are using [NumPy](https://numpy.org/) you can decode one of these binary values like this: 25 | 26 | ```python 27 | import numpy as np 28 | 29 | numpy_array = np.frombuffer(value, "` for details on how to develop and package a plugin. 5 | 6 | This page shows an example plugin that implements and registers a new embedding model. 7 | 8 | There are two components to an embedding model plugin: 9 | 10 | 1. An implementation of the `register_embedding_models()` hook, which takes a `register` callback function and calls it to register the new model with the LLM plugin system. 11 | 2. A class that extends the `llm.EmbeddingModel` abstract base class. 12 | 13 | The only required method on this class is `embed_batch(texts)`, which takes an iterable of strings and returns an iterator over lists of floating point numbers. 14 | 15 | The following example uses the [sentence-transformers](https://github.com/UKPLab/sentence-transformers) package to provide access to the [MiniLM-L6](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) embedding model. 
16 | 17 | ```python 18 | import llm 19 | from sentence_transformers import SentenceTransformer 20 | 21 | 22 | @llm.hookimpl 23 | def register_embedding_models(register): 24 | model_id = "sentence-transformers/all-MiniLM-L6-v2" 25 | register(SentenceTransformerModel(model_id, model_id), aliases=("all-MiniLM-L6-v2",)) 26 | 27 | 28 | class SentenceTransformerModel(llm.EmbeddingModel): 29 | def __init__(self, model_id, model_name): 30 | self.model_id = model_id 31 | self.model_name = model_name 32 | self._model = None 33 | 34 | def embed_batch(self, texts): 35 | if self._model is None: 36 | self._model = SentenceTransformer(self.model_name) 37 | results = self._model.encode(texts) 38 | return (list(map(float, result)) for result in results) 39 | ``` 40 | Once installed, the model provided by this plugin can be used with the {ref}`llm embed ` command like this: 41 | 42 | ```bash 43 | cat file.txt | llm embed -m sentence-transformers/all-MiniLM-L6-v2 44 | ``` 45 | Or via its registered alias like this: 46 | ```bash 47 | cat file.txt | llm embed -m all-MiniLM-L6-v2 48 | ``` 49 | [llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers) is a complete example of a plugin that provides an embedding model. 50 | 51 | [Execute Jina embeddings with a CLI using llm-embed-jina](https://simonwillison.net/2023/Oct/26/llm-embed-jina/#how-i-built-the-plugin) talks through a similar process to add support for the [Jina embeddings models](https://jina.ai/news/jina-ai-launches-worlds-first-open-source-8k-text-embedding-rivaling-openai/). 52 | 53 | ## Embedding binary content 54 | 55 | If your model can embed binary content, use the `supports_binary` property to indicate that: 56 | 57 | ```python 58 | class ClipEmbeddingModel(llm.EmbeddingModel): 59 | model_id = "clip" 60 | supports_binary = True 61 | supports_text= True 62 | ``` 63 | 64 | `supports_text` defaults to `True` and so is not necessary here. You can set it to `False` if your model only supports binary data. 65 | 66 | If your model accepts binary, your `.embed_batch()` model may be called with a list of Python bytestrings. These may be mixed with regular strings if the model accepts both types of input. 67 | 68 | [llm-clip](https://github.com/simonw/llm-clip) is an example of a model that can embed both binary and text content. 69 | -------------------------------------------------------------------------------- /docs/fragments.md: -------------------------------------------------------------------------------- 1 | (fragments)= 2 | # Fragments 3 | 4 | LLM prompts can optionally be composed out of **fragments** - reusable pieces of text that are logged just once to the database and can then be attached to multiple prompts. 5 | 6 | These are particularly useful when you are working with long context models, which support feeding large amounts of text in as part of your prompt. 7 | 8 | Fragments primarily exist to save space in the database, but may be used to support other features such as vendor prompt caching as well. 
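For example, the same long file can be attached to several different prompts and its text will only be stored once in the logs database - the file name here is just an illustration:

```bash
llm -f transcript.txt 'Summarize this transcript'
llm -f transcript.txt 'List every person mentioned in this transcript'
```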
9 | 10 | Fragments can be specified using several different mechanisms: 11 | 12 | - URLs to text files online 13 | - Paths to text files on disk 14 | - Aliases that have been attached to a specific fragment 15 | - Hash IDs of stored fragments, where the ID is the SHA256 hash of the fragment content 16 | - Fragments that are provided by custom plugins - these look like `plugin-name:argument` 17 | 18 | (fragments-usage)= 19 | ## Using fragments in a prompt 20 | 21 | Use the `-f/--fragment` option to specify one or more fragments to be used as part of your prompt: 22 | 23 | ```bash 24 | llm -f https://llm.datasette.io/robots.txt "Explain this robots.txt file in detail" 25 | ``` 26 | Here we are specifying a fragment using a URL. The contents of that URL will be included in the prompt that is sent to the model, prepended prior to the prompt text. 27 | 28 | The `-f` option can be used multiple times to combine together multiple fragments. 29 | 30 | Fragments can also be files on disk, for example: 31 | ```bash 32 | llm -f setup.py 'extract the metadata' 33 | ``` 34 | Use `-` to specify a fragment that is read from standard input: 35 | ```bash 36 | llm -f - 'extract the metadata' < setup.py 37 | ``` 38 | This will read the contents of `setup.py` from standard input and use it as a fragment. 39 | 40 | Fragments can also be used as part of your system prompt. Use `--sf value` or `--system-fragment value` instead of `-f`. 41 | 42 | ## Using fragments in chat 43 | 44 | The `chat` command also supports the `-f` and `--sf` arguments to start a chat with fragments. 45 | 46 | ```bash 47 | llm chat -f my_doc.txt 48 | Chatting with gpt-4 49 | Type 'exit' or 'quit' to exit 50 | Type '!multi' to enter multiple lines, then '!end' to finish 51 | Type '!edit' to open your default editor and modify the prompt. 52 | Type '!fragment [ ...]' to insert one or more fragments 53 | > Explain this document to me 54 | ``` 55 | 56 | Fragments can also be added *during* a chat conversation using the `!fragment ` command. 57 | 58 | ```bash 59 | Chatting with gpt-4 60 | Type 'exit' or 'quit' to exit 61 | Type '!multi' to enter multiple lines, then '!end' to finish 62 | Type '!edit' to open your default editor and modify the prompt. 63 | Type '!fragment [ ...]' to insert one or more fragments 64 | > !fragment https://llm.datasette.io/en/stable/fragments.html 65 | ``` 66 | 67 | This can be combined with `!multi`: 68 | 69 | ```bash 70 | > !multi 71 | Explain the difference between fragments and templates to me 72 | !fragment https://llm.datasette.io/en/stable/fragments.html https://llm.datasette.io/en/stable/templates.html 73 | !end 74 | ``` 75 | 76 | Any `!fragment` lines found in a prompt created with `!edit` will not be parsed. 77 | 78 | (fragments-browsing)= 79 | ## Browsing fragments 80 | 81 | You can view a truncated version of the fragments you have previously stored in your database with the `llm fragments` command: 82 | 83 | ```bash 84 | llm fragments 85 | ``` 86 | The output from that command looks like this: 87 | 88 | ```yaml 89 | - hash: 0d6e368f9bc21f8db78c01e192ecf925841a957d8b991f5bf9f6239aa4d81815 90 | aliases: [] 91 | datetime_utc: '2025-04-06 07:36:53' 92 | source: https://raw.githubusercontent.com/simonw/llm-docs/refs/heads/main/llm/0.22.txt 93 | content: |- 94 | 95 | 96 | docs/aliases.md 97 | 98 | (aliases)= 99 | #... 
100 | - hash: 16b686067375182573e2aa16b5bfc1e64d48350232535d06444537e51f1fd60c 101 | aliases: [] 102 | datetime_utc: '2025-04-06 23:03:47' 103 | source: simonw/files-to-prompt/pyproject.toml 104 | content: |- 105 | [project] 106 | name = "files-to-prompt" 107 | version = "0.6" 108 | description = "Concatenate a directory full of... 109 | ``` 110 | Those long `hash` values are IDs that can be used to reference a fragment in the future: 111 | ```bash 112 | llm -f 16b686067375182573e2aa16b5bfc1e64d48350232535d06444537e51f1fd60c 'Extract metadata' 113 | ``` 114 | Use `-q searchterm` one or more times to search for fragments that match a specific set of search terms. 115 | 116 | To view the full content of a fragment use `llm fragments show`: 117 | ```bash 118 | llm fragments show 0d6e368f9bc21f8db78c01e192ecf925841a957d8b991f5bf9f6239aa4d81815 119 | ``` 120 | 121 | (fragments-aliases)= 122 | ## Setting aliases for fragments 123 | 124 | You can assign aliases to fragments that you use often using the `llm fragments set` command: 125 | ```bash 126 | llm fragments set mydocs ./docs.md 127 | ``` 128 | To remove an alias, use `llm fragments remove`: 129 | ```bash 130 | llm fragments remove mydocs 131 | ``` 132 | You can then use that alias in place of the fragment hash ID: 133 | ```bash 134 | llm -f mydocs 'How do I access metadata?' 135 | ``` 136 | Use `llm fragments --aliases` to see a full list of fragments that have been assigned aliases: 137 | ```bash 138 | llm fragments --aliases 139 | ``` 140 | 141 | (fragments-logs)= 142 | ## Viewing fragments in your logs 143 | 144 | The `llm logs` command lists the fragments that were used for a prompt. By default these are listed as fragment hash IDs, but you can use the `--expand` option to show the full content of each fragment. 145 | 146 | This command will show the expanded fragments for your most recent conversation: 147 | 148 | ```bash 149 | llm logs -c --expand 150 | ``` 151 | You can filter for logs that used a specific fragment using the `-f/--fragment` option: 152 | ```bash 153 | llm logs -c -f 0d6e368f9bc21f8db78c01e192ecf925841a957d8b991f5bf9f6239aa4d81815 154 | ``` 155 | This accepts URLs, file paths, aliases, and hash IDs. 156 | 157 | Multiple `-f` options will return responses that used **all** of the specified fragments. 158 | 159 | Fragments are returned by `llm logs --json` as well. By default these are truncated but you can add the `-e/--expand` option to show the full content of each fragment. 160 | 161 | ```bash 162 | llm logs -c --json --expand 163 | ``` 164 | 165 | (fragments-plugins)= 166 | ## Using fragments from plugins 167 | 168 | LLM plugins can provide custom fragment loaders which do useful things. 169 | 170 | One example is the [llm-fragments-github plugin](https://github.com/simonw/llm-fragments-github). This can convert the files from a public GitHub repository into a list of fragments, allowing you to ask questions about the full repository. 171 | 172 | Here's how to try that out: 173 | 174 | ```bash 175 | llm install llm-fragments-github 176 | llm -f github:simonw/s3-credentials 'Suggest new features for this tool' 177 | ``` 178 | This plugin turns a single call to `-f github:simonw/s3-credentials` into multiple fragments, one for every text file in the [simonw/s3-credentials](https://github.com/simonw/s3-credentials) GitHub repository. 179 | 180 | Running `llm logs -c` will show that this prompt incorporated 26 fragments, one for each file. 
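Because each of those files is stored as its own fragment, you can also inspect them afterwards with the `llm fragments` command described earlier. A quick sketch - the search term here is just an example:

```bash
# List stored fragments whose source or content mentions s3-credentials
llm fragments -q s3-credentials
```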
181 | 182 | Running `llm logs -c --usage --expand` (shortcut: `llm logs -cue`) includes token usage information and turns each fragment ID into a full copy of that file. [Here's the output of that command](https://gist.github.com/simonw/c9bbbc5f6560b01f4b7882ac0194fb25). 183 | 184 | Fragment plugins can return {ref}`attachments ` (such as images) as well. 185 | 186 | See the {ref}`register_fragment_loaders() plugin hook ` documentation for details on writing your own custom fragment plugin. 187 | 188 | (fragments-loaders)= 189 | ## Listing available fragment prefixes 190 | 191 | The `llm fragments loaders` command shows all prefixes that have been installed by plugins, along with their documentation: 192 | 193 | ```bash 194 | llm install llm-fragments-github 195 | llm fragments loaders 196 | ``` 197 | Example output: 198 | ``` 199 | github: 200 | Load files from a GitHub repository as fragments 201 | 202 | Argument is a GitHub repository URL or username/repository 203 | 204 | issue: 205 | Fetch GitHub issue and comments as Markdown 206 | 207 | Argument is either "owner/repo/NUMBER" 208 | or "https://github.com/owner/repo/issues/NUMBER" 209 | ``` 210 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # LLM 2 | 3 | [![GitHub repo](https://img.shields.io/badge/github-repo-green)](https://github.com/simonw/llm) 4 | [![PyPI](https://img.shields.io/pypi/v/llm.svg)](https://pypi.org/project/llm/) 5 | [![Changelog](https://img.shields.io/github/v/release/simonw/llm?include_prereleases&label=changelog)](https://llm.datasette.io/en/stable/changelog.html) 6 | [![Tests](https://github.com/simonw/llm/workflows/Test/badge.svg)](https://github.com/simonw/llm/actions?query=workflow%3ATest) 7 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/llm/blob/main/LICENSE) 8 | [![Discord](https://img.shields.io/discord/823971286308356157?label=discord)](https://datasette.io/discord-llm) 9 | [![Homebrew](https://img.shields.io/homebrew/installs/dy/llm?color=yellow&label=homebrew&logo=homebrew)](https://formulae.brew.sh/formula/llm) 10 | 11 | A CLI tool and Python library for interacting with **OpenAI**, **Anthropic's Claude**, **Google's Gemini**, **Meta's Llama** and dozens of other Large Language Models, both via remote APIs and with models that can be installed and run on your own machine. 12 | 13 | Watch **[Language models on the command-line](https://www.youtube.com/watch?v=QUXQNi6jQ30)** on YouTube for a demo or [read the accompanying detailed notes](https://simonwillison.net/2024/Jun/17/cli-language-models/). 14 | 15 | With LLM you can: 16 | - {ref}`Run prompts from the command-line ` 17 | - {ref}`Store prompts and responses in SQLite ` 18 | - {ref}`Generate and store embeddings ` 19 | - {ref}`Extract structured content from text and images ` 20 | - ... 
and much, much more 21 | 22 | ## Quick start 23 | 24 | First, install LLM using `pip` or Homebrew or `pipx` or `uv`: 25 | 26 | ```bash 27 | pip install llm 28 | ``` 29 | Or with Homebrew (see {ref}`warning note `): 30 | ```bash 31 | brew install llm 32 | ``` 33 | Or with [pipx](https://pypa.github.io/pipx/): 34 | ```bash 35 | pipx install llm 36 | ``` 37 | Or with [uv](https://docs.astral.sh/uv/guides/tools/) 38 | ```bash 39 | uv tool install llm 40 | ``` 41 | If you have an [OpenAI API key](https://platform.openai.com/api-keys) key you can run this: 42 | ```bash 43 | # Paste your OpenAI API key into this 44 | llm keys set openai 45 | 46 | # Run a prompt (with the default gpt-4o-mini model) 47 | llm "Ten fun names for a pet pelican" 48 | 49 | # Extract text from an image 50 | llm "extract text" -a scanned-document.jpg 51 | 52 | # Use a system prompt against a file 53 | cat myfile.py | llm -s "Explain this code" 54 | ``` 55 | Run prompts against [Gemini](https://aistudio.google.com/apikey) or [Anthropic](https://console.anthropic.com/) with their respective plugins: 56 | ```bash 57 | llm install llm-gemini 58 | llm keys set gemini 59 | # Paste Gemini API key here 60 | llm -m gemini-2.0-flash 'Tell me fun facts about Mountain View' 61 | 62 | llm install llm-anthropic 63 | llm keys set anthropic 64 | # Paste Anthropic API key here 65 | llm -m claude-4-opus 'Impress me with wild facts about turnips' 66 | ``` 67 | You can also {ref}`install a plugin ` to access models that can run on your local device. If you use [Ollama](https://ollama.com/): 68 | ```bash 69 | # Install the plugin 70 | llm install llm-ollama 71 | 72 | # Download and run a prompt against the Orca Mini 7B model 73 | ollama pull llama3.2:latest 74 | llm -m llama3.2:latest 'What is the capital of France?' 75 | ``` 76 | To start {ref}`an interactive chat ` with a model, use `llm chat`: 77 | ```bash 78 | llm chat -m gpt-4.1 79 | ``` 80 | ``` 81 | Chatting with gpt-4.1 82 | Type 'exit' or 'quit' to exit 83 | Type '!multi' to enter multiple lines, then '!end' to finish 84 | Type '!edit' to open your default editor and modify the prompt. 85 | Type '!fragment [ ...]' to insert one or more fragments 86 | > Tell me a joke about a pelican 87 | Why don't pelicans like to tip waiters? 88 | 89 | Because they always have a big bill! 90 | ``` 91 | 92 | More background on this project: 93 | 94 | - [llm, ttok and strip-tags—CLI tools for working with ChatGPT and other LLMs](https://simonwillison.net/2023/May/18/cli-tools-for-llms/) 95 | - [The LLM CLI tool now supports self-hosted language models via plugins](https://simonwillison.net/2023/Jul/12/llm/) 96 | - [LLM now provides tools for working with embeddings](https://simonwillison.net/2023/Sep/4/llm-embeddings/) 97 | - [Build an image search engine with llm-clip, chat with models with llm chat](https://simonwillison.net/2023/Sep/12/llm-clip-and-chat/) 98 | - [You can now run prompts against images, audio and video in your terminal using LLM](https://simonwillison.net/2024/Oct/29/llm-multi-modal/) 99 | - [Structured data extraction from unstructured content using LLM schemas](https://simonwillison.net/2025/Feb/28/llm-schemas/) 100 | - [Long context support in LLM 0.24 using fragments and template plugins](https://simonwillison.net/2025/Apr/7/long-context-llm/) 101 | 102 | See also [the llm tag](https://simonwillison.net/tags/llm/) on my blog. 
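Everything in the quick start can also be done from Python. Here is a minimal sketch using the Python API - it assumes the default OpenAI plugin and a key configured as shown above:

```python
import llm

# Look up a model by ID or alias and run a prompt against it
model = llm.get_model("gpt-4o-mini")
response = model.prompt("Ten fun names for a pet pelican")
print(response.text())
```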
103 | 104 | ## Contents 105 | 106 | ```{toctree} 107 | --- 108 | maxdepth: 3 109 | --- 110 | setup 111 | usage 112 | openai-models 113 | other-models 114 | tools 115 | schemas 116 | templates 117 | fragments 118 | aliases 119 | embeddings/index 120 | plugins/index 121 | python-api 122 | logging 123 | related-tools 124 | help 125 | contributing 126 | ``` 127 | ```{toctree} 128 | --- 129 | maxdepth: 1 130 | --- 131 | changelog 132 | ``` -------------------------------------------------------------------------------- /docs/logging.md: -------------------------------------------------------------------------------- 1 | (logging)= 2 | # Logging to SQLite 3 | 4 | `llm` defaults to logging all prompts and responses to a SQLite database. 5 | 6 | You can find the location of that database using the `llm logs path` command: 7 | 8 | ```bash 9 | llm logs path 10 | ``` 11 | On my Mac that outputs: 12 | ``` 13 | /Users/simon/Library/Application Support/io.datasette.llm/logs.db 14 | ``` 15 | This will differ for other operating systems. 16 | 17 | To avoid logging an individual prompt, pass `--no-log` or `-n` to the command: 18 | ```bash 19 | llm 'Ten names for cheesecakes' -n 20 | ``` 21 | 22 | To turn logging by default off: 23 | 24 | ```bash 25 | llm logs off 26 | ``` 27 | If you've turned off logging you can still log an individual prompt and response by adding `--log`: 28 | ```bash 29 | llm 'Five ambitious names for a pet pterodactyl' --log 30 | ``` 31 | To turn logging by default back on again: 32 | 33 | ```bash 34 | llm logs on 35 | ``` 36 | To see the status of the logs database, run this: 37 | ```bash 38 | llm logs status 39 | ``` 40 | Example output: 41 | ``` 42 | Logging is ON for all prompts 43 | Found log database at /Users/simon/Library/Application Support/io.datasette.llm/logs.db 44 | Number of conversations logged: 33 45 | Number of responses logged: 48 46 | Database file size: 19.96MB 47 | ``` 48 | 49 | (logging-view)= 50 | 51 | ## Viewing the logs 52 | 53 | You can view the logs using the `llm logs` command: 54 | ```bash 55 | llm logs 56 | ``` 57 | This will output the three most recent logged items in Markdown format, showing both the prompt and the response formatted using Markdown. 58 | 59 | To get back just the most recent prompt response as plain text, add `-r/--response`: 60 | 61 | ```bash 62 | llm logs -r 63 | ``` 64 | Use `-x/--extract` to extract and return the first fenced code block from the selected log entries: 65 | 66 | ```bash 67 | llm logs --extract 68 | ``` 69 | Or `--xl/--extract-last` for the last fenced code block: 70 | ```bash 71 | llm logs --extract-last 72 | ``` 73 | 74 | Add `--json` to get the log messages in JSON instead: 75 | 76 | ```bash 77 | llm logs --json 78 | ``` 79 | 80 | Add `-n 10` to see the ten most recent items: 81 | ```bash 82 | llm logs -n 10 83 | ``` 84 | Or `-n 0` to see everything that has ever been logged: 85 | ```bash 86 | llm logs -n 0 87 | ``` 88 | You can truncate the display of the prompts and responses using the `-t/--truncate` option. This can help make the JSON output more readable - though the `--short` option is usually better. 89 | ```bash 90 | llm logs -n 1 -t --json 91 | ``` 92 | Example output: 93 | ```json 94 | [ 95 | { 96 | "id": "01jm8ec74wxsdatyn5pq1fp0s5", 97 | "model": "anthropic/claude-3-haiku-20240307", 98 | "prompt": "hi", 99 | "system": null, 100 | "prompt_json": null, 101 | "response": "Hello! 
How can I assist you today?", 102 | "conversation_id": "01jm8ec74taftdgj2t4zra9z0j", 103 | "duration_ms": 560, 104 | "datetime_utc": "2025-02-16T22:34:30.374882+00:00", 105 | "input_tokens": 8, 106 | "output_tokens": 12, 107 | "token_details": null, 108 | "conversation_name": "hi", 109 | "conversation_model": "anthropic/claude-3-haiku-20240307", 110 | "attachments": [] 111 | } 112 | ] 113 | ``` 114 | 115 | (logging-short)= 116 | 117 | ### -s/--short mode 118 | 119 | Use `-s/--short` to see a shortened YAML log with truncated prompts and no responses: 120 | ```bash 121 | llm logs -n 2 --short 122 | ``` 123 | Example output: 124 | ```yaml 125 | - model: deepseek-reasoner 126 | datetime: '2025-02-02T06:39:53' 127 | conversation: 01jk2pk05xq3d0vgk0202zrsg1 128 | prompt: H01 There are five huts. H02 The Scotsman lives in the purple hut. H03 The Welshman owns the parrot. H04 Kombucha is... 129 | - model: o3-mini 130 | datetime: '2025-02-02T19:03:05' 131 | conversation: 01jk40qkxetedzpf1zd8k9bgww 132 | system: Formatting re-enabled. Write a detailed README with extensive usage examples. 133 | prompt: ./Cargo.toml [package] name = "py-limbo" version... 134 | ``` 135 | Include `-u/--usage` to include token usage information: 136 | 137 | ```bash 138 | llm logs -n 1 --short --usage 139 | ``` 140 | Example output: 141 | ```yaml 142 | - model: o3-mini 143 | datetime: '2025-02-16T23:00:56' 144 | conversation: 01jm8fxxnef92n1663c6ays8xt 145 | system: Produce Python code that demonstrates every possible usage of yaml.dump 146 | with all of the arguments it can take, especi... 147 | prompt: ./setup.py 148 | NAME = 'PyYAML' VERSION = '7.0.0.dev0... 149 | usage: 150 | input: 74793 151 | output: 3550 152 | details: 153 | completion_tokens_details: 154 | reasoning_tokens: 2240 155 | ``` 156 | 157 | (logging-conversation)= 158 | 159 | ### Logs for a conversation 160 | 161 | To view the logs for the most recent {ref}`conversation ` you have had with a model, use `-c`: 162 | 163 | ```bash 164 | llm logs -c 165 | ``` 166 | To see logs for a specific conversation based on its ID, use `--cid ID` or `--conversation ID`: 167 | 168 | ```bash 169 | llm logs --cid 01h82n0q9crqtnzmf13gkyxawg 170 | ``` 171 | 172 | (logging-search)= 173 | 174 | ### Searching the logs 175 | 176 | You can search the logs for a search term in the `prompt` or the `response` columns. 177 | ```bash 178 | llm logs -q 'cheesecake' 179 | ``` 180 | The most relevant terms will be shown at the bottom of the output. 181 | 182 | (logging-filter-id)= 183 | 184 | ### Filtering past a specific ID 185 | 186 | If you want to retrieve all of the logs that were recorded since a specific response ID you can do so using these options: 187 | 188 | - `--id-gt $ID` - every record with an ID greater than $ID 189 | - `--id-gte $ID` - every record with an ID greater than or equal to $ID 190 | 191 | IDs are always issued in ascending order by time, so this provides a useful way to see everything that has happened since a particular record. 192 | 193 | This can be particularly useful when {ref}`working with schema data `, where you might want to access every record that you have created using a specific `--schema` but exclude records you have previously processed. 
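For example, a follow-up processing run might look something like this - the ID shown is just a placeholder for the last record you processed:

```bash
# Fetch only newly logged records for a schema, as a JSON array
llm logs --schema 'name, age int, bio' --id-gt 01jm8ec74wxsdatyn5pq1fp0s5 --data-array
```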
194 | 195 | (logging-filter-model)= 196 | 197 | ### Filtering by model 198 | 199 | You can filter to logs just for a specific model (or model alias) using `-m/--model`: 200 | ```bash 201 | llm logs -m chatgpt 202 | ``` 203 | 204 | (logging-filter-fragments)= 205 | 206 | ### Filtering by prompts that used specific fragments 207 | 208 | The `-f/--fragment X` option will filter for just responses that were created using the specified {ref}`fragment ` hash or alias or URL or filename. 209 | 210 | Fragments are displayed in the logs as their hash ID. Add `-e/--expand` to display fragments as their full content - this option works for both the default Markdown and the `--json` mode: 211 | 212 | ```bash 213 | llm logs -f https://llm.datasette.io/robots.txt --expand 214 | ``` 215 | You can display just the content for a specific fragment hash ID (or alias) using the `llm fragments show` command: 216 | 217 | ```bash 218 | llm fragments show 993fd38d898d2b59fd2d16c811da5bdac658faa34f0f4d411edde7c17ebb0680 219 | ``` 220 | If you provide multiple fragments you will get back responses that used _all_ of those fragments. 221 | 222 | (logging-filter-tools)= 223 | 224 | ### Filtering by prompts that used specific tools 225 | 226 | You can filter for responses that used tools from specific fragments with the `--tool/-T` option: 227 | 228 | ```bash 229 | llm logs -T simple_eval 230 | ``` 231 | This will match responses that involved a _result_ from that tool. If the tool was not executed it will not be included in the filtered responses. 232 | 233 | Pass `--tool/-T` multiple times for responses that used all of the specified tools. 234 | 235 | Use the `llm logs --tools` flag to see _all_ responses that involved at least one tool result, including from `--functions`: 236 | 237 | ```bash 238 | llm logs --tools 239 | ``` 240 | 241 | (logging-filter-schemas)= 242 | 243 | ### Browsing data collected using schemas 244 | 245 | The `--schema X` option can be used to view responses that used the specified schema, using any of the {ref}`ways to specify a schema `: 246 | 247 | ```bash 248 | llm logs --schema 'name, age int, bio' 249 | ``` 250 | 251 | This can be combined with `--data` and `--data-array` and `--data-key` to extract just the returned JSON data - consult the {ref}`schemas documentation ` for details. 252 | 253 | (logging-datasette)= 254 | 255 | ## Browsing logs using Datasette 256 | 257 | You can also use [Datasette](https://datasette.io/) to browse your logs like this: 258 | 259 | ```bash 260 | datasette "$(llm logs path)" 261 | ``` 262 | 263 | (logging-backup)= 264 | 265 | ## Backing up your database 266 | 267 | You can backup your logs to another file using the `llm logs backup` command: 268 | 269 | ```bash 270 | llm logs backup /tmp/backup.db 271 | ``` 272 | This uses SQLite [VACUUM INTO](https://sqlite.org/lang_vacuum.html#vacuum_with_an_into_clause) under the hood. 
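For example, you might keep dated backups - the destination path here is just an illustration:

```bash
mkdir -p "$HOME/backups"
llm logs backup "$HOME/backups/llm-logs-$(date +%Y-%m-%d).db"
```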
273 | 274 | (logging-sql-schema)= 275 | 276 | ## SQL schema 277 | 278 | Here's the SQL schema used by the `logs.db` database: 279 | 280 | 305 | ```sql 306 | CREATE TABLE [conversations] ( 307 | [id] TEXT PRIMARY KEY, 308 | [name] TEXT, 309 | [model] TEXT 310 | ); 311 | CREATE TABLE [schemas] ( 312 | [id] TEXT PRIMARY KEY, 313 | [content] TEXT 314 | ); 315 | CREATE TABLE "responses" ( 316 | [id] TEXT PRIMARY KEY, 317 | [model] TEXT, 318 | [prompt] TEXT, 319 | [system] TEXT, 320 | [prompt_json] TEXT, 321 | [options_json] TEXT, 322 | [response] TEXT, 323 | [response_json] TEXT, 324 | [conversation_id] TEXT REFERENCES [conversations]([id]), 325 | [duration_ms] INTEGER, 326 | [datetime_utc] TEXT, 327 | [input_tokens] INTEGER, 328 | [output_tokens] INTEGER, 329 | [token_details] TEXT, 330 | [schema_id] TEXT REFERENCES [schemas]([id]), 331 | [resolved_model] TEXT 332 | ); 333 | CREATE VIRTUAL TABLE [responses_fts] USING FTS5 ( 334 | [prompt], 335 | [response], 336 | content=[responses] 337 | ); 338 | CREATE TABLE [attachments] ( 339 | [id] TEXT PRIMARY KEY, 340 | [type] TEXT, 341 | [path] TEXT, 342 | [url] TEXT, 343 | [content] BLOB 344 | ); 345 | CREATE TABLE [prompt_attachments] ( 346 | [response_id] TEXT REFERENCES [responses]([id]), 347 | [attachment_id] TEXT REFERENCES [attachments]([id]), 348 | [order] INTEGER, 349 | PRIMARY KEY ([response_id], 350 | [attachment_id]) 351 | ); 352 | CREATE TABLE [fragments] ( 353 | [id] INTEGER PRIMARY KEY, 354 | [hash] TEXT, 355 | [content] TEXT, 356 | [datetime_utc] TEXT, 357 | [source] TEXT 358 | ); 359 | CREATE TABLE [fragment_aliases] ( 360 | [alias] TEXT PRIMARY KEY, 361 | [fragment_id] INTEGER REFERENCES [fragments]([id]) 362 | ); 363 | CREATE TABLE "prompt_fragments" ( 364 | [response_id] TEXT REFERENCES [responses]([id]), 365 | [fragment_id] INTEGER REFERENCES [fragments]([id]), 366 | [order] INTEGER, 367 | PRIMARY KEY ([response_id], 368 | [fragment_id], 369 | [order]) 370 | ); 371 | CREATE TABLE "system_fragments" ( 372 | [response_id] TEXT REFERENCES [responses]([id]), 373 | [fragment_id] INTEGER REFERENCES [fragments]([id]), 374 | [order] INTEGER, 375 | PRIMARY KEY ([response_id], 376 | [fragment_id], 377 | [order]) 378 | ); 379 | CREATE TABLE [tools] ( 380 | [id] INTEGER PRIMARY KEY, 381 | [hash] TEXT, 382 | [name] TEXT, 383 | [description] TEXT, 384 | [input_schema] TEXT, 385 | [plugin] TEXT 386 | ); 387 | CREATE TABLE [tool_responses] ( 388 | [tool_id] INTEGER REFERENCES [tools]([id]), 389 | [response_id] TEXT REFERENCES [responses]([id]), 390 | PRIMARY KEY ([tool_id], 391 | [response_id]) 392 | ); 393 | CREATE TABLE [tool_calls] ( 394 | [id] INTEGER PRIMARY KEY, 395 | [response_id] TEXT REFERENCES [responses]([id]), 396 | [tool_id] INTEGER REFERENCES [tools]([id]), 397 | [name] TEXT, 398 | [arguments] TEXT, 399 | [tool_call_id] TEXT 400 | ); 401 | CREATE TABLE "tool_results" ( 402 | [id] INTEGER PRIMARY KEY, 403 | [response_id] TEXT REFERENCES [responses]([id]), 404 | [tool_id] INTEGER REFERENCES [tools]([id]), 405 | [name] TEXT, 406 | [output] TEXT, 407 | [tool_call_id] TEXT, 408 | [instance_id] INTEGER REFERENCES [tool_instances]([id]), 409 | [exception] TEXT 410 | ); 411 | CREATE TABLE [tool_instances] ( 412 | [id] INTEGER PRIMARY KEY, 413 | [plugin] TEXT, 414 | [name] TEXT, 415 | [arguments] TEXT 416 | ); 417 | ``` 418 | 419 | `responses_fts` configures [SQLite full-text search](https://www.sqlite.org/fts5.html) against the `prompt` and `response` columns in the `responses` table. 
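Since this is a regular SQLite database you can also query it directly, for example with `sqlite3 "$(llm logs path)"` or in Datasette. A sketch of a query against the tables above:

```sql
-- Count logged responses and total input tokens per model
SELECT model, COUNT(*) AS responses, SUM(input_tokens) AS total_input_tokens
FROM responses
GROUP BY model
ORDER BY responses DESC;
```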
420 | -------------------------------------------------------------------------------- /docs/openai-models.md: -------------------------------------------------------------------------------- 1 | (openai-models)= 2 | 3 | # OpenAI models 4 | 5 | LLM ships with a default plugin for talking to OpenAI's API. OpenAI offer both language models and embedding models, and LLM can access both types. 6 | 7 | (openai-models-configuration)= 8 | 9 | ## Configuration 10 | 11 | All OpenAI models are accessed using an API key. You can obtain one from [the API keys page](https://platform.openai.com/api-keys) on their site. 12 | 13 | Once you have created a key, configure LLM to use it by running: 14 | 15 | ```bash 16 | llm keys set openai 17 | ``` 18 | Then paste in the API key. 19 | 20 | (openai-models-language)= 21 | 22 | ## OpenAI language models 23 | 24 | Run `llm models` for a full list of available models. The OpenAI models supported by LLM are: 25 | 26 | 33 | ``` 34 | OpenAI Chat: gpt-4o (aliases: 4o) 35 | OpenAI Chat: chatgpt-4o-latest (aliases: chatgpt-4o) 36 | OpenAI Chat: gpt-4o-mini (aliases: 4o-mini) 37 | OpenAI Chat: gpt-4o-audio-preview 38 | OpenAI Chat: gpt-4o-audio-preview-2024-12-17 39 | OpenAI Chat: gpt-4o-audio-preview-2024-10-01 40 | OpenAI Chat: gpt-4o-mini-audio-preview 41 | OpenAI Chat: gpt-4o-mini-audio-preview-2024-12-17 42 | OpenAI Chat: gpt-4.1 (aliases: 4.1) 43 | OpenAI Chat: gpt-4.1-mini (aliases: 4.1-mini) 44 | OpenAI Chat: gpt-4.1-nano (aliases: 4.1-nano) 45 | OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt) 46 | OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k) 47 | OpenAI Chat: gpt-4 (aliases: 4, gpt4) 48 | OpenAI Chat: gpt-4-32k (aliases: 4-32k) 49 | OpenAI Chat: gpt-4-1106-preview 50 | OpenAI Chat: gpt-4-0125-preview 51 | OpenAI Chat: gpt-4-turbo-2024-04-09 52 | OpenAI Chat: gpt-4-turbo (aliases: gpt-4-turbo-preview, 4-turbo, 4t) 53 | OpenAI Chat: gpt-4.5-preview-2025-02-27 54 | OpenAI Chat: gpt-4.5-preview (aliases: gpt-4.5) 55 | OpenAI Chat: o1 56 | OpenAI Chat: o1-2024-12-17 57 | OpenAI Chat: o1-preview 58 | OpenAI Chat: o1-mini 59 | OpenAI Chat: o3-mini 60 | OpenAI Chat: o3 61 | OpenAI Chat: o4-mini 62 | OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct) 63 | ``` 64 | 65 | 66 | See [the OpenAI models documentation](https://platform.openai.com/docs/models) for details of each of these. 67 | 68 | `gpt-4o-mini` (aliased to `4o-mini`) is the least expensive model, and is the default for if you don't specify a model at all. Consult [OpenAI's model documentation](https://platform.openai.com/docs/models) for details of the other models. 69 | 70 | [o1-pro](https://platform.openai.com/docs/models/o1-pro) is not available through the Chat Completions API used by LLM's default OpenAI plugin. You can install the new [llm-openai-plugin](https://github.com/simonw/llm-openai-plugin) plugin to access that model. 71 | 72 | ## Model features 73 | 74 | The following features work with OpenAI models: 75 | 76 | - {ref}`System prompts ` can be used to provide instructions that have a higher weight than the prompt itself. 77 | - {ref}`Attachments `. Many OpenAI models support image inputs - check which ones using `llm models --options`. Any model that accepts images can also accept PDFs. 78 | - {ref}`Schemas ` can be used to influence the JSON structure of the model output. 79 | - {ref}`Model options ` can be used to set parameters like `temperature`. Use `llm models --options` for a full list of supported options. 
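These features can be combined in a single prompt. A sketch - the image filename is just an illustration:

```bash
# Attachment, schema and a model option in one call
llm -m gpt-4o-mini 'Describe the creature in this photo' \
  -a pelican.jpg \
  --schema 'name, species, description' \
  -o temperature 0.5
```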
80 | 81 | (openai-models-embedding)= 82 | 83 | ## OpenAI embedding models 84 | 85 | Run `llm embed-models` for a list of {ref}`embedding models `. The following OpenAI embedding models are supported by LLM: 86 | 87 | ``` 88 | ada-002 (aliases: ada, oai) 89 | 3-small 90 | 3-large 91 | 3-small-512 92 | 3-large-256 93 | 3-large-1024 94 | ``` 95 | 96 | The `3-small` model is currently the most inexpensive. `3-large` costs more but is more capable - see [New embedding models and API updates](https://openai.com/blog/new-embedding-models-and-api-updates) on the OpenAI blog for details and benchmarks. 97 | 98 | An important characteristic of any embedding model is the size of the vector it returns. Smaller vectors cost less to store and query, but may be less accurate. 99 | 100 | OpenAI `3-small` and `3-large` vectors can be safely truncated to lower dimensions without losing too much accuracy. The `-int` models provided by LLM are pre-configured to do this, so `3-large-256` is the `3-large` model truncated to 256 dimensions. 101 | 102 | The vector size of the supported OpenAI embedding models are as follows: 103 | 104 | | Model | Size | 105 | | --- | --- | 106 | | ada-002 | 1536 | 107 | | 3-small | 1536 | 108 | | 3-large | 3072 | 109 | | 3-small-512 | 512 | 110 | | 3-large-256 | 256 | 111 | | 3-large-1024 | 1024 | 112 | 113 | (openai-completion-models)= 114 | 115 | ## OpenAI completion models 116 | 117 | The `gpt-3.5-turbo-instruct` model is a little different - it is a completion model rather than a chat model, described in [the OpenAI completions documentation](https://platform.openai.com/docs/api-reference/completions/create). 118 | 119 | Completion models can be called with the `-o logprobs 3` option (not supported by chat models) which will cause LLM to store 3 log probabilities for each returned token in the SQLite database. Consult [this issue](https://github.com/simonw/llm/issues/284#issuecomment-1724772704) for details on how to read these values. 120 | 121 | (openai-extra-models)= 122 | 123 | ## Adding more OpenAI models 124 | 125 | OpenAI occasionally release new models with new names. LLM aims to ship new releases to support these, but you can also configure them directly, by adding them to a `extra-openai-models.yaml` configuration file. 126 | 127 | Run this command to find the directory in which this file should be created: 128 | 129 | ```bash 130 | dirname "$(llm logs path)" 131 | ``` 132 | On my Mac laptop I get this: 133 | ``` 134 | ~/Library/Application Support/io.datasette.llm 135 | ``` 136 | Create a file in that directory called `extra-openai-models.yaml`. 137 | 138 | Let's say OpenAI have just released the `gpt-3.5-turbo-0613` model and you want to use it, despite LLM not yet shipping support. You could configure that by adding this to the file: 139 | 140 | ```yaml 141 | - model_id: gpt-3.5-turbo-0613 142 | model_name: gpt-3.5-turbo-0613 143 | aliases: ["0613"] 144 | ``` 145 | The `model_id` is the identifier that will be recorded in the LLM logs. You can use this to specify the model, or you can optionally include a list of aliases for that model. The `model_name` is the actual model identifier that will be passed to the API, which must match exactly what the API expects. 146 | 147 | If the model is a completion model (such as `gpt-3.5-turbo-instruct`) add `completion: true` to the configuration. 148 | 149 | If the model supports structured extraction using json_schema, add `supports_schema: true` to the configuration. 
150 | 151 | For reasoning models like `o1` or `o3-mini` add `reasoning: true`. 152 | 153 | With this configuration in place, the following command should run a prompt against the new model: 154 | 155 | ```bash 156 | llm -m 0613 'What is the capital of France?' 157 | ``` 158 | Run `llm models` to confirm that the new model is now available: 159 | ```bash 160 | llm models 161 | ``` 162 | Example output: 163 | ``` 164 | OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt) 165 | OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k) 166 | OpenAI Chat: gpt-4 (aliases: 4, gpt4) 167 | OpenAI Chat: gpt-4-32k (aliases: 4-32k) 168 | OpenAI Chat: gpt-3.5-turbo-0613 (aliases: 0613) 169 | ``` 170 | Running `llm logs -n 1` should confirm that the prompt and response has been correctly logged to the database. 171 | -------------------------------------------------------------------------------- /docs/other-models.md: -------------------------------------------------------------------------------- 1 | (other-models)= 2 | # Other models 3 | 4 | LLM supports OpenAI models by default. You can install {ref}`plugins ` to add support for other models. You can also add additional OpenAI-API-compatible models {ref}`using a configuration file `. 5 | 6 | ## Installing and using a local model 7 | 8 | {ref}`LLM plugins ` can provide local models that run on your machine. 9 | 10 | To install **[llm-gpt4all](https://github.com/simonw/llm-gpt4all)**, providing 17 models from the [GPT4All](https://gpt4all.io/) project, run this: 11 | 12 | ```bash 13 | llm install llm-gpt4all 14 | ``` 15 | Run `llm models` to see the expanded list of available models. 16 | 17 | To run a prompt through one of the models from GPT4All specify it using `-m/--model`: 18 | ```bash 19 | llm -m orca-mini-3b-gguf2-q4_0 'What is the capital of France?' 20 | ``` 21 | The model will be downloaded and cached the first time you use it. 22 | 23 | Check the {ref}`plugin directory ` for the latest list of available plugins for other models. 24 | 25 | (openai-compatible-models)= 26 | 27 | ## OpenAI-compatible models 28 | 29 | Projects such as [LocalAI](https://localai.io/) offer a REST API that imitates the OpenAI API but can be used to run other models, including models that can be installed on your own machine. These can be added using the same configuration mechanism. 30 | 31 | The `model_id` is the name LLM will use for the model. The `model_name` is the name which needs to be passed to the API - this might differ from the `model_id`, especially if the `model_id` could potentially clash with other installed models. 32 | 33 | The `api_base` key can be used to point the OpenAI client library at a different API endpoint. 34 | 35 | To add the `orca-mini-3b` model hosted by a local installation of [LocalAI](https://localai.io/), add this to your `extra-openai-models.yaml` file: 36 | 37 | ```yaml 38 | - model_id: orca-openai-compat 39 | model_name: orca-mini-3b.ggmlv3 40 | api_base: "http://localhost:8080" 41 | ``` 42 | If the `api_base` is set, the existing configured `openai` API key will not be sent by default. 43 | 44 | You can set `api_key_name` to the name of a key stored using the {ref}`api-keys` feature. 45 | 46 | Add `completion: true` if the model is a completion model that uses a `/completion` as opposed to a `/completion/chat` endpoint. 47 | 48 | If a model does not support streaming, add `can_stream: false` to disable the streaming option. 
49 | 50 | If a model supports structured output via JSON schemas, you can add `supports_schema: true` to support this feature. 51 | 52 | If a model is a vision model, you can add `vision: true` to support this feature and use image attachments. 53 | 54 | If a model is an audio model, you can add `audio: true` to support this feature and use audio attachments. 55 | 56 | Having configured the model like this, run `llm models` to check that it installed correctly. You can then run prompts against it like so: 57 | 58 | ```bash 59 | llm -m orca-openai-compat 'What is the capital of France?' 60 | ``` 61 | And confirm they were logged correctly with: 62 | ```bash 63 | llm logs -n 1 64 | ``` 65 | 66 | ### Extra HTTP headers 67 | 68 | Some providers such as [openrouter.ai](https://openrouter.ai/docs) may require the setting of additional HTTP headers. You can set those using the `headers:` key like this: 69 | 70 | ```yaml 71 | - model_id: claude 72 | model_name: anthropic/claude-2 73 | api_base: "https://openrouter.ai/api/v1" 74 | api_key_name: openrouter 75 | headers: 76 | HTTP-Referer: "https://llm.datasette.io/" 77 | X-Title: LLM 78 | ``` 79 | -------------------------------------------------------------------------------- /docs/plugins/directory.md: -------------------------------------------------------------------------------- 1 | (plugin-directory)= 2 | # Plugin directory 3 | 4 | The following plugins are available for LLM. Here's {ref}`how to install them `. 5 | 6 | ## Local models 7 | 8 | These plugins all help you run LLMs directly on your own computer: 9 | 10 | - **[llm-gguf](https://github.com/simonw/llm-gguf)** uses [llama.cpp](https://github.com/ggerganov/llama.cpp) to run models published in the GGUF format. 11 | - **[llm-mlx](https://github.com/simonw/llm-mlx)** (Mac only) uses Apple's MLX framework to provide extremely high performance access to a large number of local models. 12 | - **[llm-ollama](https://github.com/taketwo/llm-ollama)** adds support for local models run using [Ollama](https://ollama.ai/). 13 | - **[llm-llamafile](https://github.com/simonw/llm-llamafile)** adds support for local models that are running locally using [llamafile](https://github.com/Mozilla-Ocho/llamafile). 14 | - **[llm-mlc](https://github.com/simonw/llm-mlc)** can run local models released by the [MLC project](https://mlc.ai/mlc-llm/), including models that can take advantage of the GPU on Apple Silicon M1/M2 devices. 15 | - **[llm-gpt4all](https://github.com/simonw/llm-gpt4all)** adds support for various models released by the [GPT4All](https://gpt4all.io/) project that are optimized to run locally on your own machine. These models include versions of Vicuna, Orca, Falcon and MPT - here's [a full list of models](https://observablehq.com/@simonw/gpt4all-models). 16 | - **[llm-mpt30b](https://github.com/simonw/llm-mpt30b)** adds support for the [MPT-30B](https://huggingface.co/mosaicml/mpt-30b) local model. 17 | 18 | ## Remote APIs 19 | 20 | These plugins can be used to interact with remotely hosted models via their API: 21 | 22 | - **[llm-mistral](https://github.com/simonw/llm-mistral)** adds support for [Mistral AI](https://mistral.ai/)'s language and embedding models. 23 | - **[llm-gemini](https://github.com/simonw/llm-gemini)** adds support for Google's [Gemini](https://ai.google.dev/docs) models. 
24 | - **[llm-anthropic](https://github.com/simonw/llm-anthropic)** supports Anthropic's [Claude 3 family](https://www.anthropic.com/news/claude-3-family), [3.5 Sonnet](https://www.anthropic.com/news/claude-3-5-sonnet) and beyond. 25 | - **[llm-command-r](https://github.com/simonw/llm-command-r)** supports Cohere's Command R and [Command R Plus](https://txt.cohere.com/command-r-plus-microsoft-azure/) API models. 26 | - **[llm-reka](https://github.com/simonw/llm-reka)** supports the [Reka](https://www.reka.ai/) family of models via their API. 27 | - **[llm-perplexity](https://github.com/hex/llm-perplexity)** by Alexandru Geana supports the [Perplexity Labs](https://docs.perplexity.ai/) API models, including `llama-3-sonar-large-32k-online` which can search for things online and `llama-3-70b-instruct`. 28 | - **[llm-groq](https://github.com/angerman/llm-groq)** by Moritz Angermann provides access to fast models hosted by [Groq](https://console.groq.com/docs/models). 29 | - **[llm-grok](https://github.com/Hiepler/llm-grok)** by Benedikt Hiepler providing access to Grok model using the xAI API [Grok](https://x.ai/api). 30 | - **[llm-anyscale-endpoints](https://github.com/simonw/llm-anyscale-endpoints)** supports models hosted on the [Anyscale Endpoints](https://app.endpoints.anyscale.com/) platform, including Llama 2 70B. 31 | - **[llm-replicate](https://github.com/simonw/llm-replicate)** adds support for remote models hosted on [Replicate](https://replicate.com/), including Llama 2 from Meta AI. 32 | - **[llm-fireworks](https://github.com/simonw/llm-fireworks)** supports models hosted by [Fireworks AI](https://fireworks.ai/). 33 | - **[llm-openrouter](https://github.com/simonw/llm-openrouter)** provides access to models hosted on [OpenRouter](https://openrouter.ai/). 34 | - **[llm-cohere](https://github.com/Accudio/llm-cohere)** by Alistair Shepherd provides `cohere-generate` and `cohere-summarize` API models, powered by [Cohere](https://cohere.com/). 35 | - **[llm-bedrock](https://github.com/simonw/llm-bedrock)** adds support for Nova by Amazon via Amazon Bedrock. 36 | - **[llm-bedrock-anthropic](https://github.com/sblakey/llm-bedrock-anthropic)** by Sean Blakey adds support for Claude and Claude Instant by Anthropic via Amazon Bedrock. 37 | - **[llm-bedrock-meta](https://github.com/flabat/llm-bedrock-meta)** by Fabian Labat adds support for Llama 2 and Llama 3 by Meta via Amazon Bedrock. 38 | - **[llm-together](https://github.com/wearedevx/llm-together)** adds support for the [Together AI](https://www.together.ai/) extensive family of hosted openly licensed models. 39 | - **[llm-deepseek](https://github.com/abrasumente233/llm-deepseek)** adds support for the [DeepSeek](https://deepseek.com)'s DeepSeek-Chat and DeepSeek-Coder models. 40 | - **[llm-lambda-labs](https://github.com/simonw/llm-lambda-labs)** provides access to models hosted by [Lambda Labs](https://docs.lambdalabs.com/public-cloud/lambda-chat-api/), including the Nous Hermes 3 series. 41 | - **[llm-venice](https://github.com/ar-jan/llm-venice)** provides access to uncensored models hosted by privacy-focused [Venice AI](https://docs.venice.ai/), including Llama 3.1 405B. 42 | 43 | If an API model host provides an OpenAI-compatible API you can also [configure LLM to talk to it](https://llm.datasette.io/en/stable/other-models.html#openai-compatible-models) without needing an extra plugin. 
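A minimal sketch of what that looks like in `extra-openai-models.yaml` - the model names and URL here are placeholders, and the full set of supported keys is covered in the OpenAI-compatible models documentation:

```yaml
- model_id: my-hosted-model
  model_name: provider/actual-model-name
  api_base: "https://api.example.com/v1"
  api_key_name: example
```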
44 | 45 | ## Tools 46 | 47 | The following plugins add new {ref}`tools ` that can be used by models: 48 | 49 | - **[llm-tools-simpleeval](https://github.com/simonw/llm-tools-simpleeval)** implements simple expression support for things like mathematics. 50 | - **[llm-tools-quickjs](https://github.com/simonw/llm-tools-quickjs)** provides access to a sandboxed QuickJS JavaScript interpreter, allowing LLMs to run JavaScript code. The environment persists between calls so the model can set variables and build functions and reuse them later on. 51 | - **[llm-tools-sqlite](https://github.com/simonw/llm-tools-sqlite)** can run read-only SQL queries against local SQLite databases. 52 | - **[llm-tools-datasette](https://github.com/simonw/llm-tools-datasette)** can run SQL queries against a remote [Datasette](https://datasette.io/) instance. 53 | - **[llm-tools-exa](https://github.com/daturkel/llm-tools-exa)** by Dan Turkel can perform web searches and question-answering using [exa.ai](https://exa.ai/). 54 | - **[llm-tools-rag](https://github.com/daturkel/llm-tools-rag)** by Dan Turkel can perform searches over your LLM embedding collections for simple RAG. 55 | 56 | ## Fragments and template loaders 57 | 58 | {ref}`LLM 0.24 ` introduced support for plugins that define `-f prefix:value` or `-t prefix:value` custom loaders for fragments and templates. 59 | 60 | - **[llm-video-frames](https://github.com/simonw/llm-video-frames)** uses `ffmpeg` to turn a video into a sequence of JPEG frames suitable for feeding into a vision model that doesn't support video inputs: `llm -f video-frames:video.mp4 'describe the key scenes in this video'`. 61 | - **[llm-templates-github](https://github.com/simonw/llm-templates-github)** supports loading templates shared on GitHub, e.g. `llm -t gh:simonw/pelican-svg`. 62 | - **[llm-templates-fabric](https://github.com/simonw/llm-templates-fabric)** provides access to the [Fabric](https://github.com/danielmiessler/fabric) collection of prompts: `cat setup.py | llm -t fabric:explain_code`. 63 | - **[llm-fragments-github](https://github.com/simonw/llm-fragments-github)** can load entire GitHub repositories in a single operation: `llm -f github:simonw/files-to-prompt 'explain this code'`. It can also fetch issue threads as Markdown using `llm -f issue:https://github.com/simonw/llm-fragments-github/issues/3`. 64 | - **[llm-hacker-news](https://github.com/simonw/llm-hacker-news)** imports conversations from Hacker News as fragments: `llm -f hn:43615912 'summary with illustrative direct quotes'`. 65 | - **[llm-fragments-pypi](https://github.com/samueldg/llm-fragments-pypi)** loads [PyPI](https://pypi.org/) packages' description and metadata as fragments: `llm -f pypi:ruff "What flake8 plugins does ruff re-implement?"`. 66 | - **[llm-fragments-pdf](https://github.com/daturkel/llm-fragments-pdf)** by Dan Turkel converts PDFs to markdown with [PyMuPDF4LLM](https://pymupdf.readthedocs.io/en/latest/pymupdf4llm/index.html) to use as fragments: `llm -f pdf:something.pdf "what's this about?"`. 67 | - **[llm-fragments-site-text](https://github.com/daturkel/llm-fragments-site-text)** by Dan Turkel converts websites to markdown with [Trafilatura](https://trafilatura.readthedocs.io/en/latest/) to use as fragments: `llm -f site:https://example.com "summarize this"`. 68 | - **[llm-fragments-reader](https://github.com/simonw/llm-fragments-reader)** runs a URL theough the Jina Reader API: `llm -f 'reader:https://simonwillison.net/tags/jina/' summary`. 
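Fragment loaders from different plugins can be stacked in a single prompt. For example, combining the repository and issue loaders shown above:

```bash
llm -f github:simonw/llm-fragments-github \
  -f issue:https://github.com/simonw/llm-fragments-github/issues/3 \
  'Suggest a fix for this issue'
```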
69 | 70 | ## Embedding models 71 | 72 | {ref}`Embedding models ` are models that can be used to generate and store embedding vectors for text. 73 | 74 | - **[llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers)** adds support for embeddings using the [sentence-transformers](https://www.sbert.net/) library, which provides access to [a wide range](https://www.sbert.net/docs/pretrained_models.html) of embedding models. 75 | - **[llm-clip](https://github.com/simonw/llm-clip)** provides the [CLIP](https://openai.com/research/clip) model, which can be used to embed images and text in the same vector space, enabling text search against images. See [Build an image search engine with llm-clip](https://simonwillison.net/2023/Sep/12/llm-clip-and-chat/) for more on this plugin. 76 | - **[llm-embed-jina](https://github.com/simonw/llm-embed-jina)** provides Jina AI's [8K text embedding models](https://jina.ai/news/jina-ai-launches-worlds-first-open-source-8k-text-embedding-rivaling-openai/). 77 | - **[llm-embed-onnx](https://github.com/simonw/llm-embed-onnx)** provides seven embedding models that can be executed using the ONNX model framework. 78 | 79 | ## Extra commands 80 | 81 | - **[llm-cmd](https://github.com/simonw/llm-cmd)** accepts a prompt for a shell command, runs that prompt and populates the result in your shell so you can review it, edit it and then hit `` to execute or `ctrl+c` to cancel. 82 | - **[llm-cmd-comp](https://github.com/CGamesPlay/llm-cmd-comp)** provides a key binding for your shell that will launch a chat to build the command. When ready, hit `` and it will go right back into your shell command line, so you can run it. 83 | - **[llm-python](https://github.com/simonw/llm-python)** adds a `llm python` command for running a Python interpreter in the same virtual environment as LLM. This is useful for debugging, and also provides a convenient way to interact with the LLM {ref}`python-api` if you installed LLM using Homebrew or `pipx`. 84 | - **[llm-cluster](https://github.com/simonw/llm-cluster)** adds a `llm cluster` command for calculating clusters for a collection of embeddings. Calculated clusters can then be passed to a Large Language Model to generate a summary description. 85 | - **[llm-jq](https://github.com/simonw/llm-jq)** lets you pipe in JSON data and a prompt describing a `jq` program, then executes the generated program against the JSON. 86 | 87 | ## Just for fun 88 | 89 | - **[llm-markov](https://github.com/simonw/llm-markov)** adds a simple model that generates output using a [Markov chain](https://en.wikipedia.org/wiki/Markov_chain). This example is used in the tutorial [Writing a plugin to support a new model](https://llm.datasette.io/en/latest/plugins/tutorial-model-plugin.html). 90 | -------------------------------------------------------------------------------- /docs/plugins/index.md: -------------------------------------------------------------------------------- 1 | (plugins)= 2 | # Plugins 3 | 4 | LLM plugins can enhance LLM by making alternative Large Language Models available, either via API or by running the models locally on your machine. 5 | 6 | Plugins can also add new commands to the `llm` CLI tool. 7 | 8 | The {ref}`plugin directory ` lists available plugins that you can install and use. 9 | 10 | {ref}`tutorial-model-plugin` describes how to build a new plugin in detail. 
11 | 12 | ```{toctree} 13 | --- 14 | maxdepth: 3 15 | --- 16 | installing-plugins 17 | directory 18 | plugin-hooks 19 | tutorial-model-plugin 20 | advanced-model-plugins 21 | plugin-utilities 22 | ``` 23 | -------------------------------------------------------------------------------- /docs/plugins/installing-plugins.md: -------------------------------------------------------------------------------- 1 | (installing-plugins)= 2 | # Installing plugins 3 | 4 | Plugins must be installed in the same virtual environment as LLM itself. 5 | 6 | You can find names of plugins to install in the {ref}`plugin directory ` 7 | 8 | Use the `llm install` command (a thin wrapper around `pip install`) to install plugins in the correct environment: 9 | ```bash 10 | llm install llm-gpt4all 11 | ``` 12 | Plugins can be uninstalled with `llm uninstall`: 13 | ```bash 14 | llm uninstall llm-gpt4all -y 15 | ``` 16 | The `-y` flag skips asking for confirmation. 17 | 18 | You can see additional models that have been added by plugins by running: 19 | ```bash 20 | llm models 21 | ``` 22 | Or add `--options` to include details of the options available for each model: 23 | ```bash 24 | llm models --options 25 | ``` 26 | To run a prompt against a newly installed model, pass its name as the `-m/--model` option: 27 | ```bash 28 | llm -m orca-mini-3b-gguf2-q4_0 'What is the capital of France?' 29 | ``` 30 | 31 | ## Listing installed plugins 32 | 33 | Run `llm plugins` to list installed plugins: 34 | 35 | ```bash 36 | llm plugins 37 | ``` 38 | ```json 39 | [ 40 | { 41 | "name": "llm-anthropic", 42 | "hooks": [ 43 | "register_models" 44 | ], 45 | "version": "0.11" 46 | }, 47 | { 48 | "name": "llm-gguf", 49 | "hooks": [ 50 | "register_commands", 51 | "register_models" 52 | ], 53 | "version": "0.1a0" 54 | }, 55 | { 56 | "name": "llm-clip", 57 | "hooks": [ 58 | "register_commands", 59 | "register_embedding_models" 60 | ], 61 | "version": "0.1" 62 | }, 63 | { 64 | "name": "llm-cmd", 65 | "hooks": [ 66 | "register_commands" 67 | ], 68 | "version": "0.2a0" 69 | }, 70 | { 71 | "name": "llm-gemini", 72 | "hooks": [ 73 | "register_embedding_models", 74 | "register_models" 75 | ], 76 | "version": "0.3" 77 | } 78 | ] 79 | ``` 80 | 81 | (llm-load-plugins)= 82 | ## Running with a subset of plugins 83 | 84 | By default, LLM will load all plugins that are installed in the same virtual environment as LLM itself. 85 | 86 | You can control the set of plugins that is loaded using the `LLM_LOAD_PLUGINS` environment variable. 87 | 88 | Set that to the empty string to disable all plugins: 89 | 90 | ```bash 91 | LLM_LOAD_PLUGINS='' llm ... 92 | ``` 93 | Or to a comma-separated list of plugin names to load only those plugins: 94 | 95 | ```bash 96 | LLM_LOAD_PLUGINS='llm-gpt4all,llm-cluster' llm ... 
97 | ``` 98 | You can use the `llm plugins` command to check that it is working correctly: 99 | ``` 100 | LLM_LOAD_PLUGINS='' llm plugins 101 | ``` 102 | -------------------------------------------------------------------------------- /docs/plugins/llm-markov/llm_markov.py: -------------------------------------------------------------------------------- 1 | import llm 2 | import random 3 | import time 4 | from typing import Optional 5 | from pydantic import field_validator, Field 6 | 7 | 8 | @llm.hookimpl 9 | def register_models(register): 10 | register(Markov()) 11 | 12 | 13 | def build_markov_table(text): 14 | words = text.split() 15 | transitions = {} 16 | # Loop through all but the last word 17 | for i in range(len(words) - 1): 18 | word = words[i] 19 | next_word = words[i + 1] 20 | transitions.setdefault(word, []).append(next_word) 21 | return transitions 22 | 23 | 24 | def generate(transitions, length, start_word=None): 25 | all_words = list(transitions.keys()) 26 | next_word = start_word or random.choice(all_words) 27 | for i in range(length): 28 | yield next_word 29 | options = transitions.get(next_word) or all_words 30 | next_word = random.choice(options) 31 | 32 | 33 | class Markov(llm.Model): 34 | model_id = "markov" 35 | can_stream = True 36 | 37 | class Options(llm.Options): 38 | length: Optional[int] = Field( 39 | description="Number of words to generate", default=None 40 | ) 41 | delay: Optional[float] = Field( 42 | description="Seconds to delay between each token", default=None 43 | ) 44 | 45 | @field_validator("length") 46 | def validate_length(cls, length): 47 | if length is None: 48 | return None 49 | if length < 2: 50 | raise ValueError("length must be >= 2") 51 | return length 52 | 53 | @field_validator("delay") 54 | def validate_delay(cls, delay): 55 | if delay is None: 56 | return None 57 | if not 0 <= delay <= 10: 58 | raise ValueError("delay must be between 0 and 10") 59 | return delay 60 | 61 | def execute(self, prompt, stream, response, conversation): 62 | text = prompt.prompt 63 | transitions = build_markov_table(text) 64 | length = prompt.options.length or 20 65 | for word in generate(transitions, length): 66 | yield word + " " 67 | if prompt.options.delay: 68 | time.sleep(prompt.options.delay) 69 | -------------------------------------------------------------------------------- /docs/plugins/llm-markov/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "llm-markov" 3 | version = "0.1" 4 | 5 | [project.entry-points.llm] 6 | markov = "llm_markov" -------------------------------------------------------------------------------- /docs/plugins/plugin-hooks.md: -------------------------------------------------------------------------------- 1 | (plugin-hooks)= 2 | # Plugin hooks 3 | 4 | Plugins use **plugin hooks** to customize LLM's behavior. These hooks are powered by the [Pluggy plugin system](https://pluggy.readthedocs.io/). 5 | 6 | Each plugin can implement one or more hooks using the @hookimpl decorator against one of the hook function names described on this page. 7 | 8 | LLM imitates the Datasette plugin system. The [Datasette plugin documentation](https://docs.datasette.io/en/stable/writing_plugins.html) describes how plugins work. 9 | 10 | (plugin-hooks-register-commands)= 11 | ## register_commands(cli) 12 | 13 | This hook adds new commands to the `llm` CLI tool - for example `llm extra-command`. 
14 | 15 | This example plugin adds a new `hello-world` command that prints "Hello world!": 16 | 17 | ```python 18 | from llm import hookimpl 19 | import click 20 | 21 | @hookimpl 22 | def register_commands(cli): 23 | @cli.command(name="hello-world") 24 | def hello_world(): 25 | "Print hello world" 26 | click.echo("Hello world!") 27 | ``` 28 | This new command will be added to `llm --help` and can be run using `llm hello-world`. 29 | 30 | (plugin-hooks-register-models)= 31 | ## register_models(register) 32 | 33 | This hook can be used to register one or more additional models. 34 | 35 | ```python 36 | import llm 37 | 38 | @llm.hookimpl 39 | def register_models(register): 40 | register(HelloWorld()) 41 | 42 | class HelloWorld(llm.Model): 43 | model_id = "helloworld" 44 | 45 | def execute(self, prompt, stream, response): 46 | return ["hello world"] 47 | ``` 48 | If your model includes an async version, you can register that too: 49 | 50 | ```python 51 | class AsyncHelloWorld(llm.AsyncModel): 52 | model_id = "helloworld" 53 | 54 | async def execute(self, prompt, stream, response): 55 | return ["hello world"] 56 | 57 | @llm.hookimpl 58 | def register_models(register): 59 | register(HelloWorld(), AsyncHelloWorld(), aliases=("hw",)) 60 | ``` 61 | This demonstrates how to register a model with both sync and async versions, and how to specify an alias for that model. 62 | 63 | The {ref}`model plugin tutorial ` describes how to use this hook in detail. Asynchronous models {ref}`are described here `. 64 | 65 | (plugin-hooks-register-embedding-models)= 66 | ## register_embedding_models(register) 67 | 68 | This hook can be used to register one or more additional embedding models, as described in {ref}`embeddings-writing-plugins`. 69 | 70 | ```python 71 | import llm 72 | 73 | @llm.hookimpl 74 | def register_embedding_models(register): 75 | register(HelloWorld()) 76 | 77 | class HelloWorld(llm.EmbeddingModel): 78 | model_id = "helloworld" 79 | 80 | def embed_batch(self, items): 81 | return [[1, 2, 3], [4, 5, 6]] 82 | ``` 83 | 84 | (plugin-hooks-register-tools)= 85 | ## register_tools(register) 86 | 87 | This hook can register one or more tool functions for use with LLM. See {ref}`the tools documentation ` for more details. 88 | 89 | This example registers two tools: `upper` and `count_character_in_word`. 90 | 91 | ```python 92 | import llm 93 | 94 | def upper(text: str) -> str: 95 | """Convert text to uppercase.""" 96 | return text.upper() 97 | 98 | def count_char(text: str, character: str) -> int: 99 | """Count the number of occurrences of a character in a word.""" 100 | return text.count(character) 101 | 102 | @llm.hookimpl 103 | def register_tools(register): 104 | register(upper) 105 | # Here the name= argument is used to specify a different name for the tool: 106 | register(count_char, name="count_character_in_word") 107 | ``` 108 | 109 | Tools can also be implemented as classes, as described in {ref}`Toolbox classes ` in the Python API documentation. 110 | 111 | You can register classes like the `Memory` example from there by passing the class (_not_ an instance of the class) to `register()`: 112 | 113 | ```python 114 | import llm 115 | 116 | class Memory(llm.Toolbox): 117 | ... 118 | 119 | @llm.hookimpl 120 | def register_tools(register): 121 | register(Memory) 122 | ``` 123 | Once installed, this tool can be used like so: 124 | 125 | ```bash 126 | llm chat -T Memory 127 | ``` 128 | If a tool name starts with a capital letter it is assumed to be a toolbox class, not a regular tool function. 
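The `Memory` class itself is defined in the Python API documentation. As a rough sketch of the shape of such a toolbox class - the method bodies here are inferred from the session below and are illustrative, not the canonical implementation:

```python
import llm


class Memory(llm.Toolbox):
    "Simple in-memory key/value store the model can read and write."

    def __init__(self):
        self._memory = {}

    def set(self, key: str, value: str):
        "Store a value under a key."
        self._memory[key] = value

    def get(self, key: str) -> str:
        "Retrieve the value stored under a key."
        return self._memory.get(key, "")

    def append(self, key: str, value: str):
        "Append a value to an existing key, separated by a newline."
        existing = self._memory.get(key, "")
        self._memory[key] = (existing + "\n" + value).strip()

    def keys(self) -> list:
        "List the keys that currently have stored values."
        return list(self._memory.keys())
```

Each public method becomes a tool named after the class and method, such as `Memory_set` and `Memory_get`.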
129 | 130 | Here's an example session with the Memory tool: 131 | ``` 132 | Chatting with gpt-4.1-mini 133 | Type 'exit' or 'quit' to exit 134 | Type '!multi' to enter multiple lines, then '!end' to finish 135 | Type '!edit' to open your default editor and modify the prompt 136 | Type '!fragment [ ...]' to insert one or more fragments 137 | > Remember my name is Henry 138 | 139 | Tool call: Memory_set({'key': 'user_name', 'value': 'Henry'}) 140 | null 141 | 142 | Got it, Henry! I'll remember your name. How can I assist you today? 143 | > what keys are there? 144 | 145 | Tool call: Memory_keys({}) 146 | [ 147 | "user_name" 148 | ] 149 | 150 | Currently, there is one key stored: "user_name". Would you like to add or retrieve any information? 151 | > read it 152 | 153 | Tool call: Memory_get({'key': 'user_name'}) 154 | Henry 155 | 156 | The value stored under the key "user_name" is Henry. Is there anything else you'd like to do? 157 | > add Barrett to it 158 | 159 | Tool call: Memory_append({'key': 'user_name', 'value': 'Barrett'}) 160 | null 161 | 162 | I have added "Barrett" to the key "user_name". If you want, I can now show you the updated value. 163 | > show value 164 | 165 | Tool call: Memory_get({'key': 'user_name'}) 166 | Henry 167 | Barrett 168 | 169 | The value stored under the key "user_name" is now: 170 | Henry 171 | Barrett 172 | 173 | Is there anything else you would like to do? 174 | ``` 175 | 176 | (plugin-hooks-register-template-loaders)= 177 | ## register_template_loaders(register) 178 | 179 | Plugins can register new {ref}`template loaders ` using the `register_template_loaders` hook. 180 | 181 | Template loaders work with the `llm -t prefix:name` syntax. The prefix specifies the loader, then the registered loader function is called with the name as an argument. The loader function should return an `llm.Template()` object. 182 | 183 | This example plugin registers `my-prefix` as a new template loader. Once installed it can be used like this: 184 | 185 | ```bash 186 | llm -t my-prefix:my-template 187 | ``` 188 | Here's the Python code: 189 | 190 | ```python 191 | import llm 192 | 193 | @llm.hookimpl 194 | def register_template_loaders(register): 195 | register("my-prefix", my_template_loader) 196 | 197 | def my_template_loader(template_path: str) -> llm.Template: 198 | """ 199 | Documentation for the template loader goes here. It will be displayed 200 | when users run the 'llm templates loaders' command. 201 | """ 202 | try: 203 | # Your logic to fetch the template content 204 | # This is just an example: 205 | prompt = "This is a sample prompt for {}".format(template_path) 206 | system = "You are an assistant specialized in {}".format(template_path) 207 | 208 | # Return a Template object with the required fields 209 | return llm.Template( 210 | name=template_path, 211 | prompt=prompt, 212 | system=system, 213 | ) 214 | except Exception as e: 215 | # Raise a ValueError with a clear message if the template cannot be found 216 | raise ValueError(f"Template '{template_path}' could not be loaded: {str(e)}") 217 | ``` 218 | The `llm.Template` class has the following constructor: 219 | 220 | ```{eval-rst} 221 | .. autoclass:: llm.Template 222 | ``` 223 | 224 | The loader function should raise a `ValueError` if the template cannot be found or loaded correctly, providing a clear error message. 
225 | 226 | Note that `functions:` provided by templates using this plugin hook will not be made available, to avoid the risk of plugin hooks that load templates from remote sources introducing arbitrary code execution vulnerabilities. 227 | 228 | (plugin-hooks-register-fragment-loaders)= 229 | ## register_fragment_loaders(register) 230 | 231 | Plugins can register new fragment loaders using the `register_fragment_loaders` hook. These can then be used with the `llm -f prefix:argument` syntax. 232 | 233 | Fragment loader plugins differ from template loader plugins in that you can stack more than one fragment loader call together in the same prompt. 234 | 235 | A fragment loader can return one or more string fragments or attachments, or a mixture of the two. The fragments will be concatenated together into the prompt string, while any attachments will be added to the list of attachments to be sent to the model. 236 | 237 | The `prefix` specifies the loader. The `argument` will be passed to that registered callback. 238 | 239 | The callback works in a very similar way to template loaders, but returns either a single `llm.Fragment`, a list of `llm.Fragment` objects, a single `llm.Attachment`, or a list that can mix `llm.Attachment` and `llm.Fragment` objects. 240 | 241 | The `llm.Fragment` constructor takes a required string argument (the content of the fragment) and an optional second `source` argument, which is a string that may be displayed as debug information. For files this is a path and for URLs it is a URL. Your plugin can use anything you like for the `source` value. 242 | 243 | See {ref}`the Python API documentation for attachments ` for details of the `llm.Attachment` class. 244 | 245 | Here is some example code: 246 | 247 | ```python 248 | import llm 249 | 250 | @llm.hookimpl 251 | def register_fragment_loaders(register): 252 | register("my-fragments", my_fragment_loader) 253 | 254 | 255 | def my_fragment_loader(argument: str) -> llm.Fragment: 256 | """ 257 | Documentation for the fragment loader goes here. It will be displayed 258 | when users run the 'llm fragments loaders' command. 259 | """ 260 | try: 261 | fragment = "Fragment content for {}".format(argument) 262 | source = "my-fragments:{}".format(argument) 263 | return llm.Fragment(fragment, source) 264 | except Exception as ex: 265 | # Raise a ValueError with a clear message if the fragment cannot be loaded 266 | raise ValueError( 267 | f"Fragment 'my-fragments:{argument}' could not be loaded: {str(ex)}" 268 | ) 269 | 270 | # Or for the case where you want to return multiple fragments and attachments: 271 | def my_fragment_loader(argument: str) -> list[llm.Fragment]: 272 | "Docs go here." 273 | return [ 274 | llm.Fragment("Fragment 1 content", f"my-fragments:{argument}"), 275 | llm.Fragment("Fragment 2 content", f"my-fragments:{argument}"), 276 | llm.Attachment(path="/path/to/image.png"), 277 | ] 278 | ``` 279 | A plugin like this one can be called like so: 280 | ```bash 281 | llm -f my-fragments:argument 282 | ``` 283 | If multiple fragments are returned they will be used as if the user passed multiple `-f X` arguments to the command. 284 | 285 | Multiple fragments are particularly useful for things like plugins that return every file in a directory. If these were concatenated together by the plugin, a change to a single file would invalidate the de-duplication cache for that whole fragment. 
Giving each file its own fragment means we can avoid storing multiple copies of that full collection if only a single file has changed. 286 | -------------------------------------------------------------------------------- /docs/plugins/plugin-utilities.md: -------------------------------------------------------------------------------- 1 | (plugin-utilities)= 2 | # Utility functions for plugins 3 | 4 | LLM provides some utility functions that may be useful to plugins. 5 | 6 | (plugin-utilities-get-key)= 7 | ## llm.get_key() 8 | 9 | This method can be used to look up secrets that users have stored using the {ref}`llm keys set ` command. If your plugin needs to access an API key or other secret this can be a convenient way to provide that. 10 | 11 | This returns either a string containing the key or `None` if the key could not be resolved. 12 | 13 | Use the `alias="name"` option to retrieve the key set with that alias: 14 | 15 | ```python 16 | github_key = llm.get_key(alias="github") 17 | ``` 18 | You can also add `env="ENV_VAR"` to fall back to looking in that environment variable if the key has not been configured: 19 | ```python 20 | github_key = llm.get_key(alias="github", env="GITHUB_TOKEN") 21 | ``` 22 | In some cases you may allow users to provide a key as input, where they could input either the key itself or specify an alias to look up in `keys.json`. Use the `input=` parameter for that: 23 | 24 | ```python 25 | github_key = llm.get_key(input=input_from_user, alias="github", env="GITHUB_TOKEN") 26 | ``` 27 | 28 | A previous version of this function used positional arguments in a confusing order. These are still supported but the new keyword arguments are recommended as a better way to use `llm.get_key()` going forward. 29 | 30 | (plugin-utilities-user-dir)= 31 | ## llm.user_dir() 32 | 33 | LLM stores various pieces of logging and configuration data in a directory on the user's machine. 34 | 35 | On macOS this directory is `~/Library/Application Support/io.datasette.llm`, but this will differ on other operating systems. 36 | 37 | The `llm.user_dir()` function returns the path to this directory as a `pathlib.Path` object, after creating that directory if it does not yet exist. 38 | 39 | Plugins can use this to store their own data in a subdirectory of this directory. 40 | 41 | ```python 42 | import llm 43 | user_dir = llm.user_dir() 44 | plugin_dir = user_dir / "my-plugin" 45 | plugin_dir.mkdir(exist_ok=True) 46 | data_path = plugin_dir / "plugin-data.db" 47 | ``` 48 | 49 | (plugin-utilities-modelerror)= 50 | ## llm.ModelError 51 | 52 | If your model encounters an error that should be reported to the user you can raise this exception. For example: 53 | 54 | ```python 55 | import llm 56 | 57 | raise llm.ModelError("MPT model not installed - try running 'llm mpt30b download'") 58 | ``` 59 | This will be caught by the CLI layer and displayed to the user as an error message. 
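In practice this exception is usually raised from inside a model's `execute()` method. A minimal sketch, assuming a hypothetical `weights_are_installed()` helper:

```python
import llm

def weights_are_installed() -> bool:
    # Hypothetical check for whether the model weights have been downloaded
    return False

class Mpt30b(llm.Model):
    model_id = "mpt30b"

    def execute(self, prompt, stream, response):
        if not weights_are_installed():
            # Raising llm.ModelError produces a clean error message
            # instead of a traceback when the model is used from the CLI
            raise llm.ModelError(
                "MPT model not installed - try running 'llm mpt30b download'"
            )
        return ["..."]
```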
60 | 61 | (plugin-utilities-response-fake)= 62 | ## Response.fake() 63 | 64 | When writing tests for a model it can be useful to generate fake response objects, for example in this test from [llm-mpt30b](https://github.com/simonw/llm-mpt30b): 65 | 66 | ```python 67 | def test_build_prompt_conversation(): 68 | model = llm.get_model("mpt") 69 | conversation = model.conversation() 70 | conversation.responses = [ 71 | llm.Response.fake(model, "prompt 1", "system 1", "response 1"), 72 | llm.Response.fake(model, "prompt 2", None, "response 2"), 73 | llm.Response.fake(model, "prompt 3", None, "response 3"), 74 | ] 75 | lines = model.build_prompt(llm.Prompt("prompt 4", model), conversation) 76 | assert lines == [ 77 | "<|im_start|>system\nsystem 1<|im_end|>\n", 78 | "<|im_start|>user\nprompt 1<|im_end|>\n", 79 | "<|im_start|>assistant\nresponse 1<|im_end|>\n", 80 | "<|im_start|>user\nprompt 2<|im_end|>\n", 81 | "<|im_start|>assistant\nresponse 2<|im_end|>\n", 82 | "<|im_start|>user\nprompt 3<|im_end|>\n", 83 | "<|im_start|>assistant\nresponse 3<|im_end|>\n", 84 | "<|im_start|>user\nprompt 4<|im_end|>\n", 85 | "<|im_start|>assistant\n", 86 | ] 87 | ``` 88 | The signature of `llm.Response.fake()` is: 89 | 90 | ```python 91 | def fake(cls, model: Model, prompt: str, system: str, response: str): 92 | ``` 93 | -------------------------------------------------------------------------------- /docs/related-tools.md: -------------------------------------------------------------------------------- 1 | (related-tools)= 2 | # Related tools 3 | 4 | The following tools are designed to be used with LLM: 5 | 6 | (related-tools-strip-tags)= 7 | ## strip-tags 8 | 9 | [strip-tags](https://github.com/simonw/strip-tags) is a command for stripping tags from HTML. This is useful when working with LLMs because HTML tags can use up a lot of your token budget. 10 | 11 | Here's how to summarize the front page of the New York Times, by both stripping tags and filtering to just the elements with `class="story-wrapper"`: 12 | 13 | ```bash 14 | curl -s https://www.nytimes.com/ \ 15 | | strip-tags .story-wrapper \ 16 | | llm -s 'summarize the news' 17 | ``` 18 | 19 | [llm, ttok and strip-tags—CLI tools for working with ChatGPT and other LLMs](https://simonwillison.net/2023/May/18/cli-tools-for-llms/) describes ways to use `strip-tags` in more detail. 20 | 21 | (related-tools-ttok)= 22 | ## ttok 23 | 24 | [ttok](https://github.com/simonw/ttok) is a command-line tool for counting OpenAI tokens. You can use it to check if input is likely to fit in the token limit for GPT-3.5 or GPT-4: 25 | 26 | ```bash 27 | cat my-file.txt | ttok 28 | ``` 29 | ``` 30 | 125 31 | ``` 32 | It can also truncate input down to a desired number of tokens: 33 | ```bash 34 | ttok This is too many tokens -t 3 35 | ``` 36 | ``` 37 | This is too 38 | ``` 39 | This is useful for truncating a large document down to a size where it can be processed by an LLM. 40 | 41 | (related-tools-symbex)= 42 | ## Symbex 43 | 44 | [Symbex](https://github.com/simonw/symbex) is a tool for searching for symbols in Python codebases. It's useful for extracting just the code for a specific problem and then piping that into LLM for explanation, refactoring or other tasks. 
45 | 46 | Here's how to use it to find all functions that match `test*csv*` and use those to guess what the software under test does: 47 | 48 | ```bash 49 | symbex 'test*csv*' | \ 50 | llm --system 'based on these tests guess what this tool does' 51 | ``` 52 | It can also be used to export symbols in a format that can be piped to {ref}`llm embed-multi ` in order to create embeddings: 53 | ```bash 54 | symbex '*' '*:*' --nl | \ 55 | llm embed-multi symbols - \ 56 | --format nl --database embeddings.db --store 57 | ``` 58 | For more examples see [Symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/). 59 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==7.2.6 2 | furo==2023.9.10 3 | sphinx-autobuild 4 | sphinx-copybutton 5 | sphinx-markdown-builder==0.6.8 6 | myst-parser 7 | cogapp 8 | -------------------------------------------------------------------------------- /docs/setup.md: -------------------------------------------------------------------------------- 1 | # Setup 2 | 3 | ## Installation 4 | 5 | Install this tool using `pip`: 6 | ```bash 7 | pip install llm 8 | ``` 9 | Or using [pipx](https://pypa.github.io/pipx/): 10 | ```bash 11 | pipx install llm 12 | ``` 13 | Or using [uv](https://docs.astral.sh/uv/guides/tools/) ({ref}`more tips below `): 14 | ```bash 15 | uv tool install llm 16 | ``` 17 | Or using [Homebrew](https://brew.sh/) (see {ref}`warning note `): 18 | ```bash 19 | brew install llm 20 | ``` 21 | 22 | ## Upgrading to the latest version 23 | 24 | If you installed using `pip`: 25 | ```bash 26 | pip install -U llm 27 | ``` 28 | For `pipx`: 29 | ```bash 30 | pipx upgrade llm 31 | ``` 32 | For `uv`: 33 | ```bash 34 | uv tool upgrade llm 35 | ``` 36 | For Homebrew: 37 | ```bash 38 | brew upgrade llm 39 | ``` 40 | If the latest version is not yet available on Homebrew you can upgrade like this instead: 41 | ```bash 42 | llm install -U llm 43 | ``` 44 | 45 | (setup-uvx)= 46 | ## Using uvx 47 | 48 | If you have [uv](https://docs.astral.sh/uv/) installed you can also use the `uvx` command to try LLM without first installing it like this: 49 | 50 | ```bash 51 | export OPENAI_API_KEY='sk-...' 52 | uvx llm 'fun facts about skunks' 53 | ``` 54 | This will install and run LLM using a temporary virtual environment. 55 | 56 | You can use the `--with` option to add extra plugins. To use Anthropic's models, for example: 57 | ```bash 58 | export ANTHROPIC_API_KEY='...' 59 | uvx --with llm-anthropic llm -m claude-3.5-haiku 'fun facts about skunks' 60 | ``` 61 | All of the usual LLM commands will work with `uvx llm`. Here's how to set your OpenAI key without needing an environment variable for example: 62 | ```bash 63 | uvx llm keys set openai 64 | # Paste key here 65 | ``` 66 | 67 | (homebrew-warning)= 68 | ## A note about Homebrew and PyTorch 69 | 70 | The version of LLM packaged for Homebrew currently uses Python 3.12. The PyTorch project does not yet have a stable release of PyTorch for that version of Python. 71 | 72 | This means that LLM plugins that depend on PyTorch such as [llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers) may not install cleanly with the Homebrew version of LLM. 
73 | 74 | You can work around this by manually installing PyTorch before installing `llm-sentence-transformers`: 75 | 76 | ```bash 77 | llm install llm-python 78 | llm python -m pip install \ 79 | --pre torch torchvision \ 80 | --index-url https://download.pytorch.org/whl/nightly/cpu 81 | llm install llm-sentence-transformers 82 | ``` 83 | This should produce a working installation of that plugin. 84 | 85 | ## Installing plugins 86 | 87 | {ref}`plugins` can be used to add support for other language models, including models that can run on your own device. 88 | 89 | For example, the [llm-gpt4all](https://github.com/simonw/llm-gpt4all) plugin adds support for 17 new models that can be installed on your own machine. You can install that like so: 90 | ```bash 91 | llm install llm-gpt4all 92 | ``` 93 | 94 | (api-keys)= 95 | ## API key management 96 | 97 | Many LLM models require an API key. These API keys can be provided to this tool using several different mechanisms. 98 | 99 | You can obtain an API key for OpenAI's language models from [the API keys page](https://platform.openai.com/api-keys) on their site. 100 | 101 | ### Saving and using stored keys 102 | 103 | The easiest way to store an API key is to use the `llm keys set` command: 104 | 105 | ```bash 106 | llm keys set openai 107 | ``` 108 | You will be prompted to enter the key like this: 109 | ``` 110 | % llm keys set openai 111 | Enter key: 112 | ``` 113 | Once stored, this key will be automatically used for subsequent calls to the API: 114 | 115 | ```bash 116 | llm "Five ludicrous names for a pet lobster" 117 | ``` 118 | 119 | You can list the names of keys that have been set using this command: 120 | 121 | ```bash 122 | llm keys 123 | ``` 124 | 125 | Keys that are stored in this way live in a file called `keys.json`. This file is located at the path shown when you run the following command: 126 | 127 | ```bash 128 | llm keys path 129 | ``` 130 | 131 | On macOS this will be `~/Library/Application Support/io.datasette.llm/keys.json`. On Linux it may be something like `~/.config/io.datasette.llm/keys.json`. 132 | 133 | ### Passing keys using the --key option 134 | 135 | Keys can be passed directly using the `--key` option, like this: 136 | 137 | ```bash 138 | llm "Five names for pet weasels" --key sk-my-key-goes-here 139 | ``` 140 | You can also pass the alias of a key stored in the `keys.json` file. For example, if you want to maintain a personal API key you could add that like this: 141 | ```bash 142 | llm keys set personal 143 | ``` 144 | And then use it for prompts like so: 145 | 146 | ```bash 147 | llm "Five friendly names for a pet skunk" --key personal 148 | ``` 149 | 150 | ### Keys in environment variables 151 | 152 | Keys can also be set using an environment variable. These are different for different models. 153 | 154 | For OpenAI models the key will be read from the `OPENAI_API_KEY` environment variable. 155 | 156 | The environment variable will be used if no `--key` option is passed to the command and there is no key configured in `keys.json`. 157 | 158 | To use an environment variable in place of the `keys.json` key run the prompt like this: 159 | ```bash 160 | llm 'my prompt' --key $OPENAI_API_KEY 161 | ``` 162 | 163 | ## Configuration 164 | 165 | You can configure LLM in a number of different ways. 
166 | 167 | (setup-default-model)= 168 | ### Setting a custom default model 169 | 170 | The model used when calling `llm` without the `-m/--model` option defaults to `gpt-4o-mini` - the fastest and least expensive OpenAI model. 171 | 172 | You can use the `llm models default` command to set a different default model. For GPT-4o (slower and more expensive, but more capable) run this: 173 | 174 | ```bash 175 | llm models default gpt-4o 176 | ``` 177 | You can view the current default model by running this: 178 | ```bash 179 | llm models default 180 | ``` 181 | Any of the supported aliases for a model can be passed to this command. 182 | 183 | ### Setting a custom directory location 184 | 185 | This tool stores various files - prompt templates, stored keys, preferences, a database of logs - in a directory on your computer. 186 | 187 | On macOS this is `~/Library/Application Support/io.datasette.llm/`. 188 | 189 | On Linux it may be something like `~/.config/io.datasette.llm/`. 190 | 191 | You can set a custom location for this directory by setting the `LLM_USER_PATH` environment variable: 192 | 193 | ```bash 194 | export LLM_USER_PATH=/path/to/my/custom/directory 195 | ``` 196 | ### Turning SQLite logging on and off 197 | 198 | By default, LLM will log every prompt and response you make to a SQLite database - see {ref}`logging` for more details. 199 | 200 | You can turn this behavior off by default by running: 201 | ```bash 202 | llm logs off 203 | ``` 204 | Or turn it back on again with: 205 | ```bash 206 | llm logs on 207 | ``` 208 | Run `llm logs status` to see the current state of the setting. -------------------------------------------------------------------------------- /docs/tools.md: -------------------------------------------------------------------------------- 1 | (tools)= 2 | 3 | # Tools 4 | 5 | Many Large Language Models have been trained to execute tools as part of responding to a prompt. LLM supports tool usage with both the command-line interface and the Python API. 6 | 7 | Exposing tools to LLMs **carries risks**! Be sure to read the {ref}`warning below `. 8 | 9 | (tools-how-they-work)= 10 | 11 | ## How tools work 12 | 13 | A tool is effectively a function that the model can request to be executed. Here's how that works: 14 | 15 | 1. The initial prompt to the model includes a list of available tools, containing their names, descriptions and parameters. 16 | 2. The model can choose to call one (or sometimes more than one) of those tools, returning a request for the tool to execute. 17 | 3. The code that calls the model - in this case LLM itself - then executes the specified tool with the provided arguments. 18 | 4. LLM prompts the model a second time, this time including the output of the tool execution. 19 | 5. The model can then use that output to generate its next response. 20 | 21 | This sequence can run several times in a loop, allowing the LLM to access data, act on that data and then pass that data off to other tools for further processing. 22 | 23 | :::{admonition} Tools can be dangerous 24 | :class: danger 25 | 26 | (tools-warning)= 27 | 28 | ## Warning: Tools can be dangerous 29 | 30 | Applications built on top of LLMs suffer from a class of attacks called [prompt injection](https://simonwillison.net/tags/prompt-injection/) attacks. These occur when a malicious third party injects content into the LLM which causes it to take tool-based actions that act against the interests of the user of that application. 
31 | 32 | Be very careful about which tools you enable when you might be exposed to untrusted sources of content - web pages, GitHub issues posted by other people, email and messages that have been sent to you that could come from an attacker. 33 | 34 | Watch out for the **lethal trifecta** of prompt injection exfiltration attacks. If your tool-enabled LLM has the following: 35 | 36 | - access to private data 37 | - exposure to malicious instructions 38 | - the ability to exfiltrate information 39 | 40 | Anyone who can feed malicious instructions into your LLM - by leaving them on a web page it visits, or sending an email to an inbox that it monitors - may be able to trick your LLM into using other tools to access your private information and then exfiltrate (pass out) that data to somewhere the attacker can see it. 41 | ::: 42 | 43 | (tools-trying-out)= 44 | 45 | ## Trying out tools 46 | 47 | LLM comes with a default tool installed, called `llm_version`. You can try that out like this: 48 | 49 | ```bash 50 | llm --tool llm_version "What version of LLM is this?" --td 51 | ``` 52 | You can also use `-T llm_version` as a shortcut for `--tool llm_version`. 53 | 54 | The output should look like this: 55 | ``` 56 | Tool call: llm_version({}) 57 | 0.26a0 58 | 59 | The installed version of the LLM is 0.26a0. 60 | ``` 61 | Further tools can be installed using plugins, or you can use the `llm --functions` option to pass tools implemented as Python functions directly, as {ref}`described here `. 62 | 63 | (tools-implementation)= 64 | 65 | ## LLM's implementation of tools 66 | 67 | In LLM every tool is defined as a Python function. The function can take any number of arguments and can return a string or an object that can be converted to a string. 68 | 69 | Tool functions should include a docstring that describes what the function does. This docstring will become the description that is passed to the model. 70 | 71 | Tools can also be defined as {ref}`toolbox classes `, a subclass of `llm.Toolbox` that allows multiple related tools to be bundled together. Toolbox classes can be configured when they are instantiated, and can also maintain state in between multiple tool calls. 72 | 73 | The Python API can accept functions directly. The command-line interface has two ways for tools to be defined: via plugins that implement the {ref}`register_tools() plugin hook `, or directly on the command-line using the `--functions` argument to specify a block of Python code defining one or more functions - or a path to a Python file containing the same. 74 | 75 | You can use tools {ref}`with the LLM command-line tool ` or {ref}`with the Python API `. 76 | 77 | (tools-default)= 78 | 79 | ## Default tools 80 | 81 | LLM includes some default tools for you to try out: 82 | 83 | - `llm_version()` returns the current version of LLM 84 | - `llm_time()` returns the current local and UTC time 85 | 86 | Try them like this: 87 | 88 | ```bash 89 | llm -T llm_version -T llm_time 'Give me the current time and LLM version' --td 90 | ``` 91 | 92 | (tools-tips)= 93 | 94 | ## Tips for implementing tools 95 | 96 | Consult the {ref}`register_tools() plugin hook ` documentation for examples of how to implement tools in plugins. 97 | 98 | If your plugin needs access to API secrets I recommend storing those using `llm keys set api-name` and then reading them using the {ref}`plugin-utilities-get-key` utility function. This avoids secrets being logged to the database as part of tool calls. 
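For example, a plugin tool that needs an API secret might read it like this - a sketch where the `weather-api` key name, `WEATHER_API_KEY` environment variable and `weather.example.com` endpoint are all placeholders:

```python
import llm
from urllib.parse import quote
from urllib.request import urlopen

def lookup_weather(city: str) -> str:
    "Return the current weather report for the given city."
    # Reads the secret stored with 'llm keys set weather-api', falling back
    # to the WEATHER_API_KEY environment variable if no key has been stored
    key = llm.get_key(alias="weather-api", env="WEATHER_API_KEY")
    url = "https://weather.example.com/v1/current?city={}&key={}".format(quote(city), key)
    with urlopen(url) as http_response:
        return http_response.read().decode("utf-8")

@llm.hookimpl
def register_tools(register):
    register(lookup_weather)
```
The key is resolved inside the tool function, so only the `city` argument - never the secret itself - appears in the logged tool call.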
99 | 100 | 101 | -------------------------------------------------------------------------------- /llm/__main__.py: -------------------------------------------------------------------------------- 1 | from .cli import cli 2 | 3 | if __name__ == "__main__": 4 | cli() 5 | -------------------------------------------------------------------------------- /llm/default_plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simonw/llm/2292d7a56df25b7fd457b53bcc7d5cfccdf1821c/llm/default_plugins/__init__.py -------------------------------------------------------------------------------- /llm/default_plugins/default_tools.py: -------------------------------------------------------------------------------- 1 | import llm 2 | from llm.tools import llm_time, llm_version 3 | 4 | 5 | @llm.hookimpl 6 | def register_tools(register): 7 | register(llm_version) 8 | register(llm_time) 9 | -------------------------------------------------------------------------------- /llm/embeddings_migrations.py: -------------------------------------------------------------------------------- 1 | from sqlite_migrate import Migrations 2 | import hashlib 3 | import time 4 | 5 | embeddings_migrations = Migrations("llm.embeddings") 6 | 7 | 8 | @embeddings_migrations() 9 | def m001_create_tables(db): 10 | db["collections"].create({"id": int, "name": str, "model": str}, pk="id") 11 | db["collections"].create_index(["name"], unique=True) 12 | db["embeddings"].create( 13 | { 14 | "collection_id": int, 15 | "id": str, 16 | "embedding": bytes, 17 | "content": str, 18 | "metadata": str, 19 | }, 20 | pk=("collection_id", "id"), 21 | ) 22 | 23 | 24 | @embeddings_migrations() 25 | def m002_foreign_key(db): 26 | db["embeddings"].add_foreign_key("collection_id", "collections", "id") 27 | 28 | 29 | @embeddings_migrations() 30 | def m003_add_updated(db): 31 | db["embeddings"].add_column("updated", int) 32 | # Pretty-print the schema 33 | db["embeddings"].transform() 34 | # Assume anything existing was last updated right now 35 | db.query( 36 | "update embeddings set updated = ? 
where updated is null", [int(time.time())] 37 | ) 38 | 39 | 40 | @embeddings_migrations() 41 | def m004_store_content_hash(db): 42 | db["embeddings"].add_column("content_hash", bytes) 43 | db["embeddings"].transform( 44 | column_order=( 45 | "collection_id", 46 | "id", 47 | "embedding", 48 | "content", 49 | "content_hash", 50 | "metadata", 51 | "updated", 52 | ) 53 | ) 54 | 55 | # Register functions manually so we can de-register later 56 | def md5(text): 57 | return hashlib.md5(text.encode("utf8")).digest() 58 | 59 | def random_md5(): 60 | return hashlib.md5(str(time.time()).encode("utf8")).digest() 61 | 62 | db.conn.create_function("temp_md5", 1, md5) 63 | db.conn.create_function("temp_random_md5", 0, random_md5) 64 | 65 | with db.conn: 66 | db.execute( 67 | """ 68 | update embeddings 69 | set content_hash = temp_md5(content) 70 | where content is not null 71 | """ 72 | ) 73 | db.execute( 74 | """ 75 | update embeddings 76 | set content_hash = temp_random_md5() 77 | where content is null 78 | """ 79 | ) 80 | 81 | db["embeddings"].create_index(["content_hash"]) 82 | 83 | # De-register functions 84 | db.conn.create_function("temp_md5", 1, None) 85 | db.conn.create_function("temp_random_md5", 0, None) 86 | 87 | 88 | @embeddings_migrations() 89 | def m005_add_content_blob(db): 90 | db["embeddings"].add_column("content_blob", bytes) 91 | db["embeddings"].transform( 92 | column_order=("collection_id", "id", "embedding", "content", "content_blob") 93 | ) 94 | -------------------------------------------------------------------------------- /llm/errors.py: -------------------------------------------------------------------------------- 1 | class ModelError(Exception): 2 | "Models can raise this error, which will be displayed to the user" 3 | 4 | 5 | class NeedsKeyException(ModelError): 6 | "Model needs an API key which has not been provided" 7 | -------------------------------------------------------------------------------- /llm/hookspecs.py: -------------------------------------------------------------------------------- 1 | from pluggy import HookimplMarker 2 | from pluggy import HookspecMarker 3 | 4 | hookspec = HookspecMarker("llm") 5 | hookimpl = HookimplMarker("llm") 6 | 7 | 8 | @hookspec 9 | def register_commands(cli): 10 | """Register additional CLI commands, e.g. 
'llm mycommand ...'""" 11 | 12 | 13 | @hookspec 14 | def register_models(register): 15 | "Register additional model instances representing LLM models that can be called" 16 | 17 | 18 | @hookspec 19 | def register_embedding_models(register): 20 | "Register additional model instances that can be used for embedding" 21 | 22 | 23 | @hookspec 24 | def register_template_loaders(register): 25 | "Register additional template loaders with prefixes" 26 | 27 | 28 | @hookspec 29 | def register_fragment_loaders(register): 30 | "Register additional fragment loaders with prefixes" 31 | 32 | 33 | @hookspec 34 | def register_tools(register): 35 | "Register functions that can be used as tools by the LLMs" 36 | -------------------------------------------------------------------------------- /llm/migrations.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from typing import Callable, List 3 | 4 | MIGRATIONS: List[Callable] = [] 5 | migration = MIGRATIONS.append 6 | 7 | 8 | def migrate(db): 9 | ensure_migrations_table(db) 10 | already_applied = {r["name"] for r in db["_llm_migrations"].rows} 11 | for fn in MIGRATIONS: 12 | name = fn.__name__ 13 | if name not in already_applied: 14 | fn(db) 15 | db["_llm_migrations"].insert( 16 | { 17 | "name": name, 18 | "applied_at": str(datetime.datetime.now(datetime.timezone.utc)), 19 | } 20 | ) 21 | already_applied.add(name) 22 | 23 | 24 | def ensure_migrations_table(db): 25 | if not db["_llm_migrations"].exists(): 26 | db["_llm_migrations"].create( 27 | { 28 | "name": str, 29 | "applied_at": str, 30 | }, 31 | pk="name", 32 | ) 33 | 34 | 35 | @migration 36 | def m001_initial(db): 37 | # Ensure the original table design exists, so other migrations can run 38 | if db["log"].exists(): 39 | # It needs to have the chat_id column 40 | if "chat_id" not in db["log"].columns_dict: 41 | db["log"].add_column("chat_id") 42 | return 43 | db["log"].create( 44 | { 45 | "provider": str, 46 | "system": str, 47 | "prompt": str, 48 | "chat_id": str, 49 | "response": str, 50 | "model": str, 51 | "timestamp": str, 52 | } 53 | ) 54 | 55 | 56 | @migration 57 | def m002_id_primary_key(db): 58 | db["log"].transform(pk="id") 59 | 60 | 61 | @migration 62 | def m003_chat_id_foreign_key(db): 63 | db["log"].transform(types={"chat_id": int}) 64 | db["log"].add_foreign_key("chat_id", "log", "id") 65 | 66 | 67 | @migration 68 | def m004_column_order(db): 69 | db["log"].transform( 70 | column_order=( 71 | "id", 72 | "model", 73 | "timestamp", 74 | "prompt", 75 | "system", 76 | "response", 77 | "chat_id", 78 | ) 79 | ) 80 | 81 | 82 | @migration 83 | def m004_drop_provider(db): 84 | db["log"].transform(drop=("provider",)) 85 | 86 | 87 | @migration 88 | def m005_debug(db): 89 | db["log"].add_column("debug", str) 90 | db["log"].add_column("duration_ms", int) 91 | 92 | 93 | @migration 94 | def m006_new_logs_table(db): 95 | columns = db["log"].columns_dict 96 | for column, type in ( 97 | ("options_json", str), 98 | ("prompt_json", str), 99 | ("response_json", str), 100 | ("reply_to_id", int), 101 | ): 102 | # It's possible people running development code like myself 103 | # might have accidentally created these columns already 104 | if column not in columns: 105 | db["log"].add_column(column, type) 106 | 107 | # Use .transform() to rename options and timestamp_utc, and set new order 108 | db["log"].transform( 109 | column_order=( 110 | "id", 111 | "model", 112 | "prompt", 113 | "system", 114 | "prompt_json", 115 | "options_json", 116 | "response", 117 | 
"response_json", 118 | "reply_to_id", 119 | "chat_id", 120 | "duration_ms", 121 | "timestamp_utc", 122 | ), 123 | rename={ 124 | "timestamp": "timestamp_utc", 125 | "options": "options_json", 126 | }, 127 | ) 128 | 129 | 130 | @migration 131 | def m007_finish_logs_table(db): 132 | db["log"].transform( 133 | drop={"debug"}, 134 | rename={"timestamp_utc": "datetime_utc"}, 135 | drop_foreign_keys=("chat_id",), 136 | ) 137 | with db.conn: 138 | db.execute("alter table log rename to logs") 139 | 140 | 141 | @migration 142 | def m008_reply_to_id_foreign_key(db): 143 | db["logs"].add_foreign_key("reply_to_id", "logs", "id") 144 | 145 | 146 | @migration 147 | def m008_fix_column_order_in_logs(db): 148 | # reply_to_id ended up at the end after foreign key added 149 | db["logs"].transform( 150 | column_order=( 151 | "id", 152 | "model", 153 | "prompt", 154 | "system", 155 | "prompt_json", 156 | "options_json", 157 | "response", 158 | "response_json", 159 | "reply_to_id", 160 | "chat_id", 161 | "duration_ms", 162 | "timestamp_utc", 163 | ), 164 | ) 165 | 166 | 167 | @migration 168 | def m009_delete_logs_table_if_empty(db): 169 | # We moved to a new table design, but we don't delete the table 170 | # if someone has put data in it 171 | if not db["logs"].count: 172 | db["logs"].drop() 173 | 174 | 175 | @migration 176 | def m010_create_new_log_tables(db): 177 | db["conversations"].create( 178 | { 179 | "id": str, 180 | "name": str, 181 | "model": str, 182 | }, 183 | pk="id", 184 | ) 185 | db["responses"].create( 186 | { 187 | "id": str, 188 | "model": str, 189 | "prompt": str, 190 | "system": str, 191 | "prompt_json": str, 192 | "options_json": str, 193 | "response": str, 194 | "response_json": str, 195 | "conversation_id": str, 196 | "duration_ms": int, 197 | "datetime_utc": str, 198 | }, 199 | pk="id", 200 | foreign_keys=(("conversation_id", "conversations", "id"),), 201 | ) 202 | 203 | 204 | @migration 205 | def m011_fts_for_responses(db): 206 | db["responses"].enable_fts(["prompt", "response"], create_triggers=True) 207 | 208 | 209 | @migration 210 | def m012_attachments_tables(db): 211 | db["attachments"].create( 212 | { 213 | "id": str, 214 | "type": str, 215 | "path": str, 216 | "url": str, 217 | "content": bytes, 218 | }, 219 | pk="id", 220 | ) 221 | db["prompt_attachments"].create( 222 | { 223 | "response_id": str, 224 | "attachment_id": str, 225 | "order": int, 226 | }, 227 | foreign_keys=( 228 | ("response_id", "responses", "id"), 229 | ("attachment_id", "attachments", "id"), 230 | ), 231 | pk=("response_id", "attachment_id"), 232 | ) 233 | 234 | 235 | @migration 236 | def m013_usage(db): 237 | db["responses"].add_column("input_tokens", int) 238 | db["responses"].add_column("output_tokens", int) 239 | db["responses"].add_column("token_details", str) 240 | 241 | 242 | @migration 243 | def m014_schemas(db): 244 | db["schemas"].create( 245 | { 246 | "id": str, 247 | "content": str, 248 | }, 249 | pk="id", 250 | ) 251 | db["responses"].add_column("schema_id", str, fk="schemas", fk_col="id") 252 | # Clean up SQL create table indentation 253 | db["responses"].transform() 254 | # These changes may have dropped the FTS configuration, fix that 255 | db["responses"].enable_fts( 256 | ["prompt", "response"], create_triggers=True, replace=True 257 | ) 258 | 259 | 260 | @migration 261 | def m015_fragments_tables(db): 262 | db["fragments"].create( 263 | { 264 | "id": int, 265 | "hash": str, 266 | "content": str, 267 | "datetime_utc": str, 268 | "source": str, 269 | }, 270 | pk="id", 271 | ) 272 | 
db["fragments"].create_index(["hash"], unique=True) 273 | db["fragment_aliases"].create( 274 | { 275 | "alias": str, 276 | "fragment_id": int, 277 | }, 278 | foreign_keys=(("fragment_id", "fragments", "id"),), 279 | pk="alias", 280 | ) 281 | db["prompt_fragments"].create( 282 | { 283 | "response_id": str, 284 | "fragment_id": int, 285 | "order": int, 286 | }, 287 | foreign_keys=( 288 | ("response_id", "responses", "id"), 289 | ("fragment_id", "fragments", "id"), 290 | ), 291 | pk=("response_id", "fragment_id"), 292 | ) 293 | db["system_fragments"].create( 294 | { 295 | "response_id": str, 296 | "fragment_id": int, 297 | "order": int, 298 | }, 299 | foreign_keys=( 300 | ("response_id", "responses", "id"), 301 | ("fragment_id", "fragments", "id"), 302 | ), 303 | pk=("response_id", "fragment_id"), 304 | ) 305 | 306 | 307 | @migration 308 | def m016_fragments_table_pks(db): 309 | # The same fragment can be attached to a response multiple times 310 | # https://github.com/simonw/llm/issues/863#issuecomment-2781720064 311 | db["prompt_fragments"].transform(pk=("response_id", "fragment_id", "order")) 312 | db["system_fragments"].transform(pk=("response_id", "fragment_id", "order")) 313 | 314 | 315 | @migration 316 | def m017_tools_tables(db): 317 | db["tools"].create( 318 | { 319 | "id": int, 320 | "hash": str, 321 | "name": str, 322 | "description": str, 323 | "input_schema": str, 324 | }, 325 | pk="id", 326 | ) 327 | db["tools"].create_index(["hash"], unique=True) 328 | # Many-to-many relationship between tools and responses 329 | db["tool_responses"].create( 330 | { 331 | "tool_id": int, 332 | "response_id": str, 333 | }, 334 | foreign_keys=( 335 | ("tool_id", "tools", "id"), 336 | ("response_id", "responses", "id"), 337 | ), 338 | pk=("tool_id", "response_id"), 339 | ) 340 | # tool_calls and tool_results are one-to-many against responses 341 | db["tool_calls"].create( 342 | { 343 | "id": int, 344 | "response_id": str, 345 | "tool_id": int, 346 | "name": str, 347 | "arguments": str, 348 | "tool_call_id": str, 349 | }, 350 | pk="id", 351 | foreign_keys=( 352 | ("response_id", "responses", "id"), 353 | ("tool_id", "tools", "id"), 354 | ), 355 | ) 356 | db["tool_results"].create( 357 | { 358 | "id": int, 359 | "response_id": str, 360 | "tool_id": int, 361 | "name": str, 362 | "output": str, 363 | "tool_call_id": str, 364 | }, 365 | pk="id", 366 | foreign_keys=( 367 | ("response_id", "responses", "id"), 368 | ("tool_id", "tools", "id"), 369 | ), 370 | ) 371 | 372 | 373 | @migration 374 | def m017_tools_plugin(db): 375 | db["tools"].add_column("plugin") 376 | 377 | 378 | @migration 379 | def m018_tool_instances(db): 380 | # Used to track instances of Toolbox classes that may be 381 | # used multiple times by different tools 382 | db["tool_instances"].create( 383 | { 384 | "id": int, 385 | "plugin": str, 386 | "name": str, 387 | "arguments": str, 388 | }, 389 | pk="id", 390 | ) 391 | # We record which instance was used only on the results 392 | db["tool_results"].add_column("instance_id", fk="tool_instances") 393 | 394 | 395 | @migration 396 | def m019_resolved_model(db): 397 | # For models like gemini-1.5-flash-latest where we wish to record 398 | # the resolved model name in addition to the alias 399 | db["responses"].add_column("resolved_model", str) 400 | 401 | 402 | @migration 403 | def m020_tool_results_attachments(db): 404 | db["tool_results_attachments"].create( 405 | { 406 | "tool_result_id": int, 407 | "attachment_id": str, 408 | "order": int, 409 | }, 410 | foreign_keys=( 411 | 
("tool_result_id", "tool_results", "id"), 412 | ("attachment_id", "attachments", "id"), 413 | ), 414 | pk=("tool_result_id", "attachment_id"), 415 | ) 416 | 417 | 418 | @migration 419 | def m021_tool_results_exception(db): 420 | db["tool_results"].add_column("exception", str) 421 | -------------------------------------------------------------------------------- /llm/plugins.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from importlib import metadata 3 | import os 4 | import pluggy 5 | import sys 6 | from . import hookspecs 7 | 8 | DEFAULT_PLUGINS = ( 9 | "llm.default_plugins.openai_models", 10 | "llm.default_plugins.default_tools", 11 | ) 12 | 13 | pm = pluggy.PluginManager("llm") 14 | pm.add_hookspecs(hookspecs) 15 | 16 | LLM_LOAD_PLUGINS = os.environ.get("LLM_LOAD_PLUGINS", None) 17 | 18 | _loaded = False 19 | 20 | 21 | def load_plugins(): 22 | global _loaded 23 | if _loaded: 24 | return 25 | _loaded = True 26 | if not hasattr(sys, "_called_from_test") and LLM_LOAD_PLUGINS is None: 27 | # Only load plugins if not running tests 28 | pm.load_setuptools_entrypoints("llm") 29 | 30 | # Load any plugins specified in LLM_LOAD_PLUGINS") 31 | if LLM_LOAD_PLUGINS is not None: 32 | for package_name in [ 33 | name for name in LLM_LOAD_PLUGINS.split(",") if name.strip() 34 | ]: 35 | try: 36 | distribution = metadata.distribution(package_name) # Updated call 37 | llm_entry_points = [ 38 | ep for ep in distribution.entry_points if ep.group == "llm" 39 | ] 40 | for entry_point in llm_entry_points: 41 | mod = entry_point.load() 42 | pm.register(mod, name=entry_point.name) 43 | # Ensure name can be found in plugin_to_distinfo later: 44 | pm._plugin_distinfo.append((mod, distribution)) # type: ignore 45 | except metadata.PackageNotFoundError: 46 | sys.stderr.write(f"Plugin {package_name} could not be found\n") 47 | 48 | for plugin in DEFAULT_PLUGINS: 49 | mod = importlib.import_module(plugin) 50 | pm.register(mod, plugin) 51 | -------------------------------------------------------------------------------- /llm/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simonw/llm/2292d7a56df25b7fd457b53bcc7d5cfccdf1821c/llm/py.typed -------------------------------------------------------------------------------- /llm/templates.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, ConfigDict 2 | import string 3 | from typing import Optional, Any, Dict, List, Tuple 4 | 5 | 6 | class AttachmentType(BaseModel): 7 | type: str 8 | value: str 9 | 10 | 11 | class Template(BaseModel): 12 | name: str 13 | prompt: Optional[str] = None 14 | system: Optional[str] = None 15 | attachments: Optional[List[str]] = None 16 | attachment_types: Optional[List[AttachmentType]] = None 17 | model: Optional[str] = None 18 | defaults: Optional[Dict[str, Any]] = None 19 | options: Optional[Dict[str, Any]] = None 20 | extract: Optional[bool] = None # For extracting fenced code blocks 21 | extract_last: Optional[bool] = None 22 | schema_object: Optional[dict] = None 23 | fragments: Optional[List[str]] = None 24 | system_fragments: Optional[List[str]] = None 25 | tools: Optional[List[str]] = None 26 | functions: Optional[str] = None 27 | 28 | model_config = ConfigDict(extra="forbid") 29 | 30 | class MissingVariables(Exception): 31 | pass 32 | 33 | def __init__(self, **data): 34 | super().__init__(**data) 35 | # Not a pydantic field to avoid 
YAML being able to set it 36 | # this controls if Python inline functions code is trusted 37 | self._functions_is_trusted = False 38 | 39 | def evaluate( 40 | self, input: str, params: Optional[Dict[str, Any]] = None 41 | ) -> Tuple[Optional[str], Optional[str]]: 42 | params = params or {} 43 | params["input"] = input 44 | if self.defaults: 45 | for k, v in self.defaults.items(): 46 | if k not in params: 47 | params[k] = v 48 | prompt: Optional[str] = None 49 | system: Optional[str] = None 50 | if not self.prompt: 51 | system = self.interpolate(self.system, params) 52 | prompt = input 53 | else: 54 | prompt = self.interpolate(self.prompt, params) 55 | system = self.interpolate(self.system, params) 56 | return prompt, system 57 | 58 | def vars(self) -> set: 59 | all_vars = set() 60 | for text in [self.prompt, self.system]: 61 | if not text: 62 | continue 63 | all_vars.update(self.extract_vars(string.Template(text))) 64 | return all_vars 65 | 66 | @classmethod 67 | def interpolate(cls, text: Optional[str], params: Dict[str, Any]) -> Optional[str]: 68 | if not text: 69 | return text 70 | # Confirm all variables in text are provided 71 | string_template = string.Template(text) 72 | vars = cls.extract_vars(string_template) 73 | missing = [p for p in vars if p not in params] 74 | if missing: 75 | raise cls.MissingVariables( 76 | "Missing variables: {}".format(", ".join(missing)) 77 | ) 78 | return string_template.substitute(**params) 79 | 80 | @staticmethod 81 | def extract_vars(string_template: string.Template) -> List[str]: 82 | return [ 83 | match.group("named") 84 | for match in string_template.pattern.finditer(string_template.template) 85 | if match.group("named") 86 | ] 87 | -------------------------------------------------------------------------------- /llm/tools.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | from importlib.metadata import version 3 | import time 4 | 5 | 6 | def llm_version() -> str: 7 | "Return the installed version of llm" 8 | return version("llm") 9 | 10 | 11 | def llm_time() -> dict: 12 | "Returns the current time, as local time and UTC" 13 | # Get current times 14 | utc_time = datetime.now(timezone.utc) 15 | local_time = datetime.now() 16 | 17 | # Get timezone information 18 | local_tz_name = time.tzname[time.localtime().tm_isdst] 19 | is_dst = bool(time.localtime().tm_isdst) 20 | 21 | # Calculate offset 22 | offset_seconds = -time.timezone if not is_dst else -time.altzone 23 | offset_hours = offset_seconds // 3600 24 | offset_minutes = (offset_seconds % 3600) // 60 25 | 26 | timezone_offset = ( 27 | f"UTC{'+' if offset_hours >= 0 else ''}{offset_hours:02d}:{offset_minutes:02d}" 28 | ) 29 | 30 | return { 31 | "utc_time": utc_time.strftime("%Y-%m-%d %H:%M:%S UTC"), 32 | "utc_time_iso": utc_time.isoformat(), 33 | "local_timezone": local_tz_name, 34 | "local_time": local_time.strftime("%Y-%m-%d %H:%M:%S"), 35 | "timezone_offset": timezone_offset, 36 | "is_dst": is_dst, 37 | } 38 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | [mypy-pluggy.*] 4 | ignore_missing_imports = True 5 | 6 | [mypy-click_default_group.*] 7 | ignore_missing_imports = True 8 | 9 | [mypy-sqlite_migrate.*] 10 | ignore_missing_imports = True 11 | -------------------------------------------------------------------------------- /pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [project] 2 | name = "llm" 3 | version = "0.26" 4 | description = "CLI utility and Python library for interacting with Large Language Models from organizations like OpenAI, Anthropic and Gemini plus local models installed on your own machine." 5 | readme = { file = "README.md", content-type = "text/markdown" } 6 | authors = [ 7 | { name = "Simon Willison" }, 8 | ] 9 | license = "Apache-2.0" 10 | requires-python = ">=3.9" 11 | classifiers = [ 12 | "Development Status :: 4 - Beta", 13 | "Intended Audience :: Developers", 14 | "Intended Audience :: End Users/Desktop", 15 | "Intended Audience :: Science/Research", 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3.9", 18 | "Programming Language :: Python :: 3.10", 19 | "Programming Language :: Python :: 3.11", 20 | "Programming Language :: Python :: 3.12", 21 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 22 | "Topic :: Text Processing :: Linguistic", 23 | "Topic :: Utilities", 24 | ] 25 | 26 | dependencies = [ 27 | "click", 28 | "condense-json>=0.1.3", 29 | "openai>=1.55.3", 30 | "click-default-group>=1.2.3", 31 | "sqlite-utils>=3.37", 32 | "sqlite-migrate>=0.1a2", 33 | "pydantic>=2.0.0", 34 | "PyYAML", 35 | "pluggy", 36 | "python-ulid", 37 | "setuptools", 38 | "pip", 39 | "pyreadline3; sys_platform == 'win32'", 40 | "puremagic", 41 | ] 42 | 43 | [project.urls] 44 | Homepage = "https://github.com/simonw/llm" 45 | Documentation = "https://llm.datasette.io/" 46 | Issues = "https://github.com/simonw/llm/issues" 47 | CI = "https://github.com/simonw/llm/actions" 48 | Changelog = "https://github.com/simonw/llm/releases" 49 | 50 | [project.scripts] 51 | llm = "llm.cli:cli" 52 | 53 | [project.optional-dependencies] 54 | test = [ 55 | "build", 56 | "click<8.2.0", # https://github.com/simonw/llm/issues/1024 57 | "pytest", 58 | "numpy", 59 | "pytest-httpx>=0.33.0", 60 | "pytest-asyncio", 61 | "cogapp", 62 | "mypy>=1.10.0", 63 | "black>=25.1.0", 64 | "pytest-recording", 65 | "ruff", 66 | "syrupy", 67 | "types-click", 68 | "types-PyYAML", 69 | "types-setuptools", 70 | "llm-echo==0.3a3", 71 | ] 72 | 73 | [build-system] 74 | requires = ["setuptools"] 75 | build-backend = "setuptools.build_meta" 76 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | asyncio_default_fixture_loop_scope = function -------------------------------------------------------------------------------- /ruff.toml: -------------------------------------------------------------------------------- 1 | line-length = 160 2 | -------------------------------------------------------------------------------- /tests/cassettes/test_tools/test_tool_use_chain_of_two_calls.yaml: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: '{"messages":[{"role":"user","content":"Can the country of Crumpet have 4 | dragons? 
Answer with only YES or NO"}],"model":"gpt-4o-mini","stream":false,"tools":[{"type":"function","function":{"name":"lookup_population","description":"Returns 5 | the current population of the specified fictional country","parameters":{"properties":{"country":{"type":"string"}},"required":["country"],"type":"object"}}},{"type":"function","function":{"name":"can_have_dragons","description":"Returns 6 | True if the specified population can have dragons, False otherwise","parameters":{"properties":{"population":{"type":"integer"}},"required":["population"],"type":"object"}}}]}' 7 | headers: 8 | accept: 9 | - application/json 10 | accept-encoding: 11 | - gzip, deflate 12 | connection: 13 | - keep-alive 14 | content-length: 15 | - '650' 16 | content-type: 17 | - application/json 18 | host: 19 | - api.openai.com 20 | user-agent: 21 | - OpenAI/Python 1.78.0 22 | x-stainless-arch: 23 | - arm64 24 | x-stainless-async: 25 | - 'false' 26 | x-stainless-lang: 27 | - python 28 | x-stainless-os: 29 | - MacOS 30 | x-stainless-package-version: 31 | - 1.78.0 32 | x-stainless-read-timeout: 33 | - '600' 34 | x-stainless-retry-count: 35 | - '0' 36 | x-stainless-runtime: 37 | - CPython 38 | x-stainless-runtime-version: 39 | - 3.13.3 40 | method: POST 41 | uri: https://api.openai.com/v1/chat/completions 42 | response: 43 | body: 44 | string: !!binary | 45 | H4sIAAAAAAAAAwAAAP//jFPBjtowEL3nK6w5kyrJ0gI5slXppWzbZbdqyyoyziS4OLZrO1sQ4t+r 46 | GEjCLpWaQ2TNm/fmzYy9DwgBnkNKgK2pY5UW4fSbns1n+ee7+eLP9uPmx+N29Zu5L/fT99VCwqBh 47 | qNUvZO7MesNUpQU6rk4wM0gdNqrxaDiK390kb2MPVCpH0dBK7cKhCisueZhEyTCMRmE8PrHXijO0 48 | kJKfASGE7P2/8Slz3EJKosE5UqG1tERI2yRCwCjRRIBay62j0sGgA5mSDmVjXdZC9ACnlMgYFaIr 49 | fPz2vXM3LCpEtlh8Hz98mKuRmd/Su+nD3Imv98+fZr16R+md9oaKWrJ2SD28jacvihECklaeK5Ta 50 | 1DrTSteCXhEhBKgp6wqlaxqA/RKYqqUzuyWkS7g1daXRLeEAF7RDcO381JuLwaK2VLweGJVSOW/F 51 | T+zphBza5QhVaqNW9gUVCi65XWcGqfU990cfnI14C1BfbBe0UZV2mVMb9EUnyVEUugvYgfHoBDrl 52 | qOjFo8ngilyWo6Pcb7+9cIyyNeYdtbt4tM656gFBr/XXbq5pH9vnsvwf+Q5gDLXDPNMGc84uO+7S 53 | DDbv819p7ZC9YbBonjnDzHE0zTpyLGgtjq8G7M46rLKCyxKNNtw/HSh0Ft1MknGSRJMIgkPwFwAA 54 | //8DALof6VxIBAAA 55 | headers: 56 | CF-RAY: 57 | - 93f47072dde6f88d-IAD 58 | Connection: 59 | - keep-alive 60 | Content-Encoding: 61 | - gzip 62 | Content-Type: 63 | - application/json 64 | Date: 65 | - Tue, 13 May 2025 19:07:32 GMT 66 | Server: 67 | - cloudflare 68 | Set-Cookie: 69 | - __cf_bm=vfHkbLfwVTTGPkFT0I4U0xn5CHQZYIpOutDV4z7NRlA-1747163252-1.0.1.1-kj_JiiyNxn9AWCWisV6.pYNShKVqqT0Foicji2.ZLNaAkHm5VEwac0QjxVhCiWQs9Xp_wvkeTzrgVxmD8bkzDwTPn96U.81YERXZda3_m18; 70 | path=/; expires=Tue, 13-May-25 19:37:32 GMT; domain=.api.openai.com; HttpOnly; 71 | Secure; SameSite=None 72 | - _cfuvid=SQgXKMy2qkeOsbwwTl62blvuirTS_TkZSvEOztbYIlI-1747163252293-0.0.1.1-604800000; 73 | path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None 74 | Transfer-Encoding: 75 | - chunked 76 | X-Content-Type-Options: 77 | - nosniff 78 | access-control-expose-headers: 79 | - X-Request-ID 80 | alt-svc: 81 | - h3=":443"; ma=86400 82 | cf-cache-status: 83 | - DYNAMIC 84 | openai-organization: 85 | - user-r3e61fpak04cbaokp5buoae4 86 | openai-processing-ms: 87 | - '574' 88 | openai-version: 89 | - '2020-10-01' 90 | strict-transport-security: 91 | - max-age=31536000; includeSubDomains; preload 92 | x-envoy-upstream-service-time: 93 | - '591' 94 | x-ratelimit-limit-requests: 95 | - '30000' 96 | x-ratelimit-limit-tokens: 97 | - '150000000' 98 | x-ratelimit-remaining-requests: 99 | - '29999' 100 | x-ratelimit-remaining-tokens: 101 | - '149999981' 102 | 
x-ratelimit-reset-requests: 103 | - 2ms 104 | x-ratelimit-reset-tokens: 105 | - 0s 106 | x-request-id: 107 | - req_1e7dabaf1f0dba1ec89a134d3bde8476 108 | status: 109 | code: 200 110 | message: OK 111 | - request: 112 | body: '{"messages":[{"role":"user","content":"Can the country of Crumpet have 113 | dragons? Answer with only YES or NO"},{"role":"assistant","tool_calls":[{"type":"function","id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","function":{"name":"lookup_population","arguments":"{\"country\": 114 | \"Crumpet\"}"}}]},{"role":"tool","tool_call_id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","content":"123124"}],"model":"gpt-4o-mini","stream":false,"tools":[{"type":"function","function":{"name":"lookup_population","description":"Returns 115 | the current population of the specified fictional country","parameters":{"properties":{"country":{"type":"string"}},"required":["country"],"type":"object"}}},{"type":"function","function":{"name":"can_have_dragons","description":"Returns 116 | True if the specified population can have dragons, False otherwise","parameters":{"properties":{"population":{"type":"integer"}},"required":["population"],"type":"object"}}}]}' 117 | headers: 118 | accept: 119 | - application/json 120 | accept-encoding: 121 | - gzip, deflate 122 | connection: 123 | - keep-alive 124 | content-length: 125 | - '906' 126 | content-type: 127 | - application/json 128 | host: 129 | - api.openai.com 130 | user-agent: 131 | - OpenAI/Python 1.78.0 132 | x-stainless-arch: 133 | - arm64 134 | x-stainless-async: 135 | - 'false' 136 | x-stainless-lang: 137 | - python 138 | x-stainless-os: 139 | - MacOS 140 | x-stainless-package-version: 141 | - 1.78.0 142 | x-stainless-read-timeout: 143 | - '600' 144 | x-stainless-retry-count: 145 | - '0' 146 | x-stainless-runtime: 147 | - CPython 148 | x-stainless-runtime-version: 149 | - 3.13.3 150 | method: POST 151 | uri: https://api.openai.com/v1/chat/completions 152 | response: 153 | body: 154 | string: !!binary | 155 | H4sIAAAAAAAAA4xTTYvbMBC9+1eIOcfFH2k+fNyWlEIPLaWkm+5itNLY0UaWVEkOzYb892J7YzvZ 156 | FOqDEfPmvXkzIx0DQkBwyAiwLfWsMjK8W5tP39a7es+i1YZvNs9fXxb4Jf7A/R1bwaRh6KdnZP7M 157 | esd0ZSR6oVUHM4vUY6Maz6fzeJYm76ctUGmOsqGVxodTHVZCiTCJkmkYzcN48creasHQQUZ+BYQQ 158 | cmz/jU/F8Q9kJJqcIxU6R0uErE8iBKyWTQSoc8J5qjxMBpBp5VE11lUt5QjwWsucUSmHwt13HJ2H 159 | YVEpc/p7+eMgvq92Lz9n68U9Z2n6UX9e3o/qddIH0xoqasX6IY3wPp5dFSMEFK2wK6jyLd1jzi0t 160 | tXJXGoQAtWVdofKNfzg+gNGmlrTRfYAsTtI4mZ7ggnQKbp0fR0OxWNSOyrfTokpp34q343p8RU79 161 | ZqQujdVP7ooKhVDCbXOL1LUNj+cenI20FqC+WC0Yqyvjc6932BaN40WnCsP1G6Fn0GtP5SieziY3 162 | 9HKOnop29/11Y5RtkQ/U4drRmgs9AoJR72/d3NLu+heq/B/5AWAMjUeeG4tcsMuOhzSLzev8V1o/ 163 | 5dYwOLR7wTD3Am2zD44FrWX3ZsAdnMcqL4Qq0Ror2ocDhcmjdJkskiRaRhCcgr8AAAD//wMAmw02 164 | QkYEAAA= 165 | headers: 166 | CF-RAY: 167 | - 93f47082ba71d640-IAD 168 | Connection: 169 | - keep-alive 170 | Content-Encoding: 171 | - gzip 172 | Content-Type: 173 | - application/json 174 | Date: 175 | - Tue, 13 May 2025 19:07:35 GMT 176 | Server: 177 | - cloudflare 178 | Set-Cookie: 179 | - __cf_bm=LL6YtOWVW4fA687_GIMcuJC7CM2I.uKx1vGaNkjFTgo-1747163255-1.0.1.1-qML6IsLM49e2bg7zp0uGqn3.JTJP5KlFYfb8o3v9LzyLb.cYoFBXn5te83Wxl5kVjDiXU2vH.QTFQu953KNx87LwsMkI2ZxTvH58oZWAawg; 180 | path=/; expires=Tue, 13-May-25 19:37:35 GMT; domain=.api.openai.com; HttpOnly; 181 | Secure; SameSite=None 182 | - _cfuvid=QOa3sx0F4_nAYKtjmx9ux7qfIsyipGZq94AL_SWd2ac-1747163255176-0.0.1.1-604800000; 183 | path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None 184 | Transfer-Encoding: 185 | - chunked 186 | X-Content-Type-Options: 187 | - 
nosniff 188 | access-control-expose-headers: 189 | - X-Request-ID 190 | alt-svc: 191 | - h3=":443"; ma=86400 192 | cf-cache-status: 193 | - DYNAMIC 194 | openai-organization: 195 | - user-r3e61fpak04cbaokp5buoae4 196 | openai-processing-ms: 197 | - '575' 198 | openai-version: 199 | - '2020-10-01' 200 | strict-transport-security: 201 | - max-age=31536000; includeSubDomains; preload 202 | x-envoy-upstream-service-time: 203 | - '587' 204 | x-ratelimit-limit-requests: 205 | - '30000' 206 | x-ratelimit-limit-tokens: 207 | - '150000000' 208 | x-ratelimit-remaining-requests: 209 | - '29999' 210 | x-ratelimit-remaining-tokens: 211 | - '149999976' 212 | x-ratelimit-reset-requests: 213 | - 2ms 214 | x-ratelimit-reset-tokens: 215 | - 0s 216 | x-request-id: 217 | - req_66cc3b2bbe3be82a37d29fba7672d82b 218 | status: 219 | code: 200 220 | message: OK 221 | - request: 222 | body: '{"messages":[{"role":"user","content":"Can the country of Crumpet have 223 | dragons? Answer with only YES or NO"},{"role":"assistant","tool_calls":[{"type":"function","id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","function":{"name":"lookup_population","arguments":"{\"country\": 224 | \"Crumpet\"}"}}]},{"role":"tool","tool_call_id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","content":"123124"},{"role":"assistant","tool_calls":[{"type":"function","id":"call_aq9UyiSFkzX6W8Ydc33DoI9Y","function":{"name":"can_have_dragons","arguments":"{\"population\": 225 | 123124}"}}]},{"role":"tool","tool_call_id":"call_aq9UyiSFkzX6W8Ydc33DoI9Y","content":"true"}],"model":"gpt-4o-mini","stream":false,"tools":[{"type":"function","function":{"name":"lookup_population","description":"Returns 226 | the current population of the specified fictional country","parameters":{"properties":{"country":{"type":"string"}},"required":["country"],"type":"object"}}},{"type":"function","function":{"name":"can_have_dragons","description":"Returns 227 | True if the specified population can have dragons, False otherwise","parameters":{"properties":{"population":{"type":"integer"}},"required":["population"],"type":"object"}}}]}' 228 | headers: 229 | accept: 230 | - application/json 231 | accept-encoding: 232 | - gzip, deflate 233 | connection: 234 | - keep-alive 235 | content-length: 236 | - '1157' 237 | content-type: 238 | - application/json 239 | host: 240 | - api.openai.com 241 | user-agent: 242 | - OpenAI/Python 1.78.0 243 | x-stainless-arch: 244 | - arm64 245 | x-stainless-async: 246 | - 'false' 247 | x-stainless-lang: 248 | - python 249 | x-stainless-os: 250 | - MacOS 251 | x-stainless-package-version: 252 | - 1.78.0 253 | x-stainless-read-timeout: 254 | - '600' 255 | x-stainless-retry-count: 256 | - '0' 257 | x-stainless-runtime: 258 | - CPython 259 | x-stainless-runtime-version: 260 | - 3.13.3 261 | method: POST 262 | uri: https://api.openai.com/v1/chat/completions 263 | response: 264 | body: 265 | string: !!binary | 266 | H4sIAAAAAAAAAwAAAP//jJJBb9swDIXv/hUCz/HgOGmd5NYW2447bNjQDIWhSLSjThYFiS42FPnv 267 | g+w0drcO2EUHfXzUexSfMyHAaNgJUEfJqvM2v/3mP37Z31ebq69xb/zdp+Jw8/Sh2lf8qG9gkRR0 268 | eETFL6p3ijpvkQ25EauAkjF1XVbranm9Kq+qAXSk0SZZ6zlfU94ZZ/KyKNd5UeXLzVl9JKMwwk58 269 | z4QQ4nk4k0+n8SfsRLF4uekwRtki7C5FQkAgm25AxmgiS8ewmKAix+gG6/fvP89JwKaPMrlzvbUz 270 | IJ0jlind4OnhTE4XF5ZaH+gQ/5BCY5yJxzqgjOTSi5HJw0BPmRAPQ9r+VQDwgTrPNdMPHJ5brq/H 271 | fjANeaKrM2Niaeei7eKNdrVGlsbG2bhASXVEPUmn2cpeG5qBbBb6bzNv9R6DG9f+T/sJKIWeUdc+ 272 | oDbqdeCpLGBawX+VXYY8GIaI4ckorNlgSB+hsZG9HRcD4q/I2NWNcS0GH8y4HY2vi9W23JRlsS0g 273 | O2W/AQAA//8DAFbEZUIrAwAA 274 | headers: 275 | CF-RAY: 276 | - 93f47096cf15d6e9-IAD 277 
| Connection: 278 | - keep-alive 279 | Content-Encoding: 280 | - gzip 281 | Content-Type: 282 | - application/json 283 | Date: 284 | - Tue, 13 May 2025 19:07:37 GMT 285 | Server: 286 | - cloudflare 287 | Set-Cookie: 288 | - __cf_bm=EDR.bZeRmrWVNTWef5aAJ2C5NT7yIBHq_6NzNGXNlX0-1747163257-1.0.1.1-YuS4Hj.Ncp4eOrYNT5L7AncdqT5Xn8a2DTxCka1HKKBGKdT8k70yvNTA3wMlQyVPxGD3HSCysY0a1n1zCkNs._TQe9hWOuoIDG9LtD9MBr4; 289 | path=/; expires=Tue, 13-May-25 19:37:37 GMT; domain=.api.openai.com; HttpOnly; 290 | Secure; SameSite=None 291 | - _cfuvid=3Xqq8l5nvU4mfyEz4.llgkHC3jY.IBLFTJrD76P7UsY-1747163257692-0.0.1.1-604800000; 292 | path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None 293 | Transfer-Encoding: 294 | - chunked 295 | X-Content-Type-Options: 296 | - nosniff 297 | access-control-expose-headers: 298 | - X-Request-ID 299 | alt-svc: 300 | - h3=":443"; ma=86400 301 | cf-cache-status: 302 | - DYNAMIC 303 | openai-organization: 304 | - user-r3e61fpak04cbaokp5buoae4 305 | openai-processing-ms: 306 | - '222' 307 | openai-version: 308 | - '2020-10-01' 309 | strict-transport-security: 310 | - max-age=31536000; includeSubDomains; preload 311 | x-envoy-upstream-service-time: 312 | - '227' 313 | x-ratelimit-limit-requests: 314 | - '30000' 315 | x-ratelimit-limit-tokens: 316 | - '150000000' 317 | x-ratelimit-remaining-requests: 318 | - '29999' 319 | x-ratelimit-remaining-tokens: 320 | - '149999974' 321 | x-ratelimit-reset-requests: 322 | - 2ms 323 | x-ratelimit-reset-tokens: 324 | - 0s 325 | x-request-id: 326 | - req_d157a5a0f4b64776bc387ccab624e664 327 | status: 328 | code: 200 329 | message: OK 330 | version: 1 331 | -------------------------------------------------------------------------------- /tests/test-llm-load-plugins.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This should only run in environments where both 3 | # llm-cluster and llm-mistral are installed 4 | 5 | PLUGINS=$(llm plugins) 6 | echo "$PLUGINS" | jq 'any(.[]; .name == "llm-mistral")' | \ 7 | grep -q true || ( \ 8 | echo "Test failed: llm-mistral not found" && \ 9 | exit 1 \ 10 | ) 11 | # With the LLM_LOAD_PLUGINS we should not see that 12 | PLUGINS2=$(LLM_LOAD_PLUGINS=llm-cluster llm plugins) 13 | echo "$PLUGINS2" | jq 'any(.[]; .name == "llm-mistral")' | \ 14 | grep -q false || ( \ 15 | echo "Test failed: llm-mistral should not have been loaded" && \ 16 | exit 1 \ 17 | ) 18 | echo "$PLUGINS2" | jq 'any(.[]; .name == "llm-cluster")' | \ 19 | grep -q true || ( \ 20 | echo "Test llm-cluster should have been loaded" && \ 21 | exit 1 \ 22 | ) 23 | # With LLM_LOAD_PLUGINS='' we should see no plugins 24 | PLUGINS3=$(LLM_LOAD_PLUGINS='' llm plugins) 25 | echo "$PLUGINS3"| \ 26 | grep -q '\[\]' || ( \ 27 | echo "Test failed: plugins should have returned []" && \ 28 | exit 1 \ 29 | ) 30 | -------------------------------------------------------------------------------- /tests/test_aliases.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from llm.cli import cli 3 | import llm 4 | import json 5 | import pytest 6 | import re 7 | 8 | 9 | @pytest.mark.parametrize("model_id_or_alias", ("gpt-3.5-turbo", "chatgpt")) 10 | def test_set_alias(model_id_or_alias): 11 | with pytest.raises(llm.UnknownModelError): 12 | llm.get_model("this-is-a-new-alias") 13 | llm.set_alias("this-is-a-new-alias", model_id_or_alias) 14 | assert llm.get_model("this-is-a-new-alias").model_id == "gpt-3.5-turbo" 15 | 16 | 17 | def 
test_remove_alias(): 18 | with pytest.raises(KeyError): 19 | llm.remove_alias("some-other-alias") 20 | llm.set_alias("some-other-alias", "gpt-3.5-turbo") 21 | assert llm.get_model("some-other-alias").model_id == "gpt-3.5-turbo" 22 | llm.remove_alias("some-other-alias") 23 | with pytest.raises(llm.UnknownModelError): 24 | llm.get_model("some-other-alias") 25 | 26 | 27 | @pytest.mark.parametrize("args", (["aliases", "list"], ["aliases"])) 28 | def test_cli_aliases_list(args): 29 | llm.set_alias("e-demo", "embed-demo") 30 | runner = CliRunner() 31 | result = runner.invoke(cli, args) 32 | assert result.exit_code == 0 33 | for line in ( 34 | "3.5 : gpt-3.5-turbo\n" 35 | "chatgpt : gpt-3.5-turbo\n" 36 | "chatgpt-16k : gpt-3.5-turbo-16k\n" 37 | "3.5-16k : gpt-3.5-turbo-16k\n" 38 | "4 : gpt-4\n" 39 | "gpt4 : gpt-4\n" 40 | "4-32k : gpt-4-32k\n" 41 | "e-demo : embed-demo (embedding)\n" 42 | "ada : text-embedding-ada-002 (embedding)\n" 43 | ).split("\n"): 44 | line = line.strip() 45 | if not line: 46 | continue 47 | # Turn the whitespace into a regex 48 | regex = r"\s+".join(re.escape(part) for part in line.split()) 49 | assert re.search(regex, result.output) 50 | 51 | 52 | @pytest.mark.parametrize("args", (["aliases", "list"], ["aliases"])) 53 | def test_cli_aliases_list_json(args): 54 | llm.set_alias("e-demo", "embed-demo") 55 | runner = CliRunner() 56 | result = runner.invoke(cli, args + ["--json"]) 57 | assert result.exit_code == 0 58 | assert ( 59 | json.loads(result.output).items() 60 | >= { 61 | "3.5": "gpt-3.5-turbo", 62 | "chatgpt": "gpt-3.5-turbo", 63 | "chatgpt-16k": "gpt-3.5-turbo-16k", 64 | "3.5-16k": "gpt-3.5-turbo-16k", 65 | "4": "gpt-4", 66 | "gpt4": "gpt-4", 67 | "4-32k": "gpt-4-32k", 68 | "ada": "text-embedding-ada-002", 69 | "e-demo": "embed-demo", 70 | }.items() 71 | ) 72 | 73 | 74 | @pytest.mark.parametrize( 75 | "args,expected,expected_error", 76 | ( 77 | (["foo", "bar"], {"foo": "bar"}, None), 78 | (["foo", "-q", "mo"], {"foo": "mock"}, None), 79 | (["foo", "-q", "mog"], None, "No model found matching query: mog"), 80 | ), 81 | ) 82 | def test_cli_aliases_set(user_path, args, expected, expected_error): 83 | # Should be not aliases.json at start 84 | assert not (user_path / "aliases.json").exists() 85 | runner = CliRunner() 86 | result = runner.invoke(cli, ["aliases", "set"] + args) 87 | if not expected_error: 88 | assert result.exit_code == 0 89 | assert (user_path / "aliases.json").exists() 90 | assert json.loads((user_path / "aliases.json").read_text("utf-8")) == expected 91 | else: 92 | assert result.exit_code == 1 93 | assert result.output.strip() == f"Error: {expected_error}" 94 | 95 | 96 | def test_cli_aliases_path(user_path): 97 | runner = CliRunner() 98 | result = runner.invoke(cli, ["aliases", "path"]) 99 | assert result.exit_code == 0 100 | assert result.output.strip() == str(user_path / "aliases.json") 101 | 102 | 103 | def test_cli_aliases_remove(user_path): 104 | (user_path / "aliases.json").write_text(json.dumps({"foo": "bar"}), "utf-8") 105 | runner = CliRunner() 106 | result = runner.invoke(cli, ["aliases", "remove", "foo"]) 107 | assert result.exit_code == 0 108 | assert json.loads((user_path / "aliases.json").read_text("utf-8")) == {} 109 | 110 | 111 | def test_cli_aliases_remove_invalid(user_path): 112 | (user_path / "aliases.json").write_text(json.dumps({"foo": "bar"}), "utf-8") 113 | runner = CliRunner() 114 | result = runner.invoke(cli, ["aliases", "remove", "invalid"]) 115 | assert result.exit_code == 1 116 | assert result.output == "Error: No such alias: 
invalid\n" 117 | 118 | 119 | @pytest.mark.parametrize("args", (["models"], ["models", "list"])) 120 | def test_cli_aliases_are_registered(user_path, args): 121 | (user_path / "aliases.json").write_text( 122 | json.dumps({"foo": "bar", "turbo": "gpt-3.5-turbo"}), "utf-8" 123 | ) 124 | runner = CliRunner() 125 | result = runner.invoke(cli, args) 126 | assert result.exit_code == 0 127 | # Check for model line only, without keys, as --options is not used 128 | assert "gpt-3.5-turbo (aliases: 3.5, chatgpt, turbo)" in result.output 129 | -------------------------------------------------------------------------------- /tests/test_async.py: -------------------------------------------------------------------------------- 1 | import llm 2 | import pytest 3 | 4 | 5 | @pytest.mark.asyncio 6 | async def test_async_model(async_mock_model): 7 | gathered = [] 8 | async_mock_model.enqueue(["hello world"]) 9 | async for chunk in async_mock_model.prompt("hello"): 10 | gathered.append(chunk) 11 | assert gathered == ["hello world"] 12 | # Not as an iterator 13 | async_mock_model.enqueue(["hello world"]) 14 | response = await async_mock_model.prompt("hello") 15 | text = await response.text() 16 | assert text == "hello world" 17 | assert isinstance(response, llm.AsyncResponse) 18 | usage = await response.usage() 19 | assert usage.input == 1 20 | assert usage.output == 1 21 | assert usage.details is None 22 | 23 | 24 | @pytest.mark.asyncio 25 | async def test_async_model_conversation(async_mock_model): 26 | async_mock_model.enqueue(["joke 1"]) 27 | conversation = async_mock_model.conversation() 28 | response = await conversation.prompt("joke") 29 | text = await response.text() 30 | assert text == "joke 1" 31 | async_mock_model.enqueue(["joke 2"]) 32 | response2 = await conversation.prompt("again") 33 | text2 = await response2.text() 34 | assert text2 == "joke 2" 35 | 36 | 37 | @pytest.mark.asyncio 38 | async def test_async_on_done(async_mock_model): 39 | async_mock_model.enqueue(["hello world"]) 40 | response = await async_mock_model.prompt(prompt="hello") 41 | caught = [] 42 | 43 | def done(response): 44 | caught.append(response) 45 | 46 | assert len(caught) == 0 47 | await response.on_done(done) 48 | await response.text() 49 | assert response._done 50 | assert len(caught) == 1 51 | 52 | 53 | @pytest.mark.asyncio 54 | async def test_async_conversation(async_mock_model): 55 | async_mock_model.enqueue(["one"]) 56 | conversation = async_mock_model.conversation() 57 | response1 = await conversation.prompt("hi").text() 58 | async_mock_model.enqueue(["two"]) 59 | response2 = await conversation.prompt("hi").text() 60 | assert response1 == "one" 61 | assert response2 == "two" 62 | -------------------------------------------------------------------------------- /tests/test_attachments.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from unittest.mock import ANY 3 | import llm 4 | from llm import cli 5 | import pytest 6 | 7 | TINY_PNG = ( 8 | b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xa6\x00\x00\x01\x1a" 9 | b"\x02\x03\x00\x00\x00\xe6\x99\xc4^\x00\x00\x00\tPLTE\xff\xff\xff" 10 | b"\x00\xff\x00\xfe\x01\x00\x12t\x01J\x00\x00\x00GIDATx\xda\xed\xd81\x11" 11 | b"\x000\x08\xc0\xc0.]\xea\xaf&Q\x89\x04V\xe0>\xf3+\xc8\x91Z\xf4\xa2\x08EQ\x14E" 12 | b"Q\x14EQ\x14EQ\xd4B\x91$I3\xbb\xbf\x08EQ\x14EQ\x14EQ\x14E\xd1\xa5" 13 | b"\xd4\x17\x91\xc6\x95\x05\x15\x0f\x9f\xc5\t\x9f\xa4\x00\x00\x00\x00IEND\xaeB`" 14 | b"\x82" 15 | ) 16 | 17 | TINY_WAV = 
b"RIFF$\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00D\xac\x00\x00" 18 | 19 | 20 | @pytest.mark.parametrize( 21 | "attachment_type,attachment_content", 22 | [ 23 | ("image/png", TINY_PNG), 24 | ("audio/wav", TINY_WAV), 25 | ], 26 | ) 27 | def test_prompt_attachment(mock_model, logs_db, attachment_type, attachment_content): 28 | runner = CliRunner() 29 | mock_model.enqueue(["two boxes"]) 30 | result = runner.invoke( 31 | cli.cli, 32 | ["prompt", "-m", "mock", "describe file", "-a", "-"], 33 | input=attachment_content, 34 | catch_exceptions=False, 35 | ) 36 | assert result.exit_code == 0, result.output 37 | assert result.output == "two boxes\n" 38 | assert mock_model.history[0][0].attachments[0] == llm.Attachment( 39 | type=attachment_type, path=None, url=None, content=attachment_content, _id=ANY 40 | ) 41 | 42 | # Check it was logged correctly 43 | conversations = list(logs_db["conversations"].rows) 44 | assert len(conversations) == 1 45 | conversation = conversations[0] 46 | assert conversation["model"] == "mock" 47 | assert conversation["name"] == "describe file" 48 | response = list(logs_db["responses"].rows)[0] 49 | attachment = list(logs_db["attachments"].rows)[0] 50 | assert attachment == { 51 | "id": ANY, 52 | "type": attachment_type, 53 | "path": None, 54 | "url": None, 55 | "content": attachment_content, 56 | } 57 | prompt_attachment = list(logs_db["prompt_attachments"].rows)[0] 58 | assert prompt_attachment["attachment_id"] == attachment["id"] 59 | assert prompt_attachment["response_id"] == response["id"] 60 | -------------------------------------------------------------------------------- /tests/test_cli_openai_models.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from llm.cli import cli 3 | import pytest 4 | import sqlite_utils 5 | 6 | 7 | @pytest.fixture 8 | def mocked_models(httpx_mock): 9 | httpx_mock.add_response( 10 | method="GET", 11 | url="https://api.openai.com/v1/models", 12 | json={ 13 | "data": [ 14 | { 15 | "id": "ada:2020-05-03", 16 | "object": "model", 17 | "created": 1588537600, 18 | "owned_by": "openai", 19 | }, 20 | { 21 | "id": "babbage:2020-05-03", 22 | "object": "model", 23 | "created": 1588537600, 24 | "owned_by": "openai", 25 | }, 26 | ] 27 | }, 28 | headers={"Content-Type": "application/json"}, 29 | ) 30 | return httpx_mock 31 | 32 | 33 | def test_openai_models(mocked_models): 34 | runner = CliRunner() 35 | result = runner.invoke(cli, ["openai", "models", "--key", "x"]) 36 | assert result.exit_code == 0 37 | assert result.output == ( 38 | "id owned_by created \n" 39 | "ada:2020-05-03 openai 2020-05-03T20:26:40+00:00\n" 40 | "babbage:2020-05-03 openai 2020-05-03T20:26:40+00:00\n" 41 | ) 42 | 43 | 44 | def test_openai_options_min_max(): 45 | options = { 46 | "temperature": [0, 2], 47 | "top_p": [0, 1], 48 | "frequency_penalty": [-2, 2], 49 | "presence_penalty": [-2, 2], 50 | } 51 | runner = CliRunner() 52 | 53 | for option, [min_val, max_val] in options.items(): 54 | result = runner.invoke(cli, ["-m", "chatgpt", "-o", option, "-10"]) 55 | assert result.exit_code == 1 56 | assert f"greater than or equal to {min_val}" in result.output 57 | result2 = runner.invoke(cli, ["-m", "chatgpt", "-o", option, "10"]) 58 | assert result2.exit_code == 1 59 | assert f"less than or equal to {max_val}" in result2.output 60 | 61 | 62 | @pytest.mark.parametrize("model", ("gpt-4o-mini", "gpt-4o-audio-preview")) 63 | @pytest.mark.parametrize("filetype", ("mp3", "wav")) 64 | def 
test_only_gpt4_audio_preview_allows_mp3_or_wav(httpx_mock, model, filetype): 65 | httpx_mock.add_response( 66 | method="HEAD", 67 | url=f"https://www.example.com/example.{filetype}", 68 | content=b"binary-data", 69 | headers={"Content-Type": "audio/mpeg" if filetype == "mp3" else "audio/wav"}, 70 | ) 71 | if model == "gpt-4o-audio-preview": 72 | httpx_mock.add_response( 73 | method="POST", 74 | # chat completion request 75 | url="https://api.openai.com/v1/chat/completions", 76 | json={ 77 | "id": "chatcmpl-AQT9a30kxEaM1bqxRPepQsPlCyGJh", 78 | "object": "chat.completion", 79 | "created": 1730871958, 80 | "model": "gpt-4o-audio-preview-2024-10-01", 81 | "choices": [ 82 | { 83 | "index": 0, 84 | "message": { 85 | "role": "assistant", 86 | "content": "Why did the pelican get kicked out of the restaurant?\n\nBecause he had a big bill and no way to pay it!", 87 | "refusal": None, 88 | }, 89 | "finish_reason": "stop", 90 | } 91 | ], 92 | "usage": { 93 | "prompt_tokens": 55, 94 | "completion_tokens": 25, 95 | "total_tokens": 80, 96 | "prompt_tokens_details": { 97 | "cached_tokens": 0, 98 | "audio_tokens": 44, 99 | "text_tokens": 11, 100 | "image_tokens": 0, 101 | }, 102 | "completion_tokens_details": { 103 | "reasoning_tokens": 0, 104 | "audio_tokens": 0, 105 | "text_tokens": 25, 106 | "accepted_prediction_tokens": 0, 107 | "rejected_prediction_tokens": 0, 108 | }, 109 | }, 110 | "system_fingerprint": "fp_49254d0e9b", 111 | }, 112 | headers={"Content-Type": "application/json"}, 113 | ) 114 | httpx_mock.add_response( 115 | method="GET", 116 | url=f"https://www.example.com/example.{filetype}", 117 | content=b"binary-data", 118 | headers={ 119 | "Content-Type": "audio/mpeg" if filetype == "mp3" else "audio/wav" 120 | }, 121 | ) 122 | runner = CliRunner() 123 | result = runner.invoke( 124 | cli, 125 | [ 126 | "-m", 127 | model, 128 | "-a", 129 | f"https://www.example.com/example.{filetype}", 130 | "--no-stream", 131 | "--key", 132 | "x", 133 | ], 134 | ) 135 | if model == "gpt-4o-audio-preview": 136 | assert result.exit_code == 0 137 | assert result.output == ( 138 | "Why did the pelican get kicked out of the restaurant?\n\n" 139 | "Because he had a big bill and no way to pay it!\n" 140 | ) 141 | else: 142 | assert result.exit_code == 1 143 | long = "audio/mpeg" if filetype == "mp3" else "audio/wav" 144 | assert ( 145 | f"This model does not support attachments of type '{long}'" in result.output 146 | ) 147 | 148 | 149 | @pytest.mark.parametrize("async_", (False, True)) 150 | @pytest.mark.parametrize("usage", (None, "-u", "--usage")) 151 | def test_gpt4o_mini_sync_and_async(monkeypatch, tmpdir, httpx_mock, async_, usage): 152 | user_path = tmpdir / "user_dir" 153 | log_db = user_path / "logs.db" 154 | monkeypatch.setenv("LLM_USER_PATH", str(user_path)) 155 | assert not log_db.exists() 156 | httpx_mock.add_response( 157 | method="POST", 158 | # chat completion request 159 | url="https://api.openai.com/v1/chat/completions", 160 | json={ 161 | "id": "chatcmpl-AQT9a30kxEaM1bqxRPepQsPlCyGJh", 162 | "object": "chat.completion", 163 | "created": 1730871958, 164 | "model": "gpt-4o-mini", 165 | "choices": [ 166 | { 167 | "index": 0, 168 | "message": { 169 | "role": "assistant", 170 | "content": "Ho ho ho", 171 | "refusal": None, 172 | }, 173 | "finish_reason": "stop", 174 | } 175 | ], 176 | "usage": { 177 | "prompt_tokens": 1000, 178 | "completion_tokens": 2000, 179 | "total_tokens": 12, 180 | }, 181 | "system_fingerprint": "fp_49254d0e9b", 182 | }, 183 | headers={"Content-Type": "application/json"}, 184 | ) 
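    # The mocked /v1/chat/completions response above returns "Ho ho ho" and reports
    # 1,000 prompt tokens and 2,000 completion tokens, so when -u/--usage is passed
    # the CLI is expected to print "Token usage: 1,000 input, 2,000 output" to stderr.
    # The invocation below also toggles --async, and the logged response is then
    # verified in logs.db via sqlite_utils.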
185 | runner = CliRunner(mix_stderr=False) 186 | args = ["-m", "gpt-4o-mini", "--key", "x", "--no-stream"] 187 | if usage: 188 | args.append(usage) 189 | if async_: 190 | args.append("--async") 191 | result = runner.invoke(cli, args, catch_exceptions=False) 192 | assert result.exit_code == 0 193 | assert result.output == "Ho ho ho\n" 194 | if usage: 195 | assert result.stderr == "Token usage: 1,000 input, 2,000 output\n" 196 | # Confirm it was correctly logged 197 | assert log_db.exists() 198 | db = sqlite_utils.Database(str(log_db)) 199 | assert db["responses"].count == 1 200 | row = next(db["responses"].rows) 201 | assert row["response"] == "Ho ho ho" 202 | -------------------------------------------------------------------------------- /tests/test_cli_options.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from llm.cli import cli 3 | import pytest 4 | import json 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "args,expected_options,expected_error", 9 | ( 10 | ( 11 | ["gpt-4o-mini", "temperature", "0.5"], 12 | {"gpt-4o-mini": {"temperature": "0.5"}}, 13 | None, 14 | ), 15 | ( 16 | ["gpt-4o-mini", "temperature", "invalid"], 17 | {}, 18 | "Error: temperature\n Input should be a valid number", 19 | ), 20 | ( 21 | ["gpt-4o-mini", "not-an-option", "invalid"], 22 | {}, 23 | "Extra inputs are not permitted", 24 | ), 25 | ), 26 | ) 27 | def test_set_model_default_options(user_path, args, expected_options, expected_error): 28 | path = user_path / "model_options.json" 29 | assert not path.exists() 30 | runner = CliRunner() 31 | result = runner.invoke(cli, ["models", "options", "set"] + args) 32 | if not expected_error: 33 | assert result.exit_code == 0 34 | assert path.exists() 35 | data = json.loads(path.read_text("utf-8")) 36 | assert data == expected_options 37 | else: 38 | assert result.exit_code == 1 39 | assert expected_error in result.output 40 | 41 | 42 | def test_model_options_list_and_show(user_path): 43 | (user_path / "model_options.json").write_text( 44 | json.dumps( 45 | {"gpt-4o-mini": {"temperature": 0.5}, "gpt-4o": {"temperature": 0.7}} 46 | ), 47 | "utf-8", 48 | ) 49 | runner = CliRunner() 50 | result = runner.invoke(cli, ["models", "options", "list"]) 51 | assert result.exit_code == 0 52 | assert ( 53 | result.output 54 | == "gpt-4o-mini:\n temperature: 0.5\ngpt-4o:\n temperature: 0.7\n" 55 | ) 56 | result = runner.invoke(cli, ["models", "options", "show", "gpt-4o-mini"]) 57 | assert result.exit_code == 0 58 | assert result.output == "temperature: 0.5\n" 59 | 60 | 61 | def test_model_options_clear(user_path): 62 | path = user_path / "model_options.json" 63 | path.write_text( 64 | json.dumps( 65 | { 66 | "gpt-4o-mini": {"temperature": 0.5}, 67 | "gpt-4o": {"temperature": 0.7, "top_p": 0.9}, 68 | } 69 | ), 70 | "utf-8", 71 | ) 72 | assert path.exists() 73 | runner = CliRunner() 74 | # Clear all for gpt-4o-mini 75 | result = runner.invoke(cli, ["models", "options", "clear", "gpt-4o-mini"]) 76 | assert result.exit_code == 0 77 | # Clear just top_p for gpt-4o 78 | result2 = runner.invoke(cli, ["models", "options", "clear", "gpt-4o", "top_p"]) 79 | assert result2.exit_code == 0 80 | data = json.loads(path.read_text("utf-8")) 81 | assert data == {"gpt-4o": {"temperature": 0.7}} 82 | 83 | 84 | def test_prompt_uses_model_options(user_path): 85 | path = user_path / "model_options.json" 86 | path.write_text("{}", "utf-8") 87 | # Prompt should not use an option 88 | runner = CliRunner() 89 | result = 
runner.invoke(cli, ["-m", "echo", "prompt"]) 90 | assert result.exit_code == 0 91 | assert json.loads(result.output) == { 92 | "prompt": "prompt", 93 | "system": "", 94 | "attachments": [], 95 | "stream": True, 96 | "previous": [], 97 | } 98 | 99 | # Now set an option 100 | path.write_text(json.dumps({"echo": {"example_bool": True}}), "utf-8") 101 | 102 | result2 = runner.invoke(cli, ["-m", "echo", "prompt"]) 103 | assert result2.exit_code == 0 104 | assert json.loads(result2.output) == { 105 | "prompt": "prompt", 106 | "system": "", 107 | "attachments": [], 108 | "stream": True, 109 | "previous": [], 110 | "options": {"example_bool": True}, 111 | } 112 | 113 | # Option can be over-ridden 114 | result3 = runner.invoke( 115 | cli, ["-m", "echo", "prompt", "-o", "example_bool", "false"] 116 | ) 117 | assert result3.exit_code == 0 118 | assert json.loads(result3.output) == { 119 | "prompt": "prompt", 120 | "system": "", 121 | "attachments": [], 122 | "stream": True, 123 | "previous": [], 124 | "options": {"example_bool": False}, 125 | } 126 | # Using an alias should also pick up that option 127 | aliases_path = user_path / "aliases.json" 128 | aliases_path.write_text('{"e": "echo"}', "utf-8") 129 | result4 = runner.invoke(cli, ["-m", "e", "prompt"]) 130 | assert result4.exit_code == 0 131 | assert json.loads(result4.output) == { 132 | "prompt": "prompt", 133 | "system": "", 134 | "attachments": [], 135 | "stream": True, 136 | "previous": [], 137 | "options": {"example_bool": True}, 138 | } 139 | -------------------------------------------------------------------------------- /tests/test_embed.py: -------------------------------------------------------------------------------- 1 | import json 2 | import llm 3 | from llm.embeddings import Entry 4 | import pytest 5 | import sqlite_utils 6 | from unittest.mock import ANY 7 | 8 | 9 | def test_demo_plugin(): 10 | model = llm.get_embedding_model("embed-demo") 11 | assert model.embed("hello world") == [5, 5] + [0] * 14 12 | 13 | 14 | @pytest.mark.parametrize( 15 | "batch_size,expected_batches", 16 | ( 17 | (None, 100), 18 | (10, 100), 19 | ), 20 | ) 21 | def test_embed_huge_list(batch_size, expected_batches): 22 | model = llm.get_embedding_model("embed-demo") 23 | huge_list = ("hello {}".format(i) for i in range(1000)) 24 | kwargs = {} 25 | if batch_size: 26 | kwargs["batch_size"] = batch_size 27 | results = model.embed_multi(huge_list, **kwargs) 28 | assert repr(type(results)) == "" 29 | first_twos = {} 30 | for result in results: 31 | key = (result[0], result[1]) 32 | first_twos[key] = first_twos.get(key, 0) + 1 33 | assert first_twos == {(5, 1): 10, (5, 2): 90, (5, 3): 900} 34 | assert model.batch_count == expected_batches 35 | 36 | 37 | def test_embed_store(collection): 38 | collection.embed("3", "hello world again", store=True) 39 | assert collection.db["embeddings"].count == 3 40 | assert ( 41 | next(collection.db["embeddings"].rows_where("id = ?", ["3"]))["content"] 42 | == "hello world again" 43 | ) 44 | 45 | 46 | def test_embed_metadata(collection): 47 | collection.embed("3", "hello yet again", metadata={"foo": "bar"}, store=True) 48 | assert collection.db["embeddings"].count == 3 49 | assert json.loads( 50 | next(collection.db["embeddings"].rows_where("id = ?", ["3"]))["metadata"] 51 | ) == {"foo": "bar"} 52 | entry = collection.similar("hello yet again")[0] 53 | assert entry.id == "3" 54 | assert entry.metadata == {"foo": "bar"} 55 | assert entry.content == "hello yet again" 56 | 57 | 58 | def test_collection(collection): 59 | assert 
collection.id == 1 60 | assert collection.count() == 2 61 | # Check that the embeddings are there 62 | rows = list(collection.db["embeddings"].rows) 63 | assert rows == [ 64 | { 65 | "collection_id": 1, 66 | "id": "1", 67 | "embedding": llm.encode([5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 68 | "content": None, 69 | "content_blob": None, 70 | "content_hash": collection.content_hash("hello world"), 71 | "metadata": None, 72 | "updated": ANY, 73 | }, 74 | { 75 | "collection_id": 1, 76 | "id": "2", 77 | "embedding": llm.encode([7, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 78 | "content": None, 79 | "content_blob": None, 80 | "content_hash": collection.content_hash("goodbye world"), 81 | "metadata": None, 82 | "updated": ANY, 83 | }, 84 | ] 85 | assert isinstance(rows[0]["updated"], int) and rows[0]["updated"] > 0 86 | 87 | 88 | def test_similar(collection): 89 | results = list(collection.similar("hello world")) 90 | assert results == [ 91 | Entry(id="1", score=pytest.approx(0.9999999999999999)), 92 | Entry(id="2", score=pytest.approx(0.9863939238321437)), 93 | ] 94 | 95 | 96 | def test_similar_prefixed(collection): 97 | results = list(collection.similar("hello world", prefix="2")) 98 | assert results == [ 99 | Entry(id="2", score=pytest.approx(0.9863939238321437)), 100 | ] 101 | 102 | 103 | def test_similar_by_id(collection): 104 | results = list(collection.similar_by_id("1")) 105 | assert results == [ 106 | Entry(id="2", score=pytest.approx(0.9863939238321437)), 107 | ] 108 | 109 | 110 | @pytest.mark.parametrize( 111 | "batch_size,expected_batches", 112 | ( 113 | (None, 100), 114 | (5, 200), 115 | ), 116 | ) 117 | @pytest.mark.parametrize("with_metadata", (False, True)) 118 | def test_embed_multi(with_metadata, batch_size, expected_batches): 119 | db = sqlite_utils.Database(memory=True) 120 | collection = llm.Collection("test", db, model_id="embed-demo") 121 | model = collection.model() 122 | assert getattr(model, "batch_count", 0) == 0 123 | ids_and_texts = ((str(i), "hello {}".format(i)) for i in range(1000)) 124 | kwargs = {} 125 | if batch_size is not None: 126 | kwargs["batch_size"] = batch_size 127 | if with_metadata: 128 | ids_and_texts = ((id, text, {"meta": id}) for id, text in ids_and_texts) 129 | collection.embed_multi_with_metadata(ids_and_texts, **kwargs) 130 | else: 131 | # Exercise store=True here too 132 | collection.embed_multi(ids_and_texts, store=True, **kwargs) 133 | rows = list(db["embeddings"].rows) 134 | assert len(rows) == 1000 135 | rows_with_metadata = [row for row in rows if row["metadata"] is not None] 136 | rows_with_content = [row for row in rows if row["content"] is not None] 137 | if with_metadata: 138 | assert len(rows_with_metadata) == 1000 139 | assert len(rows_with_content) == 0 140 | else: 141 | assert len(rows_with_metadata) == 0 142 | assert len(rows_with_content) == 1000 143 | # Every row should have content_hash set 144 | assert all(row["content_hash"] is not None for row in rows) 145 | # Check batch count 146 | assert collection.model().batch_count == expected_batches 147 | 148 | 149 | def test_collection_delete(collection): 150 | db = collection.db 151 | assert db["embeddings"].count == 2 152 | assert db["collections"].count == 1 153 | collection.delete() 154 | assert db["embeddings"].count == 0 155 | assert db["collections"].count == 0 156 | 157 | 158 | def test_binary_only_and_text_only_embedding_models(): 159 | binary_only = llm.get_embedding_model("embed-binary-only") 160 | text_only = llm.get_embedding_model("embed-text-only") 161 
| 162 | assert binary_only.supports_binary 163 | assert not binary_only.supports_text 164 | assert not text_only.supports_binary 165 | assert text_only.supports_text 166 | 167 | with pytest.raises(ValueError): 168 | binary_only.embed("hello world") 169 | 170 | binary_only.embed(b"hello world") 171 | 172 | with pytest.raises(ValueError): 173 | text_only.embed(b"hello world") 174 | 175 | text_only.embed("hello world") 176 | 177 | # Try the multi versions too 178 | # Have to call list() on this or the generator is not evaluated 179 | with pytest.raises(ValueError): 180 | list(binary_only.embed_multi(["hello world"])) 181 | 182 | list(binary_only.embed_multi([b"hello world"])) 183 | 184 | with pytest.raises(ValueError): 185 | list(text_only.embed_multi([b"hello world"])) 186 | 187 | list(text_only.embed_multi(["hello world"])) 188 | -------------------------------------------------------------------------------- /tests/test_encode_decode.py: -------------------------------------------------------------------------------- 1 | import llm 2 | import pytest 3 | import numpy as np 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "array", 8 | ( 9 | (0.0, 1.0, 1.5), 10 | (3423.0, 222.0, -1234.5), 11 | ), 12 | ) 13 | def test_roundtrip(array): 14 | encoded = llm.encode(array) 15 | decoded = llm.decode(encoded) 16 | assert decoded == array 17 | # Try with numpy as well 18 | numpy_decoded = np.frombuffer(encoded, "