├── .github
│   ├── FUNDING.yml
│   ├── dependabot.yml
│   └── workflows
│       ├── cog.yml
│       ├── publish.yml
│       ├── stable-docs.yml
│       └── test.yml
├── .gitignore
├── .readthedocs.yaml
├── AGENTS.md
├── Justfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs
│   ├── .gitignore
│   ├── Makefile
│   ├── _templates
│   │   └── base.html
│   ├── aliases.md
│   ├── changelog.md
│   ├── conf.py
│   ├── contributing.md
│   ├── embeddings
│   │   ├── cli.md
│   │   ├── index.md
│   │   ├── python-api.md
│   │   ├── storage.md
│   │   └── writing-plugins.md
│   ├── fragments.md
│   ├── help.md
│   ├── index.md
│   ├── logging.md
│   ├── openai-models.md
│   ├── other-models.md
│   ├── plugins
│   │   ├── advanced-model-plugins.md
│   │   ├── directory.md
│   │   ├── index.md
│   │   ├── installing-plugins.md
│   │   ├── llm-markov
│   │   │   ├── llm_markov.py
│   │   │   └── pyproject.toml
│   │   ├── plugin-hooks.md
│   │   ├── plugin-utilities.md
│   │   └── tutorial-model-plugin.md
│   ├── python-api.md
│   ├── related-tools.md
│   ├── requirements.txt
│   ├── schemas.md
│   ├── setup.md
│   ├── templates.md
│   ├── tools.md
│   └── usage.md
├── llm
│   ├── __init__.py
│   ├── __main__.py
│   ├── cli.py
│   ├── default_plugins
│   │   ├── __init__.py
│   │   ├── default_tools.py
│   │   └── openai_models.py
│   ├── embeddings.py
│   ├── embeddings_migrations.py
│   ├── errors.py
│   ├── hookspecs.py
│   ├── migrations.py
│   ├── models.py
│   ├── plugins.py
│   ├── py.typed
│   ├── templates.py
│   ├── tools.py
│   └── utils.py
├── mypy.ini
├── pyproject.toml
├── pytest.ini
├── ruff.toml
└── tests
    ├── cassettes
    │   └── test_tools
    │       ├── test_tool_use_basic.yaml
    │       └── test_tool_use_chain_of_two_calls.yaml
    ├── conftest.py
    ├── test-llm-load-plugins.sh
    ├── test_aliases.py
    ├── test_async.py
    ├── test_attachments.py
    ├── test_chat.py
    ├── test_cli_openai_models.py
    ├── test_cli_options.py
    ├── test_embed.py
    ├── test_embed_cli.py
    ├── test_encode_decode.py
    ├── test_fragments_cli.py
    ├── test_keys.py
    ├── test_llm.py
    ├── test_llm_logs.py
    ├── test_migrate.py
    ├── test_plugins.py
    ├── test_templates.py
    ├── test_tools.py
    └── test_utils.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: [simonw]
2 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: pip
4 | directory: "/"
5 | schedule:
6 | interval: daily
7 | groups:
8 | python-packages:
9 | patterns:
10 | - "*"
11 |
--------------------------------------------------------------------------------
/.github/workflows/cog.yml:
--------------------------------------------------------------------------------
1 | name: Run Cog
2 |
3 | on:
4 | pull_request:
5 | types: [opened, synchronize]
6 |
7 | permissions:
8 | contents: write
9 | pull-requests: write
10 |
11 | jobs:
12 | run-cog:
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v4
17 | with:
18 | ref: ${{ github.head_ref }}
19 |
20 | - name: Set up Python 3.11
21 | uses: actions/setup-python@v5
22 | with:
23 | python-version: '3.11'
24 |
25 | - name: Install dependencies
26 | run: |
27 | pip install -e '.[test]'
28 | pip install -r docs/requirements.txt
29 |
30 | - name: Run cog
31 | run: |
32 | cog -r -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" docs/**/*.md docs/*.md README.md
33 |
34 | - name: Check for changes
35 | id: check-changes
36 | run: |
37 | if [ -n "$(git diff)" ]; then
38 | echo "changes=true" >> $GITHUB_OUTPUT
39 | else
40 | echo "changes=false" >> $GITHUB_OUTPUT
41 | fi
42 |
43 | - name: Commit and push if changed
44 | if: steps.check-changes.outputs.changes == 'true'
45 | run: |
46 | git config --local user.email "github-actions[bot]@users.noreply.github.com"
47 | git config --local user.name "github-actions[bot]"
48 | git add -A
49 | git commit -m "Ran cog"
50 | git push
51 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish Python Package
2 |
3 | on:
4 | release:
5 | types: [created]
6 |
7 | permissions:
8 | contents: read
9 |
10 | jobs:
11 | test:
12 | runs-on: ubuntu-latest
13 | strategy:
14 | matrix:
15 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
16 | steps:
17 | - uses: actions/checkout@v4
18 | - name: Set up Python ${{ matrix.python-version }}
19 | uses: actions/setup-python@v5
20 | with:
21 | python-version: ${{ matrix.python-version }}
22 | cache: pip
23 | cache-dependency-path: setup.py
24 | - name: Install dependencies
25 | run: |
26 | pip install '.[test]'
27 | - name: Run tests
28 | run: |
29 | pytest
30 | deploy:
31 | runs-on: ubuntu-latest
32 | environment: release
33 | permissions:
34 | id-token: write
35 | needs: [test]
36 | steps:
37 | - uses: actions/checkout@v4
38 | - name: Set up Python
39 | uses: actions/setup-python@v5
40 | with:
41 | python-version: '3.13'
42 | cache: pip
43 | cache-dependency-path: setup.py
44 | - name: Install dependencies
45 | run: |
46 | pip install setuptools wheel build
47 | - name: Build
48 | run: |
49 | python -m build
50 | - name: Publish
51 | uses: pypa/gh-action-pypi-publish@release/v1
52 |
--------------------------------------------------------------------------------
/.github/workflows/stable-docs.yml:
--------------------------------------------------------------------------------
1 | name: Update Stable Docs
2 |
3 | on:
4 | release:
5 | types: [published]
6 | push:
7 | branches:
8 | - main
9 |
10 | permissions:
11 | contents: write
12 |
13 | jobs:
14 | update_stable_docs:
15 | runs-on: ubuntu-latest
16 | steps:
17 | - name: Checkout repository
18 | uses: actions/checkout@v3
19 | with:
20 | fetch-depth: 0 # We need all commits to find docs/ changes
21 | - name: Set up Git user
22 | run: |
23 | git config user.name "Automated"
24 | git config user.email "actions@users.noreply.github.com"
25 | - name: Create stable branch if it does not yet exist
26 | run: |
27 | if ! git ls-remote --heads origin stable | grep stable; then
28 | git checkout -b stable
29 | # If there are any releases, copy docs/ in from most recent
30 | LATEST_RELEASE=$(git tag | sort -Vr | head -n1)
31 | if [ -n "$LATEST_RELEASE" ]; then
32 | rm -rf docs/
33 | git checkout $LATEST_RELEASE -- docs/
34 | fi
35 | git commit -m "Populate docs/ from $LATEST_RELEASE" || echo "No changes"
36 | git push -u origin stable
37 | fi
38 | - name: Handle Release
39 | if: github.event_name == 'release' && !github.event.release.prerelease
40 | run: |
41 | git fetch --all
42 | git checkout stable
43 | git reset --hard ${GITHUB_REF#refs/tags/}
44 | git push origin stable --force
45 | - name: Handle Commit to Main
46 | if: contains(github.event.head_commit.message, '!stable-docs')
47 | run: |
48 | git fetch origin
49 | git checkout -b stable origin/stable
50 | # Get the list of modified files in docs/ from the current commit
51 | FILES=$(git diff-tree --no-commit-id --name-only -r ${{ github.sha }} -- docs/)
52 | # Check if the list of files is non-empty
53 | if [[ -n "$FILES" ]]; then
54 | # Checkout those files to the stable branch to over-write with their contents
55 | for FILE in $FILES; do
56 | git checkout ${{ github.sha }} -- $FILE
57 | done
58 | git add docs/
59 | git commit -m "Doc changes from ${{ github.sha }}"
60 | git push origin stable
61 | else
62 | echo "No changes to docs/ in this commit."
63 | exit 0
64 | fi
65 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 |
3 | on: [push, pull_request]
4 |
5 | permissions:
6 | contents: read
7 |
8 | jobs:
9 | test:
10 | runs-on: ${{ matrix.os }}
11 | strategy:
12 | matrix:
13 | os: [ubuntu-latest, macos-latest, windows-latest]
14 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
15 | steps:
16 | - uses: actions/checkout@v4
17 | - name: Set up Python ${{ matrix.python-version }}
18 | uses: actions/setup-python@v5
19 | with:
20 | python-version: ${{ matrix.python-version }}
21 | cache: pip
22 | cache-dependency-path: setup.py
23 | - name: Install dependencies
24 | run: |
25 | pip install -e '.[test]'
26 | - name: Run tests
27 | run: |
28 | python -m pytest -vv
29 | - name: Check if cog needs to be run
30 | if: matrix.os != 'windows-latest'
31 | run: |
32 | cog --check \
33 | -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" \
34 | docs/**/*.md docs/*.md
35 | - name: Run Black
36 | if: matrix.os != 'windows-latest'
37 | run: |
38 | black --check .
39 | - name: Run mypy
40 | if: matrix.os != 'windows-latest'
41 | run: |
42 | mypy llm
43 | - name: Run ruff
44 | if: matrix.os != 'windows-latest'
45 | run: |
46 | ruff check .
47 | - name: Check it builds
48 | run: |
49 | python -m build
50 | - name: Run test-llm-load-plugins.sh
51 | if: matrix.os != 'windows-latest'
52 | run: |
53 | llm install llm-cluster llm-mistral
54 | ./tests/test-llm-load-plugins.sh
55 | - name: Upload artifact of builds
56 | if: matrix.python-version == '3.13' && matrix.os == 'ubuntu-latest'
57 | uses: actions/upload-artifact@v4
58 | with:
59 | name: dist-${{ matrix.os }}-${{ matrix.python-version }}
60 | path: dist/*
61 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .venv
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | venv
6 | .eggs
7 | .pytest_cache
8 | *.egg-info
9 | .DS_Store
10 | .idea/
11 | .vscode/
12 | uv.lock
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: ubuntu-22.04
5 | tools:
6 | python: "3.11"
7 |
8 | sphinx:
9 | configuration: docs/conf.py
10 |
11 | formats:
12 | - pdf
13 | - epub
14 |
15 | python:
16 | install:
17 | - requirements: docs/requirements.txt
18 | - method: pip
19 | path: .
20 |
--------------------------------------------------------------------------------
/AGENTS.md:
--------------------------------------------------------------------------------
1 | # AGENTS.md
2 |
3 | This project uses a Python environment for development and tests.
4 |
5 | ## Setting up a development environment
6 |
7 | 1. Install the project with its test dependencies:
8 | ```bash
9 | pip install -e '.[test]'
10 | ```
11 | 2. Run the tests:
12 | ```bash
13 | pytest
14 | ```
15 |
16 | ## Building the documentation
17 |
18 | Run the following commands if you want to build the docs locally:
19 |
20 | ```bash
21 | cd docs
22 | pip install -r requirements.txt
23 | make html
24 | ```
25 |
--------------------------------------------------------------------------------
/Justfile:
--------------------------------------------------------------------------------
1 | # Run tests and linters
2 | @default: test lint
3 |
4 | # Install dependencies and test dependencies
5 | @init:
6 | pipenv run pip install -e '.[test]'
7 |
8 | # Run pytest with supplied options
9 | @test *options:
10 | pipenv run pytest {{options}}
11 |
12 | # Run linters
13 | @lint:
14 | echo "Linters..."
15 | echo " Black"
16 | pipenv run black . --check
17 | echo " cog"
18 | pipenv run cog --check \
19 | -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" \
20 | README.md docs/*.md
21 | echo " mypy"
22 | pipenv run mypy llm
23 | echo " ruff"
24 | pipenv run ruff check .
25 |
26 | # Run mypy
27 | @mypy:
28 | pipenv run mypy llm
29 |
30 | # Rebuild docs with cog
31 | @cog:
32 | pipenv run cog -r -p "import sys, os; sys._called_from_test=True; os.environ['LLM_USER_PATH'] = '/tmp'" docs/**/*.md docs/*.md README.md
33 |
34 | # Serve live docs on localhost:8000
35 | @docs: cog
36 | rm -rf docs/_build
37 | cd docs && pipenv run make livehtml
38 |
39 | # Apply Black
40 | @black:
41 | pipenv run black .
42 |
43 | # Run automatic fixes
44 | @fix: cog
45 | pipenv run ruff check . --fix
46 | pipenv run black .
47 |
48 | # Push commit if tests pass
49 | @push: test lint
50 | git push
51 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | global-exclude tests/*
2 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = sqlite-utils
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
22 | livehtml:
23 | sphinx-autobuild -b html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(0)
24 |
--------------------------------------------------------------------------------
/docs/_templates/base.html:
--------------------------------------------------------------------------------
1 | {%- extends "!base.html" %}
2 |
3 | {%- block htmltitle -%}
4 | {% if not docstitle %}
5 | <title>{{ title|striptags|e }}</title>
6 | {% elif pagename == master_doc %}
7 | <title>LLM: A CLI utility and Python library for interacting with Large Language Models</title>
8 | {% else %}
9 | <title>{{ title|striptags|e }} - {{ docstitle|striptags|e }}</title>
10 | {% endif %}
11 | {%- endblock -%}
12 |
13 | {% block site_meta %}
14 | {{ super() }}
15 |
16 | {% endblock %}
17 |
--------------------------------------------------------------------------------
/docs/aliases.md:
--------------------------------------------------------------------------------
1 | (aliases)=
2 | # Model aliases
3 |
4 | LLM supports model aliases, which allow you to refer to a model by a short name instead of its full ID.
5 |
6 | ## Listing aliases
7 |
8 | To list current aliases, run this:
9 |
10 | ```bash
11 | llm aliases
12 | ```
13 | Example output:
14 |
15 |
21 | ```
22 | 4o : gpt-4o
23 | chatgpt-4o : chatgpt-4o-latest
24 | 4o-mini : gpt-4o-mini
25 | 4.1 : gpt-4.1
26 | 4.1-mini : gpt-4.1-mini
27 | 4.1-nano : gpt-4.1-nano
28 | 3.5 : gpt-3.5-turbo
29 | chatgpt : gpt-3.5-turbo
30 | chatgpt-16k : gpt-3.5-turbo-16k
31 | 3.5-16k : gpt-3.5-turbo-16k
32 | 4 : gpt-4
33 | gpt4 : gpt-4
34 | 4-32k : gpt-4-32k
35 | gpt-4-turbo-preview : gpt-4-turbo
36 | 4-turbo : gpt-4-turbo
37 | 4t : gpt-4-turbo
38 | gpt-4.5 : gpt-4.5-preview
39 | 3.5-instruct : gpt-3.5-turbo-instruct
40 | chatgpt-instruct : gpt-3.5-turbo-instruct
41 | ada : text-embedding-ada-002 (embedding)
42 | ada-002 : text-embedding-ada-002 (embedding)
43 | 3-small : text-embedding-3-small (embedding)
44 | 3-large : text-embedding-3-large (embedding)
45 | 3-small-512 : text-embedding-3-small-512 (embedding)
46 | 3-large-256 : text-embedding-3-large-256 (embedding)
47 | 3-large-1024 : text-embedding-3-large-1024 (embedding)
48 | ```
49 |
50 |
51 | Add `--json` to get that list back as JSON:
52 |
53 | ```bash
54 | llm aliases list --json
55 | ```
56 | Example output:
57 | ```json
58 | {
59 | "3.5": "gpt-3.5-turbo",
60 | "chatgpt": "gpt-3.5-turbo",
61 | "4": "gpt-4",
62 | "gpt4": "gpt-4",
63 | "ada": "ada-002"
64 | }
65 | ```
66 |
67 | ## Adding a new alias
68 |
69 | The `llm aliases set <alias> <model id>` command can be used to add a new alias:
70 |
71 | ```bash
72 | llm aliases set mini gpt-4o-mini
73 | ```
74 | You can also pass one or more `-q search` options to set an alias on the first model matching those search terms:
75 | ```bash
76 | llm aliases set mini -q 4o -q mini
77 | ```
78 | Now you can run the `gpt-4o-mini` model using the `mini` alias like this:
79 | ```bash
80 | llm -m mini 'An epic Greek-style saga about a cheesecake that builds a SQL database from scratch'
81 | ```
82 | Aliases can be set for both regular models and {ref}`embedding models <embeddings>` using the same command. To set an alias of `oai` for the OpenAI `ada-002` embedding model use this:
83 | ```bash
84 | llm aliases set oai ada-002
85 | ```
86 | Now you can embed a string using that model like so:
87 | ```bash
88 | llm embed -c 'hello world' -m oai
89 | ```
90 | Output:
91 | ```
92 | [-0.014945968054234982, 0.0014304015785455704, ...]
93 | ```
94 |
95 | ## Removing an alias
96 |
97 | The `llm aliases remove <alias>` command will remove the specified alias:
98 |
99 | ```bash
100 | llm aliases remove mini
101 | ```
102 |
103 | ## Viewing the aliases file
104 |
105 | Aliases are stored in an `aliases.json` file in the LLM configuration directory.
106 |
107 | To see the path to that file, run this:
108 |
109 | ```bash
110 | llm aliases path
111 | ```
112 | To view the content of that file, run this:
113 |
114 | ```bash
115 | cat "$(llm aliases path)"
116 | ```
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from subprocess import PIPE, Popen
5 |
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | # If extensions (or modules to document with autodoc) are in another directory,
16 | # add these directories to sys.path here. If the directory is relative to the
17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
18 | #
19 | # import os
20 | # import sys
21 | # sys.path.insert(0, os.path.abspath('.'))
22 |
23 |
24 | # -- General configuration ------------------------------------------------
25 |
26 | # If your documentation needs a minimal Sphinx version, state it here.
27 | #
28 | # needs_sphinx = '1.0'
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = [
34 | "myst_parser",
35 | "sphinx_copybutton",
36 | "sphinx_markdown_builder",
37 | "sphinx.ext.autodoc",
38 | ]
39 | myst_enable_extensions = ["colon_fence"]
40 |
41 | markdown_http_base = "https://llm.datasette.io/en/stable"
42 | markdown_uri_doc_suffix = ".html"
43 |
44 | # Add any paths that contain templates here, relative to this directory.
45 | templates_path = ["_templates"]
46 |
47 | # The suffix(es) of source filenames.
48 | # You can specify multiple suffix as a list of string:
49 | #
50 | # source_suffix = ['.rst', '.md']
51 | source_suffix = ".rst"
52 |
53 | # The master toctree document.
54 | master_doc = "index"
55 |
56 | # General information about the project.
57 | project = "LLM"
58 | copyright = "2025, Simon Willison"
59 | author = "Simon Willison"
60 |
61 | # The version info for the project you're documenting, acts as replacement for
62 | # |version| and |release|, also used in various other places throughout the
63 | # built documents.
64 | #
65 | # The short X.Y version.
66 | pipe = Popen("git describe --tags --always", stdout=PIPE, shell=True)
67 | git_version = pipe.stdout.read().decode("utf8")
68 |
69 | if git_version:
70 | version = git_version.rsplit("-", 1)[0]
71 | release = git_version
72 | else:
73 | version = ""
74 | release = ""
75 |
76 | # The language for content autogenerated by Sphinx. Refer to documentation
77 | # for a list of supported languages.
78 | #
79 | # This is also used if you do content translation via gettext catalogs.
80 | # Usually you set "language" from the command line for these cases.
81 | language = "en"
82 |
83 | # List of patterns, relative to source directory, that match files and
84 | # directories to ignore when looking for source files.
85 | # This patterns also effect to html_static_path and html_extra_path
86 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
87 |
88 | # The name of the Pygments (syntax highlighting) style to use.
89 | pygments_style = "sphinx"
90 |
91 | # If true, `todo` and `todoList` produce output, else they produce nothing.
92 | todo_include_todos = False
93 |
94 |
95 | # -- Options for HTML output ----------------------------------------------
96 |
97 | # The theme to use for HTML and HTML Help pages. See the documentation for
98 | # a list of builtin themes.
99 | #
100 | html_theme = "furo"
101 |
102 | # Theme options are theme-specific and customize the look and feel of a theme
103 | # further. For a list of options available for each theme, see the
104 | # documentation.
105 |
106 | html_theme_options = {}
107 | html_title = "LLM"
108 |
109 | # Add any paths that contain custom static files (such as style sheets) here,
110 | # relative to this directory. They are copied after the builtin static files,
111 | # so a file named "default.css" will overwrite the builtin "default.css".
112 | html_static_path = []
113 |
114 |
115 | # -- Options for HTMLHelp output ------------------------------------------
116 |
117 | # Output file base name for HTML help builder.
118 | htmlhelp_basename = "llm-doc"
119 |
120 |
121 | # -- Options for LaTeX output ---------------------------------------------
122 |
123 | latex_elements = {
124 | # The paper size ('letterpaper' or 'a4paper').
125 | #
126 | # 'papersize': 'letterpaper',
127 | # The font size ('10pt', '11pt' or '12pt').
128 | #
129 | # 'pointsize': '10pt',
130 | # Additional stuff for the LaTeX preamble.
131 | #
132 | # 'preamble': '',
133 | # Latex figure (float) alignment
134 | #
135 | # 'figure_align': 'htbp',
136 | }
137 |
138 | # Grouping the document tree into LaTeX files. List of tuples
139 | # (source start file, target name, title,
140 | # author, documentclass [howto, manual, or own class]).
141 | latex_documents = [
142 | (
143 | master_doc,
144 | "llm.tex",
145 | "LLM documentation",
146 | "Simon Willison",
147 | "manual",
148 | )
149 | ]
150 |
151 |
152 | # -- Options for manual page output ---------------------------------------
153 |
154 | # One entry per manual page. List of tuples
155 | # (source start file, name, description, authors, manual section).
156 | man_pages = [
157 | (
158 | master_doc,
159 | "llm",
160 | "LLM documentation",
161 | [author],
162 | 1,
163 | )
164 | ]
165 |
166 |
167 | # -- Options for Texinfo output -------------------------------------------
168 |
169 | # Grouping the document tree into Texinfo files. List of tuples
170 | # (source start file, target name, title, author,
171 | # dir menu entry, description, category)
172 | texinfo_documents = [
173 | (
174 | master_doc,
175 | "llm",
176 | "LLM documentation",
177 | author,
178 | "llm",
179 | " Access large language models from the command-line ",
180 | "Miscellaneous",
181 | )
182 | ]
183 |
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | To contribute to this tool, first checkout the code. Then create a new virtual environment:
4 | ```bash
5 | cd llm
6 | python -m venv venv
7 | source venv/bin/activate
8 | ```
9 | Or if you are using `pipenv`:
10 | ```bash
11 | pipenv shell
12 | ```
13 | Now install the dependencies and test dependencies:
14 | ```bash
15 | pip install -e '.[test]'
16 | ```
17 | To run the tests:
18 | ```bash
19 | pytest
20 | ```
21 |
22 | ## Updating recorded HTTP API interactions and associated snapshots
23 |
24 | This project uses [pytest-recording](https://github.com/kiwicom/pytest-recording) to record OpenAI API responses for some of the tests, and [syrupy](https://github.com/syrupy-project/syrupy) to capture snapshots of their results.
25 |
26 | If you add a new test that calls the API you can capture the API response and snapshot like this:
27 | ```bash
28 | PYTEST_OPENAI_API_KEY="$(llm keys get openai)" pytest --record-mode once --snapshot-update
29 | ```
30 | Then review the new snapshots in `tests/__snapshots__/` to make sure they look correct.
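For orientation, a test that combines the two tools typically looks something like this hypothetical sketch - the `vcr` marker comes from pytest-recording and the `snapshot` fixture from syrupy, but the test body is made up:

```python
import pytest


@pytest.mark.vcr  # pytest-recording: record/replay the HTTP interaction as a cassette
def test_prompt_snapshot(snapshot):  # syrupy provides the snapshot fixture
    # Hypothetical test body - run whatever makes the OpenAI request,
    # then compare the result against the stored snapshot.
    result = run_prompt_somehow()  # placeholder, not a real helper in this repo
    assert result == snapshot
```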
31 |
32 | ## Debugging tricks
33 |
34 | The default OpenAI plugin has a debugging mechanism for showing the exact requests and responses that were sent to the OpenAI API.
35 |
36 | Set the `LLM_OPENAI_SHOW_RESPONSES` environment variable like this:
37 | ```bash
38 | LLM_OPENAI_SHOW_RESPONSES=1 llm -m chatgpt 'three word slogan for an otter-run bakery'
39 | ```
40 | This will output details of the API requests and responses to the console.
41 |
42 | Use `--no-stream` to see a more readable version of the body that avoids streaming the response:
43 |
44 | ```bash
45 | LLM_OPENAI_SHOW_RESPONSES=1 llm -m chatgpt --no-stream \
46 | 'three word slogan for an otter-run bakery'
47 | ```
48 |
49 | ## Documentation
50 |
51 | Documentation for this project uses [MyST](https://myst-parser.readthedocs.io/) - it is written in Markdown and rendered using Sphinx.
52 |
53 | To build the documentation locally, run the following:
54 | ```bash
55 | cd docs
56 | pip install -r requirements.txt
57 | make livehtml
58 | ```
59 | This will start a live preview server, using [sphinx-autobuild](https://pypi.org/project/sphinx-autobuild/).
60 |
61 | The CLI `--help` examples in the documentation are managed using [Cog](https://github.com/nedbat/cog). Update those files like this:
62 | ```bash
63 | just cog
64 | ```
65 | You'll need [Just](https://github.com/casey/just) installed to run this command.
66 |
67 | ## Release process
68 |
69 | To release a new version:
70 |
71 | 1. Update `docs/changelog.md` with the new changes.
72 | 2. Update the version number in `pyproject.toml`
73 | 3. [Create a GitHub release](https://github.com/simonw/llm/releases/new) for the new version.
74 | 4. Wait for the package to push to PyPI and then...
75 | 5. Run the [regenerate.yaml](https://github.com/simonw/homebrew-llm/actions/workflows/regenerate.yaml) workflow to update the Homebrew tap to the latest version.
76 |
--------------------------------------------------------------------------------
/docs/embeddings/index.md:
--------------------------------------------------------------------------------
1 | (embeddings)=
2 | # Embeddings
3 |
4 | Embedding models allow you to take a piece of text - a word, sentence, paragraph or even a whole article, and convert that into an array of floating point numbers.
5 |
6 | This floating point array is called an "embedding vector", and works as a numerical representation of the semantic meaning of the content in a multi-dimensional space.
7 |
8 | By calculating the distance between embedding vectors, we can identify which content is semantically "nearest" to other content.
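To make "distance" concrete, here is a minimal sketch (not LLM's own implementation) of cosine similarity between two embedding vectors:

```python
# Compare two embedding vectors with cosine similarity.
# Higher scores mean the content they represent is semantically closer.
def cosine_similarity(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    magnitude_a = sum(x * x for x in a) ** 0.5
    magnitude_b = sum(y * y for y in b) ** 0.5
    return dot / (magnitude_a * magnitude_b)
```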
9 |
10 | This can be used to build features like related article lookups. It can also be used to build semantic search, where a user can search for a phrase and get back results that are semantically similar to that phrase even if they do not share any exact keywords.
11 |
12 | Some embedding models like [CLIP](https://github.com/simonw/llm-clip) can even work against binary files such as images. These can be used to search for images that are similar to other images, or to search for images that are semantically similar to a piece of text.
13 |
14 | LLM supports multiple embedding models through {ref}`plugins `. Once installed, an embedding model can be used on the command-line or via the Python API to calculate and store embeddings for content, and then to perform similarity searches against those embeddings.
15 |
16 | See [LLM now provides tools for working with embeddings](https://simonwillison.net/2023/Sep/4/llm-embeddings/) for an extended explanation of embeddings, why they are useful and what you can do with them.
17 |
18 | ```{toctree}
19 | ---
20 | maxdepth: 3
21 | ---
22 | cli
23 | python-api
24 | writing-plugins
25 | storage
26 | ```
27 |
--------------------------------------------------------------------------------
/docs/embeddings/python-api.md:
--------------------------------------------------------------------------------
1 | (embeddings-python-api)=
2 | # Using embeddings from Python
3 |
4 | You can load an embedding model using its model ID or alias like this:
5 | ```python
6 | import llm
7 |
8 | embedding_model = llm.get_embedding_model("3-small")
9 | ```
10 | To embed a string, returning a Python list of floating point numbers, use the `.embed()` method:
11 | ```python
12 | vector = embedding_model.embed("my happy hound")
13 | ```
14 | If the embedding model can handle binary input, you can call `.embed()` with a byte string instead. You can check the `supports_binary` property to see if this is supported:
15 | ```python
16 | if embedding_model.supports_binary:
17 | vector = embedding_model.embed(open("my-image.jpg", "rb").read())
18 | ```
19 | The `embedding_model.supports_text` property indicates if the model supports text input.
20 |
21 | Many embeddings models are more efficient when you embed multiple strings or binary strings at once. To embed multiple strings at once, use the `.embed_multi()` method:
22 | ```python
23 | vectors = list(embedding_model.embed_multi(["my happy hound", "my dissatisfied cat"]))
24 | ```
25 | This returns a generator that yields one embedding vector per string.
26 |
27 | Embeddings are calculated in batches. By default all items will be processed in a single batch, unless the underlying embedding model has defined its own preferred batch size. You can pass a custom batch size using `batch_size=N`, for example:
28 |
29 | ```python
30 | vectors = list(embedding_model.embed_multi(lines_from_file, batch_size=20))
31 | ```
32 |
33 | (embeddings-python-collections)=
34 | ## Working with collections
35 |
36 | The `llm.Collection` class can be used to work with **collections** of embeddings from Python code.
37 |
38 | A collection is a named group of embedding vectors, each stored along with their IDs in a SQLite database table.
39 |
40 | To work with embeddings in this way you will need an instance of a [sqlite-utils Database](https://sqlite-utils.datasette.io/en/stable/python-api.html#connecting-to-or-creating-a-database) object. You can then pass that to the `llm.Collection` constructor along with the unique string name of the collection and the ID of the embedding model you will be using with that collection:
41 |
42 | ```python
43 | import sqlite_utils
44 | import llm
45 |
46 | # This collection will use an in-memory database that will be
47 | # discarded when the Python process exits
48 | collection = llm.Collection("entries", model_id="3-small")
49 |
50 | # Or you can persist the database to disk like this:
51 | db = sqlite_utils.Database("my-embeddings.db")
52 | collection = llm.Collection("entries", db, model_id="3-small")
53 |
54 | # You can pass a model directly using model= instead of model_id=
55 | embedding_model = llm.get_embedding_model("3-small")
56 | collection = llm.Collection("entries", db, model=embedding_model)
57 | ```
58 | If the collection already exists in the database you can omit the `model` or `model_id` argument - the model ID will be read from the `collections` table.
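For example, re-opening the collection created above without specifying a model - a small sketch reusing the `db` object from the previous example:

```python
# "entries" already exists in my-embeddings.db, so model/model_id can be
# omitted - the model ID is read from the collections table.
existing = llm.Collection("entries", db)
```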
59 |
60 | To embed a single string and store it in the collection, use the `embed()` method:
61 |
62 | ```python
63 | collection.embed("hound", "my happy hound")
64 | ```
65 | This stores the embedding for the string "my happy hound" in the `entries` collection under the key `hound`.
66 |
67 | Add `store=True` to store the text content itself in the database table along with the embedding vector.
68 |
69 | To attach additional metadata to an item, pass a JSON-compatible dictionary as the `metadata=` argument:
70 |
71 | ```python
72 | collection.embed("hound", "my happy hound", metadata={"name": "Hound"}, store=True)
73 | ```
74 | This additional metadata will be stored as JSON in the `metadata` column of the embeddings database table.
75 |
76 | (embeddings-python-bulk)=
77 | ### Storing embeddings in bulk
78 |
79 | The `collection.embed_multi()` method can be used to store embeddings for multiple items at once. This can be more efficient for some embedding models.
80 |
81 | ```python
82 | collection.embed_multi(
83 | [
84 | ("hound", "my happy hound"),
85 | ("cat", "my dissatisfied cat"),
86 | ],
87 | # Add this to store the strings in the content column:
88 | store=True,
89 | )
90 | ```
91 | To include metadata to be stored with each item, call `embed_multi_with_metadata()`:
92 |
93 | ```python
94 | collection.embed_multi_with_metadata(
95 | [
96 | ("hound", "my happy hound", {"name": "Hound"}),
97 | ("cat", "my dissatisfied cat", {"name": "Cat"}),
98 | ],
99 | # This can also take the store=True argument:
100 | store=True,
101 | )
102 | ```
103 | The `batch_size=` argument defaults to 100, and will be used unless the embedding model itself defines a lower batch size. You can adjust this if you are having trouble with memory while embedding large collections:
104 |
105 | ```python
106 | collection.embed_multi(
107 | (
108 | (i, line)
109 | for i, line in enumerate(lines_in_file)
110 | ),
111 | batch_size=10
112 | )
113 | ```
114 |
115 | (embeddings-python-collection-class)=
116 | ### Collection class reference
117 |
118 | A collection instance has the following properties and methods:
119 |
120 | - `id` - the integer ID of the collection in the database
121 | - `name` - the string name of the collection (unique in the database)
122 | - `model_id` - the string ID of the embedding model used for this collection
123 | - `model()` - returns the `EmbeddingModel` instance, based on that `model_id`
124 | - `count()` - returns the integer number of items in the collection
125 | - `embed(id: str, text: str, metadata: dict=None, store: bool=False)` - embeds the given string and stores it in the collection under the given ID. Can optionally include metadata (stored as JSON) and store the text content itself in the database table.
126 | - `embed_multi(entries: Iterable, store: bool=False, batch_size: int=100)` - see above
127 | - `embed_multi_with_metadata(entries: Iterable, store: bool=False, batch_size: int=100)` - see above
128 | - `similar(query: str, number: int=10)` - returns a list of entries that are most similar to the embedding of the given query string
129 | - `similar_by_id(id: str, number: int=10)` - returns a list of entries that are most similar to the embedding of the item with the given ID
130 | - `similar_by_vector(vector: List[float], number: int=10, skip_id: str=None)` - returns a list of entries that are most similar to the given embedding vector, optionally skipping the entry with the given ID
131 | - `delete()` - deletes the collection and its embeddings from the database
132 |
133 | There is also a `Collection.exists(db, name)` class method which returns a boolean value and can be used to determine if a collection exists or not in a database:
134 |
135 | ```python
136 | if Collection.exists(db, "entries"):
137 | print("The entries collection exists")
138 | ```
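Of the methods listed above, `similar_by_vector()` is not demonstrated elsewhere in this document. A hedged sketch of how it could be used, assuming an embedding model loaded as shown earlier:

```python
# Embed an arbitrary query ourselves, then search the collection by vector,
# excluding the stored "hound" entry from the results.
query_vector = llm.get_embedding_model("3-small").embed("my happy hound")
for entry in collection.similar_by_vector(query_vector, number=5, skip_id="hound"):
    print(entry.id, entry.score)
```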
139 |
140 | (embeddings-python-similar)=
141 | ## Retrieving similar items
142 |
143 | Once you have populated a collection of embeddings you can retrieve the entries that are most similar to a given string using the `similar()` method.
144 |
145 | This method uses a brute force approach, calculating distance scores against every document. This is fine for small collections, but will not scale to large collections. See [issue 216](https://github.com/simonw/llm/issues/216) for plans to add a more scalable approach via vector indexes provided by plugins.
146 |
147 | ```python
148 | for entry in collection.similar("hound"):
149 | print(entry.id, entry.score)
150 | ```
151 | The string will first be embedded using the model for the collection.
152 |
153 | The `entry` object returned is an object with the following properties:
154 |
155 | - `id` - the string ID of the item
156 | - `score` - the floating point similarity score between the item and the query string
157 | - `content` - the string text content of the item, if it was stored - or `None`
158 | - `metadata` - the dictionary (from JSON) metadata for the item, if it was stored - or `None`
159 |
160 | This defaults to returning the 10 most similar items. You can change this by passing a different `number=` argument:
161 | ```python
162 | for entry in collection.similar("hound", number=5):
163 | print(entry.id, entry.score)
164 | ```
165 | The `similar_by_id()` method takes the ID of another item in the collection and returns the most similar items to that one, based on the embedding that has already been stored for it:
166 |
167 | ```python
168 | for entry in collection.similar_by_id("cat"):
169 | print(entry.id, entry.score)
170 | ```
171 | The item itself is excluded from the results.
172 |
173 | (embeddings-sql-schema)=
174 | ## SQL schema
175 |
176 | Here's the SQL schema used by the embeddings database:
177 |
178 |
193 | ```sql
194 | CREATE TABLE [collections] (
195 | [id] INTEGER PRIMARY KEY,
196 | [name] TEXT,
197 | [model] TEXT
198 | )
199 | CREATE TABLE "embeddings" (
200 | [collection_id] INTEGER REFERENCES [collections]([id]),
201 | [id] TEXT,
202 | [embedding] BLOB,
203 | [content] TEXT,
204 | [content_blob] BLOB,
205 | [content_hash] BLOB,
206 | [metadata] TEXT,
207 | [updated] INTEGER,
208 | PRIMARY KEY ([collection_id], [id])
209 | )
210 | ```
211 |
212 |
--------------------------------------------------------------------------------
/docs/embeddings/storage.md:
--------------------------------------------------------------------------------
1 | (embeddings-storage)=
2 | # Embedding storage format
3 |
4 | The default output format of the `llm embed` command is a JSON array of floating point numbers.
5 |
6 | LLM stores embeddings in a space-efficient format: little-endian binary sequences of 32-bit floating point numbers, each represented using 4 bytes.
7 |
8 | These are stored in a `BLOB` column in a SQLite database.
9 |
10 | The following Python functions can be used to convert between this format and an array of floating point numbers:
11 |
12 | ```python
13 | import struct
14 |
15 | def encode(values):
16 | return struct.pack("<" + "f" * len(values), *values)
17 |
18 | def decode(binary):
19 | return struct.unpack("<" + "f" * (len(binary) // 4), binary)
20 | ```
21 |
22 | These functions are available as `llm.encode()` and `llm.decode()`.
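A quick round-trip sketch using those two helpers - note that, like `struct.unpack()`, decoding returns a tuple of floats:

```python
import llm

blob = llm.encode([0.5, -1.25, 3.0])  # 12 bytes: three little-endian 32-bit floats
values = llm.decode(blob)
print(values)  # (0.5, -1.25, 3.0)
```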
23 |
24 | If you are using [NumPy](https://numpy.org/) you can decode one of these binary values like this:
25 |
26 | ```python
27 | import numpy as np
28 |
29 | numpy_array = np.frombuffer(value, "<f4")
30 | ```
--------------------------------------------------------------------------------
/docs/embeddings/writing-plugins.md:
--------------------------------------------------------------------------------
1 | (embeddings-writing-plugins)=
2 | # Writing plugins to add new embedding models
3 |
4 | Read the {ref}`plugin tutorial <tutorial-model-plugin>` for details on how to develop and package a plugin.
5 |
6 | This page shows an example plugin that implements and registers a new embedding model.
7 |
8 | There are two components to an embedding model plugin:
9 |
10 | 1. An implementation of the `register_embedding_models()` hook, which takes a `register` callback function and calls it to register the new model with the LLM plugin system.
11 | 2. A class that extends the `llm.EmbeddingModel` abstract base class.
12 |
13 | The only required method on this class is `embed_batch(texts)`, which takes an iterable of strings and returns an iterator over lists of floating point numbers.
14 |
15 | The following example uses the [sentence-transformers](https://github.com/UKPLab/sentence-transformers) package to provide access to the [MiniLM-L6](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) embedding model.
16 |
17 | ```python
18 | import llm
19 | from sentence_transformers import SentenceTransformer
20 |
21 |
22 | @llm.hookimpl
23 | def register_embedding_models(register):
24 | model_id = "sentence-transformers/all-MiniLM-L6-v2"
25 | register(SentenceTransformerModel(model_id, model_id), aliases=("all-MiniLM-L6-v2",))
26 |
27 |
28 | class SentenceTransformerModel(llm.EmbeddingModel):
29 | def __init__(self, model_id, model_name):
30 | self.model_id = model_id
31 | self.model_name = model_name
32 | self._model = None
33 |
34 | def embed_batch(self, texts):
35 | if self._model is None:
36 | self._model = SentenceTransformer(self.model_name)
37 | results = self._model.encode(texts)
38 | return (list(map(float, result)) for result in results)
39 | ```
40 | Once installed, the model provided by this plugin can be used with the {ref}`llm embed ` command like this:
41 |
42 | ```bash
43 | cat file.txt | llm embed -m sentence-transformers/all-MiniLM-L6-v2
44 | ```
45 | Or via its registered alias like this:
46 | ```bash
47 | cat file.txt | llm embed -m all-MiniLM-L6-v2
48 | ```
49 | [llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers) is a complete example of a plugin that provides an embedding model.
50 |
51 | [Execute Jina embeddings with a CLI using llm-embed-jina](https://simonwillison.net/2023/Oct/26/llm-embed-jina/#how-i-built-the-plugin) talks through a similar process to add support for the [Jina embeddings models](https://jina.ai/news/jina-ai-launches-worlds-first-open-source-8k-text-embedding-rivaling-openai/).
52 |
53 | ## Embedding binary content
54 |
55 | If your model can embed binary content, use the `supports_binary` property to indicate that:
56 |
57 | ```python
58 | class ClipEmbeddingModel(llm.EmbeddingModel):
59 | model_id = "clip"
60 | supports_binary = True
61 | supports_text= True
62 | ```
63 |
64 | `supports_text` defaults to `True` and so is not necessary here. You can set it to `False` if your model only supports binary data.
65 |
66 | If your model accepts binary, your `.embed_batch()` method may be called with a list of Python bytestrings. These may be mixed with regular strings if the model accepts both types of input.
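A hedged sketch of what handling such a mixed batch might look like - the model ID and the two helper methods here are hypothetical, not part of the LLM API:

```python
import llm


class MixedContentEmbeddingModel(llm.EmbeddingModel):
    model_id = "my-mixed-model"  # hypothetical model ID
    supports_binary = True
    supports_text = True

    def embed_batch(self, items):
        # items may mix str and bytes when both supports_* flags are True
        for item in items:
            if isinstance(item, bytes):
                yield self._embed_image(item)  # hypothetical helper for binary input
            else:
                yield self._embed_text(item)  # hypothetical helper for text input
```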
67 |
68 | [llm-clip](https://github.com/simonw/llm-clip) is an example of a model that can embed both binary and text content.
69 |
--------------------------------------------------------------------------------
/docs/fragments.md:
--------------------------------------------------------------------------------
1 | (fragments)=
2 | # Fragments
3 |
4 | LLM prompts can optionally be composed out of **fragments** - reusable pieces of text that are logged just once to the database and can then be attached to multiple prompts.
5 |
6 | These are particularly useful when you are working with long context models, which support feeding large amounts of text in as part of your prompt.
7 |
8 | Fragments primarily exist to save space in the database, but may be used to support other features such as vendor prompt caching as well.
9 |
10 | Fragments can be specified using several different mechanisms:
11 |
12 | - URLs to text files online
13 | - Paths to text files on disk
14 | - Aliases that have been attached to a specific fragment
15 | - Hash IDs of stored fragments, where the ID is the SHA256 hash of the fragment content (see the sketch after this list)
16 | - Fragments that are provided by custom plugins - these look like `plugin-name:argument`
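Since fragment hash IDs are SHA256 hashes of the fragment content, you can compute one yourself. A minimal sketch, assuming the ID is the hex digest of the UTF-8 encoded text:

```python
import hashlib

# Hash a local file's contents the same way a stored fragment is identified
# (assumption: plain SHA256 hex digest of the UTF-8 text).
fragment_text = open("docs.md", encoding="utf-8").read()
fragment_id = hashlib.sha256(fragment_text.encode("utf-8")).hexdigest()
print(fragment_id)  # a 64-character hex ID like those shown under "Browsing fragments"
```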
17 |
18 | (fragments-usage)=
19 | ## Using fragments in a prompt
20 |
21 | Use the `-f/--fragment` option to specify one or more fragments to be used as part of your prompt:
22 |
23 | ```bash
24 | llm -f https://llm.datasette.io/robots.txt "Explain this robots.txt file in detail"
25 | ```
26 | Here we are specifying a fragment using a URL. The contents of that URL will be included in the prompt that is sent to the model, prepended prior to the prompt text.
27 |
28 | The `-f` option can be used multiple times to combine together multiple fragments.
29 |
30 | Fragments can also be files on disk, for example:
31 | ```bash
32 | llm -f setup.py 'extract the metadata'
33 | ```
34 | Use `-` to specify a fragment that is read from standard input:
35 | ```bash
36 | llm -f - 'extract the metadata' < setup.py
37 | ```
38 | This will read the contents of `setup.py` from standard input and use it as a fragment.
39 |
40 | Fragments can also be used as part of your system prompt. Use `--sf value` or `--system-fragment value` instead of `-f`.
41 |
42 | ## Using fragments in chat
43 |
44 | The `chat` command also supports the `-f` and `--sf` arguments to start a chat with fragments.
45 |
46 | ```bash
47 | llm chat -f my_doc.txt
48 | Chatting with gpt-4
49 | Type 'exit' or 'quit' to exit
50 | Type '!multi' to enter multiple lines, then '!end' to finish
51 | Type '!edit' to open your default editor and modify the prompt.
52 | Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments
53 | > Explain this document to me
54 | ```
55 |
56 | Fragments can also be added *during* a chat conversation using the `!fragment <fragment>` command.
57 |
58 | ```bash
59 | Chatting with gpt-4
60 | Type 'exit' or 'quit' to exit
61 | Type '!multi' to enter multiple lines, then '!end' to finish
62 | Type '!edit' to open your default editor and modify the prompt.
63 | Type '!fragment <my_fragment> [<another_fragment> ...]' to insert one or more fragments
64 | > !fragment https://llm.datasette.io/en/stable/fragments.html
65 | ```
66 |
67 | This can be combined with `!multi`:
68 |
69 | ```bash
70 | > !multi
71 | Explain the difference between fragments and templates to me
72 | !fragment https://llm.datasette.io/en/stable/fragments.html https://llm.datasette.io/en/stable/templates.html
73 | !end
74 | ```
75 |
76 | Any `!fragment` lines found in a prompt created with `!edit` will not be parsed.
77 |
78 | (fragments-browsing)=
79 | ## Browsing fragments
80 |
81 | You can view a truncated version of the fragments you have previously stored in your database with the `llm fragments` command:
82 |
83 | ```bash
84 | llm fragments
85 | ```
86 | The output from that command looks like this:
87 |
88 | ```yaml
89 | - hash: 0d6e368f9bc21f8db78c01e192ecf925841a957d8b991f5bf9f6239aa4d81815
90 | aliases: []
91 | datetime_utc: '2025-04-06 07:36:53'
92 | source: https://raw.githubusercontent.com/simonw/llm-docs/refs/heads/main/llm/0.22.txt
93 | content: |-
94 |
95 |
96 | docs/aliases.md
97 |
98 | (aliases)=
99 | #...
100 | - hash: 16b686067375182573e2aa16b5bfc1e64d48350232535d06444537e51f1fd60c
101 | aliases: []
102 | datetime_utc: '2025-04-06 23:03:47'
103 | source: simonw/files-to-prompt/pyproject.toml
104 | content: |-
105 | [project]
106 | name = "files-to-prompt"
107 | version = "0.6"
108 | description = "Concatenate a directory full of...
109 | ```
110 | Those long `hash` values are IDs that can be used to reference a fragment in the future:
111 | ```bash
112 | llm -f 16b686067375182573e2aa16b5bfc1e64d48350232535d06444537e51f1fd60c 'Extract metadata'
113 | ```
114 | Use `-q searchterm` one or more times to search for fragments that match a specific set of search terms.
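For example, to search for stored fragments matching a couple of terms (the terms here are illustrative):
```bash
llm fragments -q pyproject -q metadata
```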
115 |
116 | To view the full content of a fragment use `llm fragments show`:
117 | ```bash
118 | llm fragments show 0d6e368f9bc21f8db78c01e192ecf925841a957d8b991f5bf9f6239aa4d81815
119 | ```
120 |
121 | (fragments-aliases)=
122 | ## Setting aliases for fragments
123 |
124 | You can assign aliases to fragments that you use often using the `llm fragments set` command:
125 | ```bash
126 | llm fragments set mydocs ./docs.md
127 | ```
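An alias can also point at a fragment you have already stored, referenced by any fragment specifier such as its hash ID - a sketch, assuming `llm fragments set` accepts the hash from the earlier example:
```bash
llm fragments set llmdocs 0d6e368f9bc21f8db78c01e192ecf925841a957d8b991f5bf9f6239aa4d81815
```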
128 | To remove an alias, use `llm fragments remove`:
129 | ```bash
130 | llm fragments remove mydocs
131 | ```
132 | You can then use that alias in place of the fragment hash ID:
133 | ```bash
134 | llm -f mydocs 'How do I access metadata?'
135 | ```
136 | Use `llm fragments --aliases` to see a full list of fragments that have been assigned aliases:
137 | ```bash
138 | llm fragments --aliases
139 | ```
140 |
141 | (fragments-logs)=
142 | ## Viewing fragments in your logs
143 |
144 | The `llm logs` command lists the fragments that were used for a prompt. By default these are listed as fragment hash IDs, but you can use the `--expand` option to show the full content of each fragment.
145 |
146 | This command will show the expanded fragments for your most recent conversation:
147 |
148 | ```bash
149 | llm logs -c --expand
150 | ```
151 | You can filter for logs that used a specific fragment using the `-f/--fragment` option:
152 | ```bash
153 | llm logs -c -f 0d6e368f9bc21f8db78c01e192ecf925841a957d8b991f5bf9f6239aa4d81815
154 | ```
155 | This accepts URLs, file paths, aliases, and hash IDs.
156 |
157 | Multiple `-f` options will return responses that used **all** of the specified fragments.
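For example, combining the alias and hash ID used earlier on this page:
```bash
llm logs -f mydocs -f 0d6e368f9bc21f8db78c01e192ecf925841a957d8b991f5bf9f6239aa4d81815
```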
158 |
159 | Fragments are returned by `llm logs --json` as well. By default these are truncated but you can add the `-e/--expand` option to show the full content of each fragment.
160 |
161 | ```bash
162 | llm logs -c --json --expand
163 | ```
164 |
165 | (fragments-plugins)=
166 | ## Using fragments from plugins
167 |
168 | LLM plugins can provide custom fragment loaders, which resolve a `prefix:argument` reference into one or more fragments.
169 |
170 | One example is the [llm-fragments-github plugin](https://github.com/simonw/llm-fragments-github). This can convert the files from a public GitHub repository into a list of fragments, allowing you to ask questions about the full repository.
171 |
172 | Here's how to try that out:
173 |
174 | ```bash
175 | llm install llm-fragments-github
176 | llm -f github:simonw/s3-credentials 'Suggest new features for this tool'
177 | ```
178 | This plugin turns a single call to `-f github:simonw/s3-credentials` into multiple fragments, one for every text file in the [simonw/s3-credentials](https://github.com/simonw/s3-credentials) GitHub repository.
179 |
180 | Running `llm logs -c` will show that this prompt incorporated 26 fragments, one for each file.
181 |
182 | Running `llm logs -c --usage --expand` (shortcut: `llm logs -cue`) includes token usage information and turns each fragment ID into a full copy of that file. [Here's the output of that command](https://gist.github.com/simonw/c9bbbc5f6560b01f4b7882ac0194fb25).
183 |
184 | Fragment plugins can return {ref}`attachments ` (such as images) as well.
185 |
186 | See the {ref}`register_fragment_loaders() plugin hook ` documentation for details on writing your own custom fragment plugin.
187 |
188 | (fragments-loaders)=
189 | ## Listing available fragment prefixes
190 |
191 | The `llm fragments loaders` command shows all prefixes that have been installed by plugins, along with their documentation:
192 |
193 | ```bash
194 | llm install llm-fragments-github
195 | llm fragments loaders
196 | ```
197 | Example output:
198 | ```
199 | github:
200 | Load files from a GitHub repository as fragments
201 |
202 | Argument is a GitHub repository URL or username/repository
203 |
204 | issue:
205 | Fetch GitHub issue and comments as Markdown
206 |
207 | Argument is either "owner/repo/NUMBER"
208 | or "https://github.com/owner/repo/issues/NUMBER"
209 | ```
210 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # LLM
2 |
3 | [](https://github.com/simonw/llm)
4 | [](https://pypi.org/project/llm/)
5 | [](https://llm.datasette.io/en/stable/changelog.html)
6 | [](https://github.com/simonw/llm/actions?query=workflow%3ATest)
7 | [](https://github.com/simonw/llm/blob/main/LICENSE)
8 | [](https://datasette.io/discord-llm)
9 | [](https://formulae.brew.sh/formula/llm)
10 |
11 | A CLI tool and Python library for interacting with **OpenAI**, **Anthropic's Claude**, **Google's Gemini**, **Meta's Llama** and dozens of other Large Language Models, both via remote APIs and with models that can be installed and run on your own machine.
12 |
13 | Watch **[Language models on the command-line](https://www.youtube.com/watch?v=QUXQNi6jQ30)** on YouTube for a demo or [read the accompanying detailed notes](https://simonwillison.net/2024/Jun/17/cli-language-models/).
14 |
15 | With LLM you can:
16 | - {ref}`Run prompts from the command-line `
17 | - {ref}`Store prompts and responses in SQLite `
18 | - {ref}`Generate and store embeddings `
19 | - {ref}`Extract structured content from text and images `
20 | - ... and much, much more
21 |
22 | ## Quick start
23 |
24 | First, install LLM using `pip` or Homebrew or `pipx` or `uv`:
25 |
26 | ```bash
27 | pip install llm
28 | ```
29 | Or with Homebrew (see {ref}`warning note `):
30 | ```bash
31 | brew install llm
32 | ```
33 | Or with [pipx](https://pypa.github.io/pipx/):
34 | ```bash
35 | pipx install llm
36 | ```
37 | Or with [uv](https://docs.astral.sh/uv/guides/tools/):
38 | ```bash
39 | uv tool install llm
40 | ```
41 | If you have an [OpenAI API key](https://platform.openai.com/api-keys) you can run this:
42 | ```bash
43 | # Paste your OpenAI API key into this
44 | llm keys set openai
45 |
46 | # Run a prompt (with the default gpt-4o-mini model)
47 | llm "Ten fun names for a pet pelican"
48 |
49 | # Extract text from an image
50 | llm "extract text" -a scanned-document.jpg
51 |
52 | # Use a system prompt against a file
53 | cat myfile.py | llm -s "Explain this code"
54 | ```
55 | Run prompts against [Gemini](https://aistudio.google.com/apikey) or [Anthropic](https://console.anthropic.com/) with their respective plugins:
56 | ```bash
57 | llm install llm-gemini
58 | llm keys set gemini
59 | # Paste Gemini API key here
60 | llm -m gemini-2.0-flash 'Tell me fun facts about Mountain View'
61 |
62 | llm install llm-anthropic
63 | llm keys set anthropic
64 | # Paste Anthropic API key here
65 | llm -m claude-4-opus 'Impress me with wild facts about turnips'
66 | ```
67 | You can also {ref}`install a plugin ` to access models that can run on your local device. If you use [Ollama](https://ollama.com/):
68 | ```bash
69 | # Install the plugin
70 | llm install llm-ollama
71 |
72 | # Download and run a prompt against the Llama 3.2 model
73 | ollama pull llama3.2:latest
74 | llm -m llama3.2:latest 'What is the capital of France?'
75 | ```
76 | To start {ref}`an interactive chat ` with a model, use `llm chat`:
77 | ```bash
78 | llm chat -m gpt-4.1
79 | ```
80 | ```
81 | Chatting with gpt-4.1
82 | Type 'exit' or 'quit' to exit
83 | Type '!multi' to enter multiple lines, then '!end' to finish
84 | Type '!edit' to open your default editor and modify the prompt.
85 | Type '!fragment <fragment> [<fragment> ...]' to insert one or more fragments
86 | > Tell me a joke about a pelican
87 | Why don't pelicans like to tip waiters?
88 |
89 | Because they always have a big bill!
90 | ```
91 |
92 | More background on this project:
93 |
94 | - [llm, ttok and strip-tags—CLI tools for working with ChatGPT and other LLMs](https://simonwillison.net/2023/May/18/cli-tools-for-llms/)
95 | - [The LLM CLI tool now supports self-hosted language models via plugins](https://simonwillison.net/2023/Jul/12/llm/)
96 | - [LLM now provides tools for working with embeddings](https://simonwillison.net/2023/Sep/4/llm-embeddings/)
97 | - [Build an image search engine with llm-clip, chat with models with llm chat](https://simonwillison.net/2023/Sep/12/llm-clip-and-chat/)
98 | - [You can now run prompts against images, audio and video in your terminal using LLM](https://simonwillison.net/2024/Oct/29/llm-multi-modal/)
99 | - [Structured data extraction from unstructured content using LLM schemas](https://simonwillison.net/2025/Feb/28/llm-schemas/)
100 | - [Long context support in LLM 0.24 using fragments and template plugins](https://simonwillison.net/2025/Apr/7/long-context-llm/)
101 |
102 | See also [the llm tag](https://simonwillison.net/tags/llm/) on my blog.
103 |
104 | ## Contents
105 |
106 | ```{toctree}
107 | ---
108 | maxdepth: 3
109 | ---
110 | setup
111 | usage
112 | openai-models
113 | other-models
114 | tools
115 | schemas
116 | templates
117 | fragments
118 | aliases
119 | embeddings/index
120 | plugins/index
121 | python-api
122 | logging
123 | related-tools
124 | help
125 | contributing
126 | ```
127 | ```{toctree}
128 | ---
129 | maxdepth: 1
130 | ---
131 | changelog
132 | ```
--------------------------------------------------------------------------------
/docs/logging.md:
--------------------------------------------------------------------------------
1 | (logging)=
2 | # Logging to SQLite
3 |
4 | `llm` defaults to logging all prompts and responses to a SQLite database.
5 |
6 | You can find the location of that database using the `llm logs path` command:
7 |
8 | ```bash
9 | llm logs path
10 | ```
11 | On my Mac that outputs:
12 | ```
13 | /Users/simon/Library/Application Support/io.datasette.llm/logs.db
14 | ```
15 | This will differ for other operating systems.
16 |
17 | To avoid logging an individual prompt, pass `--no-log` or `-n` to the command:
18 | ```bash
19 | llm 'Ten names for cheesecakes' -n
20 | ```
21 |
22 | To turn off logging by default:
23 |
24 | ```bash
25 | llm logs off
26 | ```
27 | If you've turned off logging you can still log an individual prompt and response by adding `--log`:
28 | ```bash
29 | llm 'Five ambitious names for a pet pterodactyl' --log
30 | ```
31 | To turn logging back on by default:
32 |
33 | ```bash
34 | llm logs on
35 | ```
36 | To see the status of the logs database, run this:
37 | ```bash
38 | llm logs status
39 | ```
40 | Example output:
41 | ```
42 | Logging is ON for all prompts
43 | Found log database at /Users/simon/Library/Application Support/io.datasette.llm/logs.db
44 | Number of conversations logged: 33
45 | Number of responses logged: 48
46 | Database file size: 19.96MB
47 | ```
48 |
49 | (logging-view)=
50 |
51 | ## Viewing the logs
52 |
53 | You can view the logs using the `llm logs` command:
54 | ```bash
55 | llm logs
56 | ```
57 | This will output the three most recent logged items in Markdown format, showing both the prompt and the response.
58 |
59 | To get back just the most recent prompt response as plain text, add `-r/--response`:
60 |
61 | ```bash
62 | llm logs -r
63 | ```
64 | Use `-x/--extract` to extract and return the first fenced code block from the selected log entries:
65 |
66 | ```bash
67 | llm logs --extract
68 | ```
69 | Or `--xl/--extract-last` for the last fenced code block:
70 | ```bash
71 | llm logs --extract-last
72 | ```
73 |
74 | Add `--json` to get the log messages in JSON instead:
75 |
76 | ```bash
77 | llm logs --json
78 | ```
79 |
80 | Add `-n 10` to see the ten most recent items:
81 | ```bash
82 | llm logs -n 10
83 | ```
84 | Or `-n 0` to see everything that has ever been logged:
85 | ```bash
86 | llm logs -n 0
87 | ```
88 | You can truncate the display of the prompts and responses using the `-t/--truncate` option. This can help make the JSON output more readable - though the `--short` option is usually better.
89 | ```bash
90 | llm logs -n 1 -t --json
91 | ```
92 | Example output:
93 | ```json
94 | [
95 | {
96 | "id": "01jm8ec74wxsdatyn5pq1fp0s5",
97 | "model": "anthropic/claude-3-haiku-20240307",
98 | "prompt": "hi",
99 | "system": null,
100 | "prompt_json": null,
101 | "response": "Hello! How can I assist you today?",
102 | "conversation_id": "01jm8ec74taftdgj2t4zra9z0j",
103 | "duration_ms": 560,
104 | "datetime_utc": "2025-02-16T22:34:30.374882+00:00",
105 | "input_tokens": 8,
106 | "output_tokens": 12,
107 | "token_details": null,
108 | "conversation_name": "hi",
109 | "conversation_model": "anthropic/claude-3-haiku-20240307",
110 | "attachments": []
111 | }
112 | ]
113 | ```
114 |
115 | (logging-short)=
116 |
117 | ### -s/--short mode
118 |
119 | Use `-s/--short` to see a shortened YAML log with truncated prompts and no responses:
120 | ```bash
121 | llm logs -n 2 --short
122 | ```
123 | Example output:
124 | ```yaml
125 | - model: deepseek-reasoner
126 | datetime: '2025-02-02T06:39:53'
127 | conversation: 01jk2pk05xq3d0vgk0202zrsg1
128 | prompt: H01 There are five huts. H02 The Scotsman lives in the purple hut. H03 The Welshman owns the parrot. H04 Kombucha is...
129 | - model: o3-mini
130 | datetime: '2025-02-02T19:03:05'
131 | conversation: 01jk40qkxetedzpf1zd8k9bgww
132 | system: Formatting re-enabled. Write a detailed README with extensive usage examples.
133 | prompt: ./Cargo.toml [package] name = "py-limbo" version...
134 | ```
135 | Include `-u/--usage` to include token usage information:
136 |
137 | ```bash
138 | llm logs -n 1 --short --usage
139 | ```
140 | Example output:
141 | ```yaml
142 | - model: o3-mini
143 | datetime: '2025-02-16T23:00:56'
144 | conversation: 01jm8fxxnef92n1663c6ays8xt
145 | system: Produce Python code that demonstrates every possible usage of yaml.dump
146 | with all of the arguments it can take, especi...
147 | prompt: ./setup.py
148 | NAME = 'PyYAML' VERSION = '7.0.0.dev0...
149 | usage:
150 | input: 74793
151 | output: 3550
152 | details:
153 | completion_tokens_details:
154 | reasoning_tokens: 2240
155 | ```
156 |
157 | (logging-conversation)=
158 |
159 | ### Logs for a conversation
160 |
161 | To view the logs for the most recent {ref}`conversation ` you have had with a model, use `-c`:
162 |
163 | ```bash
164 | llm logs -c
165 | ```
166 | To see logs for a specific conversation based on its ID, use `--cid ID` or `--conversation ID`:
167 |
168 | ```bash
169 | llm logs --cid 01h82n0q9crqtnzmf13gkyxawg
170 | ```
171 |
172 | (logging-search)=
173 |
174 | ### Searching the logs
175 |
176 | You can search the logs for a search term in the `prompt` or the `response` columns.
177 | ```bash
178 | llm logs -q 'cheesecake'
179 | ```
180 | The most relevant results will be shown at the bottom of the output.
181 |
182 | (logging-filter-id)=
183 |
184 | ### Filtering past a specific ID
185 |
186 | If you want to retrieve all of the logs that were recorded since a specific response ID you can do so using these options:
187 |
188 | - `--id-gt $ID` - every record with an ID greater than $ID
189 | - `--id-gte $ID` - every record with an ID greater than or equal to $ID
190 |
191 | IDs are always issued in ascending order by time, so this provides a useful way to see everything that has happened since a particular record.
192 |
193 | This can be particularly useful when {ref}`working with schema data `, where you might want to access every record that you have created using a specific `--schema` but exclude records you have previously processed.
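A sketch of that pattern, assuming the two filters can be combined (the `--schema` option is described below and the response ID here is illustrative):
```bash
llm logs --schema 'name, age int, bio' --id-gt 01jm8ec74wxsdatyn5pq1fp0s5
```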
194 |
195 | (logging-filter-model)=
196 |
197 | ### Filtering by model
198 |
199 | You can filter to logs just for a specific model (or model alias) using `-m/--model`:
200 | ```bash
201 | llm logs -m chatgpt
202 | ```
203 |
204 | (logging-filter-fragments)=
205 |
206 | ### Filtering by prompts that used specific fragments
207 |
208 | The `-f/--fragment X` option will filter for just responses that were created using the specified {ref}`fragment ` hash or alias or URL or filename.
209 |
210 | Fragments are displayed in the logs as their hash ID. Add `-e/--expand` to display fragments as their full content - this option works for both the default Markdown and the `--json` mode:
211 |
212 | ```bash
213 | llm logs -f https://llm.datasette.io/robots.txt --expand
214 | ```
215 | You can display just the content for a specific fragment hash ID (or alias) using the `llm fragments show` command:
216 |
217 | ```bash
218 | llm fragments show 993fd38d898d2b59fd2d16c811da5bdac658faa34f0f4d411edde7c17ebb0680
219 | ```
220 | If you provide multiple fragments you will get back responses that used _all_ of those fragments.
221 |
222 | (logging-filter-tools)=
223 |
224 | ### Filtering by prompts that used specific tools
225 |
226 | You can filter for responses that used a specific tool with the `--tool/-T` option:
227 |
228 | ```bash
229 | llm logs -T simple_eval
230 | ```
231 | This will match responses that involved a _result_ from that tool. If the tool was never actually executed, the response will not be included.
232 |
233 | Pass `--tool/-T` multiple times for responses that used all of the specified tools.
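For example (the second tool name here is illustrative):
```bash
llm logs -T simple_eval -T upper
```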
234 |
235 | Use the `llm logs --tools` flag to see _all_ responses that involved at least one tool result, including tools defined using `--functions`:
236 |
237 | ```bash
238 | llm logs --tools
239 | ```
240 |
241 | (logging-filter-schemas)=
242 |
243 | ### Browsing data collected using schemas
244 |
245 | The `--schema X` option can be used to view responses that used the specified schema, using any of the {ref}`ways to specify a schema `:
246 |
247 | ```bash
248 | llm logs --schema 'name, age int, bio'
249 | ```
250 |
251 | This can be combined with `--data` and `--data-array` and `--data-key` to extract just the returned JSON data - consult the {ref}`schemas documentation ` for details.
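For example, a quick sketch that prints just the JSON data collected with that schema:
```bash
llm logs --schema 'name, age int, bio' --data
```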
252 |
253 | (logging-datasette)=
254 |
255 | ## Browsing logs using Datasette
256 |
257 | You can also use [Datasette](https://datasette.io/) to browse your logs like this:
258 |
259 | ```bash
260 | datasette "$(llm logs path)"
261 | ```
262 |
263 | (logging-backup)=
264 |
265 | ## Backing up your database
266 |
267 | You can backup your logs to another file using the `llm logs backup` command:
268 |
269 | ```bash
270 | llm logs backup /tmp/backup.db
271 | ```
272 | This uses SQLite [VACUUM INTO](https://sqlite.org/lang_vacuum.html#vacuum_with_an_into_clause) under the hood.
273 |
274 | (logging-sql-schema)=
275 |
276 | ## SQL schema
277 |
278 | Here's the SQL schema used by the `logs.db` database:
279 |
280 |
305 | ```sql
306 | CREATE TABLE [conversations] (
307 | [id] TEXT PRIMARY KEY,
308 | [name] TEXT,
309 | [model] TEXT
310 | );
311 | CREATE TABLE [schemas] (
312 | [id] TEXT PRIMARY KEY,
313 | [content] TEXT
314 | );
315 | CREATE TABLE "responses" (
316 | [id] TEXT PRIMARY KEY,
317 | [model] TEXT,
318 | [prompt] TEXT,
319 | [system] TEXT,
320 | [prompt_json] TEXT,
321 | [options_json] TEXT,
322 | [response] TEXT,
323 | [response_json] TEXT,
324 | [conversation_id] TEXT REFERENCES [conversations]([id]),
325 | [duration_ms] INTEGER,
326 | [datetime_utc] TEXT,
327 | [input_tokens] INTEGER,
328 | [output_tokens] INTEGER,
329 | [token_details] TEXT,
330 | [schema_id] TEXT REFERENCES [schemas]([id]),
331 | [resolved_model] TEXT
332 | );
333 | CREATE VIRTUAL TABLE [responses_fts] USING FTS5 (
334 | [prompt],
335 | [response],
336 | content=[responses]
337 | );
338 | CREATE TABLE [attachments] (
339 | [id] TEXT PRIMARY KEY,
340 | [type] TEXT,
341 | [path] TEXT,
342 | [url] TEXT,
343 | [content] BLOB
344 | );
345 | CREATE TABLE [prompt_attachments] (
346 | [response_id] TEXT REFERENCES [responses]([id]),
347 | [attachment_id] TEXT REFERENCES [attachments]([id]),
348 | [order] INTEGER,
349 | PRIMARY KEY ([response_id],
350 | [attachment_id])
351 | );
352 | CREATE TABLE [fragments] (
353 | [id] INTEGER PRIMARY KEY,
354 | [hash] TEXT,
355 | [content] TEXT,
356 | [datetime_utc] TEXT,
357 | [source] TEXT
358 | );
359 | CREATE TABLE [fragment_aliases] (
360 | [alias] TEXT PRIMARY KEY,
361 | [fragment_id] INTEGER REFERENCES [fragments]([id])
362 | );
363 | CREATE TABLE "prompt_fragments" (
364 | [response_id] TEXT REFERENCES [responses]([id]),
365 | [fragment_id] INTEGER REFERENCES [fragments]([id]),
366 | [order] INTEGER,
367 | PRIMARY KEY ([response_id],
368 | [fragment_id],
369 | [order])
370 | );
371 | CREATE TABLE "system_fragments" (
372 | [response_id] TEXT REFERENCES [responses]([id]),
373 | [fragment_id] INTEGER REFERENCES [fragments]([id]),
374 | [order] INTEGER,
375 | PRIMARY KEY ([response_id],
376 | [fragment_id],
377 | [order])
378 | );
379 | CREATE TABLE [tools] (
380 | [id] INTEGER PRIMARY KEY,
381 | [hash] TEXT,
382 | [name] TEXT,
383 | [description] TEXT,
384 | [input_schema] TEXT,
385 | [plugin] TEXT
386 | );
387 | CREATE TABLE [tool_responses] (
388 | [tool_id] INTEGER REFERENCES [tools]([id]),
389 | [response_id] TEXT REFERENCES [responses]([id]),
390 | PRIMARY KEY ([tool_id],
391 | [response_id])
392 | );
393 | CREATE TABLE [tool_calls] (
394 | [id] INTEGER PRIMARY KEY,
395 | [response_id] TEXT REFERENCES [responses]([id]),
396 | [tool_id] INTEGER REFERENCES [tools]([id]),
397 | [name] TEXT,
398 | [arguments] TEXT,
399 | [tool_call_id] TEXT
400 | );
401 | CREATE TABLE "tool_results" (
402 | [id] INTEGER PRIMARY KEY,
403 | [response_id] TEXT REFERENCES [responses]([id]),
404 | [tool_id] INTEGER REFERENCES [tools]([id]),
405 | [name] TEXT,
406 | [output] TEXT,
407 | [tool_call_id] TEXT,
408 | [instance_id] INTEGER REFERENCES [tool_instances]([id]),
409 | [exception] TEXT
410 | );
411 | CREATE TABLE [tool_instances] (
412 | [id] INTEGER PRIMARY KEY,
413 | [plugin] TEXT,
414 | [name] TEXT,
415 | [arguments] TEXT
416 | );
417 | ```
418 |
419 | `responses_fts` configures [SQLite full-text search](https://www.sqlite.org/fts5.html) against the `prompt` and `response` columns in the `responses` table.
420 |
--------------------------------------------------------------------------------
/docs/openai-models.md:
--------------------------------------------------------------------------------
1 | (openai-models)=
2 |
3 | # OpenAI models
4 |
5 | LLM ships with a default plugin for talking to OpenAI's API. OpenAI offer both language models and embedding models, and LLM can access both types.
6 |
7 | (openai-models-configuration)=
8 |
9 | ## Configuration
10 |
11 | All OpenAI models are accessed using an API key. You can obtain one from [the API keys page](https://platform.openai.com/api-keys) on their site.
12 |
13 | Once you have created a key, configure LLM to use it by running:
14 |
15 | ```bash
16 | llm keys set openai
17 | ```
18 | Then paste in the API key.
19 |
20 | (openai-models-language)=
21 |
22 | ## OpenAI language models
23 |
24 | Run `llm models` for a full list of available models. The OpenAI models supported by LLM are:
25 |
26 |
33 | ```
34 | OpenAI Chat: gpt-4o (aliases: 4o)
35 | OpenAI Chat: chatgpt-4o-latest (aliases: chatgpt-4o)
36 | OpenAI Chat: gpt-4o-mini (aliases: 4o-mini)
37 | OpenAI Chat: gpt-4o-audio-preview
38 | OpenAI Chat: gpt-4o-audio-preview-2024-12-17
39 | OpenAI Chat: gpt-4o-audio-preview-2024-10-01
40 | OpenAI Chat: gpt-4o-mini-audio-preview
41 | OpenAI Chat: gpt-4o-mini-audio-preview-2024-12-17
42 | OpenAI Chat: gpt-4.1 (aliases: 4.1)
43 | OpenAI Chat: gpt-4.1-mini (aliases: 4.1-mini)
44 | OpenAI Chat: gpt-4.1-nano (aliases: 4.1-nano)
45 | OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt)
46 | OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k)
47 | OpenAI Chat: gpt-4 (aliases: 4, gpt4)
48 | OpenAI Chat: gpt-4-32k (aliases: 4-32k)
49 | OpenAI Chat: gpt-4-1106-preview
50 | OpenAI Chat: gpt-4-0125-preview
51 | OpenAI Chat: gpt-4-turbo-2024-04-09
52 | OpenAI Chat: gpt-4-turbo (aliases: gpt-4-turbo-preview, 4-turbo, 4t)
53 | OpenAI Chat: gpt-4.5-preview-2025-02-27
54 | OpenAI Chat: gpt-4.5-preview (aliases: gpt-4.5)
55 | OpenAI Chat: o1
56 | OpenAI Chat: o1-2024-12-17
57 | OpenAI Chat: o1-preview
58 | OpenAI Chat: o1-mini
59 | OpenAI Chat: o3-mini
60 | OpenAI Chat: o3
61 | OpenAI Chat: o4-mini
62 | OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct)
63 | ```
64 |
65 |
66 | See [the OpenAI models documentation](https://platform.openai.com/docs/models) for details of each of these.
67 |
68 | `gpt-4o-mini` (aliased to `4o-mini`) is the least expensive model, and is the default used if you don't specify a model at all. Consult [OpenAI's model documentation](https://platform.openai.com/docs/models) for details of the other models.
69 |
70 | [o1-pro](https://platform.openai.com/docs/models/o1-pro) is not available through the Chat Completions API used by LLM's default OpenAI plugin. You can install the new [llm-openai-plugin](https://github.com/simonw/llm-openai-plugin) plugin to access that model.
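One way to try that out, checking the exact model ID the plugin registers rather than assuming it:
```bash
llm install llm-openai-plugin
llm models | grep -i o1-pro
```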
71 |
72 | ## Model features
73 |
74 | The following features work with OpenAI models:
75 |
76 | - {ref}`System prompts ` can be used to provide instructions that have a higher weight than the prompt itself.
77 | - {ref}`Attachments `. Many OpenAI models support image inputs - check which ones using `llm models --options`. Any model that accepts images can also accept PDFs.
78 | - {ref}`Schemas ` can be used to influence the JSON structure of the model output.
79 | - {ref}`Model options ` can be used to set parameters like `temperature`. Use `llm models --options` for a full list of supported options.
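For example, a sketch combining an attachment with a model option (the file name and option value are illustrative):
```bash
llm -m gpt-4o 'Describe this photo' -a photo.jpg -o temperature 0.2
```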
80 |
81 | (openai-models-embedding)=
82 |
83 | ## OpenAI embedding models
84 |
85 | Run `llm embed-models` for a list of {ref}`embedding models `. The following OpenAI embedding models are supported by LLM:
86 |
87 | ```
88 | ada-002 (aliases: ada, oai)
89 | 3-small
90 | 3-large
91 | 3-small-512
92 | 3-large-256
93 | 3-large-1024
94 | ```
95 |
96 | The `3-small` model is currently the least expensive. `3-large` costs more but is more capable - see [New embedding models and API updates](https://openai.com/blog/new-embedding-models-and-api-updates) on the OpenAI blog for details and benchmarks.
97 |
98 | An important characteristic of any embedding model is the size of the vector it returns. Smaller vectors cost less to store and query, but may be less accurate.
99 |
100 | OpenAI `3-small` and `3-large` vectors can be safely truncated to lower dimensions without losing too much accuracy. The numerically suffixed models provided by LLM are pre-configured to do this, so `3-large-256` is the `3-large` model truncated to 256 dimensions.
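For example, a minimal sketch that embeds a short string using the truncated variant:
```bash
llm embed -m 3-large-256 -c 'A short string to embed'
```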
101 |
102 | The vector sizes of the supported OpenAI embedding models are as follows:
103 |
104 | | Model | Size |
105 | | --- | --- |
106 | | ada-002 | 1536 |
107 | | 3-small | 1536 |
108 | | 3-large | 3072 |
109 | | 3-small-512 | 512 |
110 | | 3-large-256 | 256 |
111 | | 3-large-1024 | 1024 |
112 |
113 | (openai-completion-models)=
114 |
115 | ## OpenAI completion models
116 |
117 | The `gpt-3.5-turbo-instruct` model is a little different - it is a completion model rather than a chat model, described in [the OpenAI completions documentation](https://platform.openai.com/docs/api-reference/completions/create).
118 |
119 | Completion models can be called with the `-o logprobs 3` option (not supported by chat models) which will cause LLM to store 3 log probabilities for each returned token in the SQLite database. Consult [this issue](https://github.com/simonw/llm/issues/284#issuecomment-1724772704) for details on how to read these values.
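For example:
```bash
llm -m gpt-3.5-turbo-instruct 'Say hello' -o logprobs 3
```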
120 |
121 | (openai-extra-models)=
122 |
123 | ## Adding more OpenAI models
124 |
125 | OpenAI occasionally release new models with new names. LLM aims to ship new releases to support these, but you can also configure them directly by adding them to an `extra-openai-models.yaml` configuration file.
126 |
127 | Run this command to find the directory in which this file should be created:
128 |
129 | ```bash
130 | dirname "$(llm logs path)"
131 | ```
132 | On my Mac laptop I get this:
133 | ```
134 | ~/Library/Application Support/io.datasette.llm
135 | ```
136 | Create a file in that directory called `extra-openai-models.yaml`.
137 |
138 | Let's say OpenAI have just released the `gpt-3.5-turbo-0613` model and you want to use it, despite LLM not yet shipping support. You could configure that by adding this to the file:
139 |
140 | ```yaml
141 | - model_id: gpt-3.5-turbo-0613
142 | model_name: gpt-3.5-turbo-0613
143 | aliases: ["0613"]
144 | ```
145 | The `model_id` is the identifier that will be recorded in the LLM logs. You can use this to specify the model, or you can optionally include a list of aliases for that model. The `model_name` is the actual model identifier that will be passed to the API, which must match exactly what the API expects.
146 |
147 | If the model is a completion model (such as `gpt-3.5-turbo-instruct`) add `completion: true` to the configuration.
148 |
149 | If the model supports structured extraction using json_schema, add `supports_schema: true` to the configuration.
150 |
151 | For reasoning models like `o1` or `o3-mini` add `reasoning: true`.
152 |
153 | With this configuration in place, the following command should run a prompt against the new model:
154 |
155 | ```bash
156 | llm -m 0613 'What is the capital of France?'
157 | ```
158 | Run `llm models` to confirm that the new model is now available:
159 | ```bash
160 | llm models
161 | ```
162 | Example output:
163 | ```
164 | OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt)
165 | OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k)
166 | OpenAI Chat: gpt-4 (aliases: 4, gpt4)
167 | OpenAI Chat: gpt-4-32k (aliases: 4-32k)
168 | OpenAI Chat: gpt-3.5-turbo-0613 (aliases: 0613)
169 | ```
170 | Running `llm logs -n 1` should confirm that the prompt and response have been correctly logged to the database.
171 |
--------------------------------------------------------------------------------
/docs/other-models.md:
--------------------------------------------------------------------------------
1 | (other-models)=
2 | # Other models
3 |
4 | LLM supports OpenAI models by default. You can install {ref}`plugins ` to add support for other models. You can also add additional OpenAI-API-compatible models {ref}`using a configuration file `.
5 |
6 | ## Installing and using a local model
7 |
8 | {ref}`LLM plugins ` can provide local models that run on your machine.
9 |
10 | To install **[llm-gpt4all](https://github.com/simonw/llm-gpt4all)**, providing 17 models from the [GPT4All](https://gpt4all.io/) project, run this:
11 |
12 | ```bash
13 | llm install llm-gpt4all
14 | ```
15 | Run `llm models` to see the expanded list of available models.
16 |
17 | To run a prompt through one of the models from GPT4All specify it using `-m/--model`:
18 | ```bash
19 | llm -m orca-mini-3b-gguf2-q4_0 'What is the capital of France?'
20 | ```
21 | The model will be downloaded and cached the first time you use it.
22 |
23 | Check the {ref}`plugin directory ` for the latest list of available plugins for other models.
24 |
25 | (openai-compatible-models)=
26 |
27 | ## OpenAI-compatible models
28 |
29 | Projects such as [LocalAI](https://localai.io/) offer a REST API that imitates the OpenAI API but can be used to run other models, including models that can be installed on your own machine. These can be added using the same configuration mechanism.
30 |
31 | The `model_id` is the name LLM will use for the model. The `model_name` is the name which needs to be passed to the API - this might differ from the `model_id`, especially if the `model_id` could potentially clash with other installed models.
32 |
33 | The `api_base` key can be used to point the OpenAI client library at a different API endpoint.
34 |
35 | To add the `orca-mini-3b` model hosted by a local installation of [LocalAI](https://localai.io/), add this to your `extra-openai-models.yaml` file:
36 |
37 | ```yaml
38 | - model_id: orca-openai-compat
39 | model_name: orca-mini-3b.ggmlv3
40 | api_base: "http://localhost:8080"
41 | ```
42 | If the `api_base` is set, the existing configured `openai` API key will not be sent by default.
43 |
44 | You can set `api_key_name` to the name of a key stored using the {ref}`api-keys` feature.
45 |
46 | Add `completion: true` if the model is a completion model that uses a `/completions` endpoint as opposed to a `/chat/completions` endpoint.
47 |
48 | If a model does not support streaming, add `can_stream: false` to disable the streaming option.
49 |
50 | If a model supports structured output via JSON schemas, you can add `supports_schema: true` to support this feature.
51 |
52 | If a model is a vision model, you can add `vision: true` to support this feature and use image attachments.
53 |
54 | If a model is an audio model, you can add `audio: true` to support this feature and use audio attachments.
55 |
56 | Having configured the model like this, run `llm models` to check that it installed correctly. You can then run prompts against it like so:
57 |
58 | ```bash
59 | llm -m orca-openai-compat 'What is the capital of France?'
60 | ```
61 | And confirm they were logged correctly with:
62 | ```bash
63 | llm logs -n 1
64 | ```
65 |
66 | ### Extra HTTP headers
67 |
68 | Some providers such as [openrouter.ai](https://openrouter.ai/docs) may require the setting of additional HTTP headers. You can set those using the `headers:` key like this:
69 |
70 | ```yaml
71 | - model_id: claude
72 | model_name: anthropic/claude-2
73 | api_base: "https://openrouter.ai/api/v1"
74 | api_key_name: openrouter
75 | headers:
76 | HTTP-Referer: "https://llm.datasette.io/"
77 | X-Title: LLM
78 | ```
79 |
--------------------------------------------------------------------------------
/docs/plugins/directory.md:
--------------------------------------------------------------------------------
1 | (plugin-directory)=
2 | # Plugin directory
3 |
4 | The following plugins are available for LLM. Here's {ref}`how to install them `.
5 |
6 | ## Local models
7 |
8 | These plugins all help you run LLMs directly on your own computer:
9 |
10 | - **[llm-gguf](https://github.com/simonw/llm-gguf)** uses [llama.cpp](https://github.com/ggerganov/llama.cpp) to run models published in the GGUF format.
11 | - **[llm-mlx](https://github.com/simonw/llm-mlx)** (Mac only) uses Apple's MLX framework to provide extremely high performance access to a large number of local models.
12 | - **[llm-ollama](https://github.com/taketwo/llm-ollama)** adds support for local models run using [Ollama](https://ollama.ai/).
13 | - **[llm-llamafile](https://github.com/simonw/llm-llamafile)** adds support for models that are running locally using [llamafile](https://github.com/Mozilla-Ocho/llamafile).
14 | - **[llm-mlc](https://github.com/simonw/llm-mlc)** can run local models released by the [MLC project](https://mlc.ai/mlc-llm/), including models that can take advantage of the GPU on Apple Silicon M1/M2 devices.
15 | - **[llm-gpt4all](https://github.com/simonw/llm-gpt4all)** adds support for various models released by the [GPT4All](https://gpt4all.io/) project that are optimized to run locally on your own machine. These models include versions of Vicuna, Orca, Falcon and MPT - here's [a full list of models](https://observablehq.com/@simonw/gpt4all-models).
16 | - **[llm-mpt30b](https://github.com/simonw/llm-mpt30b)** adds support for the [MPT-30B](https://huggingface.co/mosaicml/mpt-30b) local model.
17 |
18 | ## Remote APIs
19 |
20 | These plugins can be used to interact with remotely hosted models via their API:
21 |
22 | - **[llm-mistral](https://github.com/simonw/llm-mistral)** adds support for [Mistral AI](https://mistral.ai/)'s language and embedding models.
23 | - **[llm-gemini](https://github.com/simonw/llm-gemini)** adds support for Google's [Gemini](https://ai.google.dev/docs) models.
24 | - **[llm-anthropic](https://github.com/simonw/llm-anthropic)** supports Anthropic's [Claude 3 family](https://www.anthropic.com/news/claude-3-family), [3.5 Sonnet](https://www.anthropic.com/news/claude-3-5-sonnet) and beyond.
25 | - **[llm-command-r](https://github.com/simonw/llm-command-r)** supports Cohere's Command R and [Command R Plus](https://txt.cohere.com/command-r-plus-microsoft-azure/) API models.
26 | - **[llm-reka](https://github.com/simonw/llm-reka)** supports the [Reka](https://www.reka.ai/) family of models via their API.
27 | - **[llm-perplexity](https://github.com/hex/llm-perplexity)** by Alexandru Geana supports the [Perplexity Labs](https://docs.perplexity.ai/) API models, including `llama-3-sonar-large-32k-online` which can search for things online and `llama-3-70b-instruct`.
28 | - **[llm-groq](https://github.com/angerman/llm-groq)** by Moritz Angermann provides access to fast models hosted by [Groq](https://console.groq.com/docs/models).
29 | - **[llm-grok](https://github.com/Hiepler/llm-grok)** by Benedikt Hiepler provides access to the Grok models via the [xAI API](https://x.ai/api).
30 | - **[llm-anyscale-endpoints](https://github.com/simonw/llm-anyscale-endpoints)** supports models hosted on the [Anyscale Endpoints](https://app.endpoints.anyscale.com/) platform, including Llama 2 70B.
31 | - **[llm-replicate](https://github.com/simonw/llm-replicate)** adds support for remote models hosted on [Replicate](https://replicate.com/), including Llama 2 from Meta AI.
32 | - **[llm-fireworks](https://github.com/simonw/llm-fireworks)** supports models hosted by [Fireworks AI](https://fireworks.ai/).
33 | - **[llm-openrouter](https://github.com/simonw/llm-openrouter)** provides access to models hosted on [OpenRouter](https://openrouter.ai/).
34 | - **[llm-cohere](https://github.com/Accudio/llm-cohere)** by Alistair Shepherd provides `cohere-generate` and `cohere-summarize` API models, powered by [Cohere](https://cohere.com/).
35 | - **[llm-bedrock](https://github.com/simonw/llm-bedrock)** adds support for Nova by Amazon via Amazon Bedrock.
36 | - **[llm-bedrock-anthropic](https://github.com/sblakey/llm-bedrock-anthropic)** by Sean Blakey adds support for Claude and Claude Instant by Anthropic via Amazon Bedrock.
37 | - **[llm-bedrock-meta](https://github.com/flabat/llm-bedrock-meta)** by Fabian Labat adds support for Llama 2 and Llama 3 by Meta via Amazon Bedrock.
38 | - **[llm-together](https://github.com/wearedevx/llm-together)** adds support for [Together AI](https://www.together.ai/)'s extensive family of hosted, openly licensed models.
39 | - **[llm-deepseek](https://github.com/abrasumente233/llm-deepseek)** adds support for [DeepSeek](https://deepseek.com)'s DeepSeek-Chat and DeepSeek-Coder models.
40 | - **[llm-lambda-labs](https://github.com/simonw/llm-lambda-labs)** provides access to models hosted by [Lambda Labs](https://docs.lambdalabs.com/public-cloud/lambda-chat-api/), including the Nous Hermes 3 series.
41 | - **[llm-venice](https://github.com/ar-jan/llm-venice)** provides access to uncensored models hosted by privacy-focused [Venice AI](https://docs.venice.ai/), including Llama 3.1 405B.
42 |
43 | If an API model host provides an OpenAI-compatible API you can also [configure LLM to talk to it](https://llm.datasette.io/en/stable/other-models.html#openai-compatible-models) without needing an extra plugin.
44 |
45 | ## Tools
46 |
47 | The following plugins add new {ref}`tools ` that can be used by models:
48 |
49 | - **[llm-tools-simpleeval](https://github.com/simonw/llm-tools-simpleeval)** implements simple expression support for things like mathematics.
50 | - **[llm-tools-quickjs](https://github.com/simonw/llm-tools-quickjs)** provides access to a sandboxed QuickJS JavaScript interpreter, allowing LLMs to run JavaScript code. The environment persists between calls so the model can set variables and build functions and reuse them later on.
51 | - **[llm-tools-sqlite](https://github.com/simonw/llm-tools-sqlite)** can run read-only SQL queries against local SQLite databases.
52 | - **[llm-tools-datasette](https://github.com/simonw/llm-tools-datasette)** can run SQL queries against a remote [Datasette](https://datasette.io/) instance.
53 | - **[llm-tools-exa](https://github.com/daturkel/llm-tools-exa)** by Dan Turkel can perform web searches and question-answering using [exa.ai](https://exa.ai/).
54 | - **[llm-tools-rag](https://github.com/daturkel/llm-tools-rag)** by Dan Turkel can perform searches over your LLM embedding collections for simple RAG.
55 |
56 | ## Fragments and template loaders
57 |
58 | {ref}`LLM 0.24 ` introduced support for plugins that define `-f prefix:value` or `-t prefix:value` custom loaders for fragments and templates.
59 |
60 | - **[llm-video-frames](https://github.com/simonw/llm-video-frames)** uses `ffmpeg` to turn a video into a sequence of JPEG frames suitable for feeding into a vision model that doesn't support video inputs: `llm -f video-frames:video.mp4 'describe the key scenes in this video'`.
61 | - **[llm-templates-github](https://github.com/simonw/llm-templates-github)** supports loading templates shared on GitHub, e.g. `llm -t gh:simonw/pelican-svg`.
62 | - **[llm-templates-fabric](https://github.com/simonw/llm-templates-fabric)** provides access to the [Fabric](https://github.com/danielmiessler/fabric) collection of prompts: `cat setup.py | llm -t fabric:explain_code`.
63 | - **[llm-fragments-github](https://github.com/simonw/llm-fragments-github)** can load entire GitHub repositories in a single operation: `llm -f github:simonw/files-to-prompt 'explain this code'`. It can also fetch issue threads as Markdown using `llm -f issue:https://github.com/simonw/llm-fragments-github/issues/3`.
64 | - **[llm-hacker-news](https://github.com/simonw/llm-hacker-news)** imports conversations from Hacker News as fragments: `llm -f hn:43615912 'summary with illustrative direct quotes'`.
65 | - **[llm-fragments-pypi](https://github.com/samueldg/llm-fragments-pypi)** loads [PyPI](https://pypi.org/) packages' description and metadata as fragments: `llm -f pypi:ruff "What flake8 plugins does ruff re-implement?"`.
66 | - **[llm-fragments-pdf](https://github.com/daturkel/llm-fragments-pdf)** by Dan Turkel converts PDFs to markdown with [PyMuPDF4LLM](https://pymupdf.readthedocs.io/en/latest/pymupdf4llm/index.html) to use as fragments: `llm -f pdf:something.pdf "what's this about?"`.
67 | - **[llm-fragments-site-text](https://github.com/daturkel/llm-fragments-site-text)** by Dan Turkel converts websites to markdown with [Trafilatura](https://trafilatura.readthedocs.io/en/latest/) to use as fragments: `llm -f site:https://example.com "summarize this"`.
68 | - **[llm-fragments-reader](https://github.com/simonw/llm-fragments-reader)** runs a URL through the Jina Reader API: `llm -f 'reader:https://simonwillison.net/tags/jina/' summary`.
69 |
70 | ## Embedding models
71 |
72 | {ref}`Embedding models ` are models that can be used to generate and store embedding vectors for text.
73 |
74 | - **[llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers)** adds support for embeddings using the [sentence-transformers](https://www.sbert.net/) library, which provides access to [a wide range](https://www.sbert.net/docs/pretrained_models.html) of embedding models.
75 | - **[llm-clip](https://github.com/simonw/llm-clip)** provides the [CLIP](https://openai.com/research/clip) model, which can be used to embed images and text in the same vector space, enabling text search against images. See [Build an image search engine with llm-clip](https://simonwillison.net/2023/Sep/12/llm-clip-and-chat/) for more on this plugin.
76 | - **[llm-embed-jina](https://github.com/simonw/llm-embed-jina)** provides Jina AI's [8K text embedding models](https://jina.ai/news/jina-ai-launches-worlds-first-open-source-8k-text-embedding-rivaling-openai/).
77 | - **[llm-embed-onnx](https://github.com/simonw/llm-embed-onnx)** provides seven embedding models that can be executed using the ONNX model framework.
78 |
79 | ## Extra commands
80 |
81 | - **[llm-cmd](https://github.com/simonw/llm-cmd)** accepts a prompt for a shell command, runs that prompt and populates the result in your shell so you can review it, edit it and then hit `<enter>` to execute or `ctrl+c` to cancel.
82 | - **[llm-cmd-comp](https://github.com/CGamesPlay/llm-cmd-comp)** provides a key binding for your shell that will launch a chat to build the command. When ready, hit `<enter>` and it will go right back into your shell command line, so you can run it.
83 | - **[llm-python](https://github.com/simonw/llm-python)** adds a `llm python` command for running a Python interpreter in the same virtual environment as LLM. This is useful for debugging, and also provides a convenient way to interact with the LLM {ref}`python-api` if you installed LLM using Homebrew or `pipx`.
84 | - **[llm-cluster](https://github.com/simonw/llm-cluster)** adds a `llm cluster` command for calculating clusters for a collection of embeddings. Calculated clusters can then be passed to a Large Language Model to generate a summary description.
85 | - **[llm-jq](https://github.com/simonw/llm-jq)** lets you pipe in JSON data and a prompt describing a `jq` program, then executes the generated program against the JSON.
86 |
87 | ## Just for fun
88 |
89 | - **[llm-markov](https://github.com/simonw/llm-markov)** adds a simple model that generates output using a [Markov chain](https://en.wikipedia.org/wiki/Markov_chain). This example is used in the tutorial [Writing a plugin to support a new model](https://llm.datasette.io/en/latest/plugins/tutorial-model-plugin.html).
90 |
--------------------------------------------------------------------------------
/docs/plugins/index.md:
--------------------------------------------------------------------------------
1 | (plugins)=
2 | # Plugins
3 |
4 | LLM plugins can enhance LLM by making alternative Large Language Models available, either via API or by running the models locally on your machine.
5 |
6 | Plugins can also add new commands to the `llm` CLI tool.
7 |
8 | The {ref}`plugin directory ` lists available plugins that you can install and use.
9 |
10 | {ref}`tutorial-model-plugin` describes how to build a new plugin in detail.
11 |
12 | ```{toctree}
13 | ---
14 | maxdepth: 3
15 | ---
16 | installing-plugins
17 | directory
18 | plugin-hooks
19 | tutorial-model-plugin
20 | advanced-model-plugins
21 | plugin-utilities
22 | ```
23 |
--------------------------------------------------------------------------------
/docs/plugins/installing-plugins.md:
--------------------------------------------------------------------------------
1 | (installing-plugins)=
2 | # Installing plugins
3 |
4 | Plugins must be installed in the same virtual environment as LLM itself.
5 |
6 | You can find names of plugins to install in the {ref}`plugin directory `.
7 |
8 | Use the `llm install` command (a thin wrapper around `pip install`) to install plugins in the correct environment:
9 | ```bash
10 | llm install llm-gpt4all
11 | ```
12 | Plugins can be uninstalled with `llm uninstall`:
13 | ```bash
14 | llm uninstall llm-gpt4all -y
15 | ```
16 | The `-y` flag skips asking for confirmation.
17 |
18 | You can see additional models that have been added by plugins by running:
19 | ```bash
20 | llm models
21 | ```
22 | Or add `--options` to include details of the options available for each model:
23 | ```bash
24 | llm models --options
25 | ```
26 | To run a prompt against a newly installed model, pass its name as the `-m/--model` option:
27 | ```bash
28 | llm -m orca-mini-3b-gguf2-q4_0 'What is the capital of France?'
29 | ```
30 |
31 | ## Listing installed plugins
32 |
33 | Run `llm plugins` to list installed plugins:
34 |
35 | ```bash
36 | llm plugins
37 | ```
38 | ```json
39 | [
40 | {
41 | "name": "llm-anthropic",
42 | "hooks": [
43 | "register_models"
44 | ],
45 | "version": "0.11"
46 | },
47 | {
48 | "name": "llm-gguf",
49 | "hooks": [
50 | "register_commands",
51 | "register_models"
52 | ],
53 | "version": "0.1a0"
54 | },
55 | {
56 | "name": "llm-clip",
57 | "hooks": [
58 | "register_commands",
59 | "register_embedding_models"
60 | ],
61 | "version": "0.1"
62 | },
63 | {
64 | "name": "llm-cmd",
65 | "hooks": [
66 | "register_commands"
67 | ],
68 | "version": "0.2a0"
69 | },
70 | {
71 | "name": "llm-gemini",
72 | "hooks": [
73 | "register_embedding_models",
74 | "register_models"
75 | ],
76 | "version": "0.3"
77 | }
78 | ]
79 | ```
80 |
81 | (llm-load-plugins)=
82 | ## Running with a subset of plugins
83 |
84 | By default, LLM will load all plugins that are installed in the same virtual environment as LLM itself.
85 |
86 | You can control the set of plugins that is loaded using the `LLM_LOAD_PLUGINS` environment variable.
87 |
88 | Set that to the empty string to disable all plugins:
89 |
90 | ```bash
91 | LLM_LOAD_PLUGINS='' llm ...
92 | ```
93 | Or to a comma-separated list of plugin names to load only those plugins:
94 |
95 | ```bash
96 | LLM_LOAD_PLUGINS='llm-gpt4all,llm-cluster' llm ...
97 | ```
98 | You can use the `llm plugins` command to check that it is working correctly:
99 | ```
100 | LLM_LOAD_PLUGINS='' llm plugins
101 | ```
102 |
--------------------------------------------------------------------------------
/docs/plugins/llm-markov/llm_markov.py:
--------------------------------------------------------------------------------
1 | import llm
2 | import random
3 | import time
4 | from typing import Optional
5 | from pydantic import field_validator, Field
6 |
7 |
8 | @llm.hookimpl
9 | def register_models(register):
10 | register(Markov())
11 |
12 |
13 | def build_markov_table(text):
14 | words = text.split()
15 | transitions = {}
16 | # Loop through all but the last word
17 | for i in range(len(words) - 1):
18 | word = words[i]
19 | next_word = words[i + 1]
20 | transitions.setdefault(word, []).append(next_word)
21 | return transitions
22 |
23 |
24 | def generate(transitions, length, start_word=None):
25 | all_words = list(transitions.keys())
26 | next_word = start_word or random.choice(all_words)
27 | for i in range(length):
28 | yield next_word
29 | options = transitions.get(next_word) or all_words
30 | next_word = random.choice(options)
31 |
32 |
33 | class Markov(llm.Model):
34 | model_id = "markov"
35 | can_stream = True
36 |
37 | class Options(llm.Options):
38 | length: Optional[int] = Field(
39 | description="Number of words to generate", default=None
40 | )
41 | delay: Optional[float] = Field(
42 | description="Seconds to delay between each token", default=None
43 | )
44 |
45 | @field_validator("length")
46 | def validate_length(cls, length):
47 | if length is None:
48 | return None
49 | if length < 2:
50 | raise ValueError("length must be >= 2")
51 | return length
52 |
53 | @field_validator("delay")
54 | def validate_delay(cls, delay):
55 | if delay is None:
56 | return None
57 | if not 0 <= delay <= 10:
58 | raise ValueError("delay must be between 0 and 10")
59 | return delay
60 |
61 | def execute(self, prompt, stream, response, conversation):
62 | text = prompt.prompt
63 | transitions = build_markov_table(text)
64 | length = prompt.options.length or 20
65 | for word in generate(transitions, length):
66 | yield word + " "
67 | if prompt.options.delay:
68 | time.sleep(prompt.options.delay)
69 |
--------------------------------------------------------------------------------
/docs/plugins/llm-markov/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "llm-markov"
3 | version = "0.1"
4 |
5 | [project.entry-points.llm]
6 | markov = "llm_markov"
--------------------------------------------------------------------------------
/docs/plugins/plugin-hooks.md:
--------------------------------------------------------------------------------
1 | (plugin-hooks)=
2 | # Plugin hooks
3 |
4 | Plugins use **plugin hooks** to customize LLM's behavior. These hooks are powered by the [Pluggy plugin system](https://pluggy.readthedocs.io/).
5 |
6 | Each plugin can implement one or more hooks using the `@hookimpl` decorator against one of the hook function names described on this page.
7 |
8 | LLM imitates the Datasette plugin system. The [Datasette plugin documentation](https://docs.datasette.io/en/stable/writing_plugins.html) describes how plugins work.
9 |
10 | (plugin-hooks-register-commands)=
11 | ## register_commands(cli)
12 |
13 | This hook adds new commands to the `llm` CLI tool - for example `llm extra-command`.
14 |
15 | This example plugin adds a new `hello-world` command that prints "Hello world!":
16 |
17 | ```python
18 | from llm import hookimpl
19 | import click
20 |
21 | @hookimpl
22 | def register_commands(cli):
23 | @cli.command(name="hello-world")
24 | def hello_world():
25 | "Print hello world"
26 | click.echo("Hello world!")
27 | ```
28 | This new command will be added to `llm --help` and can be run using `llm hello-world`.
29 |
30 | (plugin-hooks-register-models)=
31 | ## register_models(register)
32 |
33 | This hook can be used to register one or more additional models.
34 |
35 | ```python
36 | import llm
37 |
38 | @llm.hookimpl
39 | def register_models(register):
40 | register(HelloWorld())
41 |
42 | class HelloWorld(llm.Model):
43 | model_id = "helloworld"
44 |
45 |     def execute(self, prompt, stream, response, conversation):
46 | return ["hello world"]
47 | ```
48 | If your model includes an async version, you can register that too:
49 |
50 | ```python
51 | class AsyncHelloWorld(llm.AsyncModel):
52 | model_id = "helloworld"
53 |
54 |     async def execute(self, prompt, stream, response, conversation):
55 | return ["hello world"]
56 |
57 | @llm.hookimpl
58 | def register_models(register):
59 | register(HelloWorld(), AsyncHelloWorld(), aliases=("hw",))
60 | ```
61 | This demonstrates how to register a model with both sync and async versions, and how to specify an alias for that model.
62 |
63 | The {ref}`model plugin tutorial ` describes how to use this hook in detail. Asynchronous models {ref}`are described here `.
64 |
65 | (plugin-hooks-register-embedding-models)=
66 | ## register_embedding_models(register)
67 |
68 | This hook can be used to register one or more additional embedding models, as described in {ref}`embeddings-writing-plugins`.
69 |
70 | ```python
71 | import llm
72 |
73 | @llm.hookimpl
74 | def register_embedding_models(register):
75 | register(HelloWorld())
76 |
77 | class HelloWorld(llm.EmbeddingModel):
78 | model_id = "helloworld"
79 |
80 | def embed_batch(self, items):
81 | return [[1, 2, 3], [4, 5, 6]]
82 | ```
83 |
84 | (plugin-hooks-register-tools)=
85 | ## register_tools(register)
86 |
87 | This hook can register one or more tool functions for use with LLM. See {ref}`the tools documentation ` for more details.
88 |
89 | This example registers two tools: `upper` and `count_character_in_word`.
90 |
91 | ```python
92 | import llm
93 |
94 | def upper(text: str) -> str:
95 | """Convert text to uppercase."""
96 | return text.upper()
97 |
98 | def count_char(text: str, character: str) -> int:
99 | """Count the number of occurrences of a character in a word."""
100 | return text.count(character)
101 |
102 | @llm.hookimpl
103 | def register_tools(register):
104 | register(upper)
105 | # Here the name= argument is used to specify a different name for the tool:
106 | register(count_char, name="count_character_in_word")
107 | ```
108 |
109 | Tools can also be implemented as classes, as described in {ref}`Toolbox classes ` in the Python API documentation.
110 |
111 | You can register classes like the `Memory` example from there by passing the class (_not_ an instance of the class) to `register()`:
112 |
113 | ```python
114 | import llm
115 |
116 | class Memory(llm.Toolbox):
117 | ...
118 |
119 | @llm.hookimpl
120 | def register_tools(register):
121 | register(Memory)
122 | ```
123 | Once installed, this tool can be used like so:
124 |
125 | ```bash
126 | llm chat -T Memory
127 | ```
128 | If a tool name starts with a capital letter it is assumed to be a toolbox class, not a regular tool function.
129 |
130 | Here's an example session with the Memory tool:
131 | ```
132 | Chatting with gpt-4.1-mini
133 | Type 'exit' or 'quit' to exit
134 | Type '!multi' to enter multiple lines, then '!end' to finish
135 | Type '!edit' to open your default editor and modify the prompt
136 | Type '!fragment [ ...]' to insert one or more fragments
137 | > Remember my name is Henry
138 |
139 | Tool call: Memory_set({'key': 'user_name', 'value': 'Henry'})
140 | null
141 |
142 | Got it, Henry! I'll remember your name. How can I assist you today?
143 | > what keys are there?
144 |
145 | Tool call: Memory_keys({})
146 | [
147 | "user_name"
148 | ]
149 |
150 | Currently, there is one key stored: "user_name". Would you like to add or retrieve any information?
151 | > read it
152 |
153 | Tool call: Memory_get({'key': 'user_name'})
154 | Henry
155 |
156 | The value stored under the key "user_name" is Henry. Is there anything else you'd like to do?
157 | > add Barrett to it
158 |
159 | Tool call: Memory_append({'key': 'user_name', 'value': 'Barrett'})
160 | null
161 |
162 | I have added "Barrett" to the key "user_name". If you want, I can now show you the updated value.
163 | > show value
164 |
165 | Tool call: Memory_get({'key': 'user_name'})
166 | Henry
167 | Barrett
168 |
169 | The value stored under the key "user_name" is now:
170 | Henry
171 | Barrett
172 |
173 | Is there anything else you would like to do?
174 | ```
175 |
176 | (plugin-hooks-register-template-loaders)=
177 | ## register_template_loaders(register)
178 |
179 | Plugins can register new {ref}`template loaders ` using the `register_template_loaders` hook.
180 |
181 | Template loaders work with the `llm -t prefix:name` syntax. The prefix specifies the loader, then the registered loader function is called with the name as an argument. The loader function should return an `llm.Template()` object.
182 |
183 | This example plugin registers `my-prefix` as a new template loader. Once installed it can be used like this:
184 |
185 | ```bash
186 | llm -t my-prefix:my-template
187 | ```
188 | Here's the Python code:
189 |
190 | ```python
191 | import llm
192 |
193 | @llm.hookimpl
194 | def register_template_loaders(register):
195 | register("my-prefix", my_template_loader)
196 |
197 | def my_template_loader(template_path: str) -> llm.Template:
198 | """
199 | Documentation for the template loader goes here. It will be displayed
200 | when users run the 'llm templates loaders' command.
201 | """
202 | try:
203 | # Your logic to fetch the template content
204 | # This is just an example:
205 | prompt = "This is a sample prompt for {}".format(template_path)
206 | system = "You are an assistant specialized in {}".format(template_path)
207 |
208 | # Return a Template object with the required fields
209 | return llm.Template(
210 | name=template_path,
211 | prompt=prompt,
212 | system=system,
213 | )
214 | except Exception as e:
215 | # Raise a ValueError with a clear message if the template cannot be found
216 | raise ValueError(f"Template '{template_path}' could not be loaded: {str(e)}")
217 | ```
218 | The `llm.Template` class has the following constructor:
219 |
220 | ```{eval-rst}
221 | .. autoclass:: llm.Template
222 | ```
223 |
224 | The loader function should raise a `ValueError` if the template cannot be found or loaded correctly, providing a clear error message.
225 |
226 | Note that `functions:` blocks provided by templates loaded through this plugin hook will not be made available. This avoids the risk that plugin hooks loading templates from remote sources could introduce arbitrary code execution vulnerabilities.
227 |
228 | (plugin-hooks-register-fragment-loaders)=
229 | ## register_fragment_loaders(register)
230 |
231 | Plugins can register new fragment loaders using the `register_fragment_loaders` hook. These can then be used with the `llm -f prefix:argument` syntax.
232 |
233 | Fragment loader plugins differ from template loader plugins in that you can stack more than one fragment loader call together in the same prompt.
234 |
235 | A fragment loader can return one or more string fragments or attachments, or a mixture of the two. The fragments will be concatenated together into the prompt string, while any attachments will be added to the list of attachments to be sent to the model.
236 |
237 | The `prefix` specifies the loader. The `argument` will be passed to that registered callback.
238 |
239 | The callback works in a very similar way to template loaders, but returns either a single `llm.Fragment`, a list of `llm.Fragment` objects, a single `llm.Attachment`, or a list that can mix `llm.Attachment` and `llm.Fragment` objects.
240 |
241 | The `llm.Fragment` constructor takes a required string argument (the content of the fragment) and an optional second `source` argument, which is a string that may be displayed as debug information. For files this is a path and for URLs it is a URL. Your plugin can use anything you like for the `source` value.
242 |
243 | See {ref}`the Python API documentation for attachments ` for details of the `llm.Attachment` class.
244 |
245 | Here is some example code:
246 |
247 | ```python
248 | import llm
249 |
250 | @llm.hookimpl
251 | def register_fragment_loaders(register):
252 | register("my-fragments", my_fragment_loader)
253 |
254 |
255 | def my_fragment_loader(argument: str) -> llm.Fragment:
256 | """
257 | Documentation for the fragment loader goes here. It will be displayed
258 | when users run the 'llm fragments loaders' command.
259 | """
260 | try:
261 | fragment = "Fragment content for {}".format(argument)
262 | source = "my-fragments:{}".format(argument)
263 | return llm.Fragment(fragment, source)
264 | except Exception as ex:
265 | # Raise a ValueError with a clear message if the fragment cannot be loaded
266 | raise ValueError(
267 | f"Fragment 'my-fragments:{argument}' could not be loaded: {str(ex)}"
268 | )
269 |
270 | # Or for the case where you want to return multiple fragments and attachments:
271 | def my_fragment_loader(argument: str) -> list[llm.Fragment]:
272 | "Docs go here."
273 | return [
274 |         llm.Fragment("Fragment 1 content", f"my-fragments:{argument}"),
275 |         llm.Fragment("Fragment 2 content", f"my-fragments:{argument}"),
276 | llm.Attachment(path="/path/to/image.png"),
277 | ]
278 | ```
279 | A plugin like this one can be called like so:
280 | ```bash
281 | llm -f my-fragments:argument
282 | ```
283 | If multiple fragments are returned they will be used as if the user passed multiple `-f X` arguments to the command.
284 |
285 | Multiple fragments are particularly useful for things like plugins that return every file in a directory. If the plugin concatenated those files together into a single fragment, a change to any one file would invalidate the de-duplication cache for the whole fragment. Giving each file its own fragment means only the changed file needs to be stored again, rather than another copy of the full collection.
286 |
--------------------------------------------------------------------------------
/docs/plugins/plugin-utilities.md:
--------------------------------------------------------------------------------
1 | (plugin-utilities)=
2 | # Utility functions for plugins
3 |
4 | LLM provides some utility functions that may be useful to plugins.
5 |
6 | (plugin-utilities-get-key)=
7 | ## llm.get_key()
8 |
9 | This method can be used to look up secrets that users have stored using the {ref}`llm keys set ` command. If your plugin needs to access an API key or other secret this can be a convenient way to provide that.
10 |
11 | This returns either a string containing the key or `None` if the key could not be resolved.
12 |
13 | Use the `alias="name"` option to retrieve the key set with that alias:
14 |
15 | ```python
16 | github_key = llm.get_key(alias="github")
17 | ```
18 | You can also add `env="ENV_VAR"` to fall back to looking in that environment variable if the key has not been configured:
19 | ```python
20 | github_key = llm.get_key(alias="github", env="GITHUB_TOKEN")
21 | ```
22 | In some cases you may allow users to provide a key as input, where they could input either the key itself or specify an alias to lookup in `keys.json`. Use the `input=` parameter for that:
23 |
24 | ```python
25 | github_key = llm.get_key(input=input_from_user, alias="github", env="GITHUB_TOKEN")
26 | ```
27 |
28 | A previous version of this function used positional arguments in a confusing order. Those are still supported, but the keyword arguments shown above are the recommended way to use `llm.get_key()` going forward.
29 |
30 | (plugin-utilities-user-dir)=
31 | ## llm.user_dir()
32 |
33 | LLM stores various pieces of logging and configuration data in a directory on the user's machine.
34 |
35 | On macOS this directory is `~/Library/Application Support/io.datasette.llm`, but this will differ on other operating systems.
36 |
37 | The `llm.user_dir()` function returns the path to this directory as a `pathlib.Path` object, after creating that directory if it does not yet exist.
38 |
39 | Plugins can use this to store their own data in a subdirectory of this directory.
40 |
41 | ```python
42 | import llm
43 | user_dir = llm.user_dir()
44 | plugin_dir = user_dir / "my-plugin"
45 | plugin_dir.mkdir(exist_ok=True)
46 | data_path = plugin_dir / "plugin-data.db"
47 | ```
48 |
49 | (plugin-utilities-modelerror)=
50 | ## llm.ModelError
51 |
52 | If your model encounters an error that should be reported to the user you can raise this exception. For example:
53 |
54 | ```python
55 | import llm
56 |
57 | raise llm.ModelError("MPT model not installed - try running 'llm mpt30b download'")
58 | ```
59 | This will be caught by the CLI layer and displayed to the user as an error message.
60 |
61 | (plugin-utilities-response-fake)=
62 | ## Response.fake()
63 |
64 | When writing tests for a model it can be useful to generate fake response objects, for example in this test from [llm-mpt30b](https://github.com/simonw/llm-mpt30b):
65 |
66 | ```python
67 | def test_build_prompt_conversation():
68 | model = llm.get_model("mpt")
69 | conversation = model.conversation()
70 | conversation.responses = [
71 | llm.Response.fake(model, "prompt 1", "system 1", "response 1"),
72 | llm.Response.fake(model, "prompt 2", None, "response 2"),
73 | llm.Response.fake(model, "prompt 3", None, "response 3"),
74 | ]
75 | lines = model.build_prompt(llm.Prompt("prompt 4", model), conversation)
76 | assert lines == [
77 |         "<|im_start|>system\nsystem 1<|im_end|>\n",
78 | "<|im_start|>user\nprompt 1<|im_end|>\n",
79 | "<|im_start|>assistant\nresponse 1<|im_end|>\n",
80 | "<|im_start|>user\nprompt 2<|im_end|>\n",
81 | "<|im_start|>assistant\nresponse 2<|im_end|>\n",
82 | "<|im_start|>user\nprompt 3<|im_end|>\n",
83 | "<|im_start|>assistant\nresponse 3<|im_end|>\n",
84 | "<|im_start|>user\nprompt 4<|im_end|>\n",
85 | "<|im_start|>assistant\n",
86 | ]
87 | ```
88 | The signature of `llm.Response.fake()` is:
89 |
90 | ```python
91 | def fake(cls, model: Model, prompt: str, system: str, response: str):
92 | ```
93 |
--------------------------------------------------------------------------------
/docs/related-tools.md:
--------------------------------------------------------------------------------
1 | (related-tools)=
2 | # Related tools
3 |
4 | The following tools are designed to be used with LLM:
5 |
6 | (related-tools-strip-tags)=
7 | ## strip-tags
8 |
9 | [strip-tags](https://github.com/simonw/strip-tags) is a command for stripping tags from HTML. This is useful when working with LLMs because HTML tags can use up a lot of your token budget.
10 |
11 | Here's how to summarize the front page of the New York Times, by both stripping tags and filtering to just the elements with `class="story-wrapper"`:
12 |
13 | ```bash
14 | curl -s https://www.nytimes.com/ \
15 | | strip-tags .story-wrapper \
16 | | llm -s 'summarize the news'
17 | ```
18 |
19 | [llm, ttok and strip-tags—CLI tools for working with ChatGPT and other LLMs](https://simonwillison.net/2023/May/18/cli-tools-for-llms/) describes ways to use `strip-tags` in more detail.
20 |
21 | (related-tools-ttok)=
22 | ## ttok
23 |
24 | [ttok](https://github.com/simonw/ttok) is a command-line tool for counting OpenAI tokens. You can use it to check if input is likely to fit in the token limit for GPT-3.5 or GPT-4:
25 |
26 | ```bash
27 | cat my-file.txt | ttok
28 | ```
29 | ```
30 | 125
31 | ```
32 | It can also truncate input down to a desired number of tokens:
33 | ```bash
34 | ttok This is too many tokens -t 3
35 | ```
36 | ```
37 | This is too
38 | ```
39 | This is useful for truncating a large document down to a size where it can be processed by an LLM.
40 |
41 | (related-tools-symbex)=
42 | ## Symbex
43 |
44 | [Symbex](https://github.com/simonw/symbex) is a tool for searching for symbols in Python codebases. It's useful for extracting just the code for a specific problem and then piping that into LLM for explanation, refactoring or other tasks.
45 |
46 | Here's how to use it to find all functions that match `test*csv*` and use those to guess what the software under test does:
47 |
48 | ```bash
49 | symbex 'test*csv*' | \
50 | llm --system 'based on these tests guess what this tool does'
51 | ```
52 | It can also be used to export symbols in a format that can be piped to {ref}`llm embed-multi ` in order to create embeddings:
53 | ```bash
54 | symbex '*' '*:*' --nl | \
55 | llm embed-multi symbols - \
56 | --format nl --database embeddings.db --store
57 | ```
58 | For more examples see [Symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/).
59 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx==7.2.6
2 | furo==2023.9.10
3 | sphinx-autobuild
4 | sphinx-copybutton
5 | sphinx-markdown-builder==0.6.8
6 | myst-parser
7 | cogapp
8 |
--------------------------------------------------------------------------------
/docs/setup.md:
--------------------------------------------------------------------------------
1 | # Setup
2 |
3 | ## Installation
4 |
5 | Install this tool using `pip`:
6 | ```bash
7 | pip install llm
8 | ```
9 | Or using [pipx](https://pypa.github.io/pipx/):
10 | ```bash
11 | pipx install llm
12 | ```
13 | Or using [uv](https://docs.astral.sh/uv/guides/tools/) ({ref}`more tips below `):
14 | ```bash
15 | uv tool install llm
16 | ```
17 | Or using [Homebrew](https://brew.sh/) (see {ref}`warning note `):
18 | ```bash
19 | brew install llm
20 | ```
21 |
22 | ## Upgrading to the latest version
23 |
24 | If you installed using `pip`:
25 | ```bash
26 | pip install -U llm
27 | ```
28 | For `pipx`:
29 | ```bash
30 | pipx upgrade llm
31 | ```
32 | For `uv`:
33 | ```bash
34 | uv tool upgrade llm
35 | ```
36 | For Homebrew:
37 | ```bash
38 | brew upgrade llm
39 | ```
40 | If the latest version is not yet available on Homebrew you can upgrade like this instead:
41 | ```bash
42 | llm install -U llm
43 | ```
44 |
45 | (setup-uvx)=
46 | ## Using uvx
47 |
48 | If you have [uv](https://docs.astral.sh/uv/) installed you can also use the `uvx` command to try LLM without first installing it like this:
49 |
50 | ```bash
51 | export OPENAI_API_KEY='sk-...'
52 | uvx llm 'fun facts about skunks'
53 | ```
54 | This will install and run LLM using a temporary virtual environment.
55 |
56 | You can use the `--with` option to add extra plugins. To use Anthropic's models, for example:
57 | ```bash
58 | export ANTHROPIC_API_KEY='...'
59 | uvx --with llm-anthropic llm -m claude-3.5-haiku 'fun facts about skunks'
60 | ```
61 | All of the usual LLM commands will work with `uvx llm`. Here's how to set your OpenAI key without needing an environment variable for example:
62 | ```bash
63 | uvx llm keys set openai
64 | # Paste key here
65 | ```
66 |
67 | (homebrew-warning)=
68 | ## A note about Homebrew and PyTorch
69 |
70 | The version of LLM packaged for Homebrew currently uses Python 3.12. The PyTorch project does not yet have a stable release of PyTorch for that version of Python.
71 |
72 | This means that LLM plugins that depend on PyTorch such as [llm-sentence-transformers](https://github.com/simonw/llm-sentence-transformers) may not install cleanly with the Homebrew version of LLM.
73 |
74 | You can work around this by manually installing PyTorch before installing `llm-sentence-transformers`:
75 |
76 | ```bash
77 | llm install llm-python
78 | llm python -m pip install \
79 | --pre torch torchvision \
80 | --index-url https://download.pytorch.org/whl/nightly/cpu
81 | llm install llm-sentence-transformers
82 | ```
83 | This should produce a working installation of that plugin.
84 |
85 | ## Installing plugins
86 |
87 | {ref}`plugins` can be used to add support for other language models, including models that can run on your own device.
88 |
89 | For example, the [llm-gpt4all](https://github.com/simonw/llm-gpt4all) plugin adds support for 17 new models that can be installed on your own machine. You can install that like so:
90 | ```bash
91 | llm install llm-gpt4all
92 | ```
93 |
94 | (api-keys)=
95 | ## API key management
96 |
97 | Many LLM models require an API key. These API keys can be provided to this tool using several different mechanisms.
98 |
99 | You can obtain an API key for OpenAI's language models from [the API keys page](https://platform.openai.com/api-keys) on their site.
100 |
101 | ### Saving and using stored keys
102 |
103 | The easiest way to store an API key is to use the `llm keys set` command:
104 |
105 | ```bash
106 | llm keys set openai
107 | ```
108 | You will be prompted to enter the key like this:
109 | ```
110 | % llm keys set openai
111 | Enter key:
112 | ```
113 | Once stored, this key will be automatically used for subsequent calls to the API:
114 |
115 | ```bash
116 | llm "Five ludicrous names for a pet lobster"
117 | ```
118 |
119 | You can list the names of keys that have been set using this command:
120 |
121 | ```bash
122 | llm keys
123 | ```
124 |
125 | Keys that are stored in this way live in a file called `keys.json`. This file is located at the path shown when you run the following command:
126 |
127 | ```bash
128 | llm keys path
129 | ```
130 |
131 | On macOS this will be `~/Library/Application Support/io.datasette.llm/keys.json`. On Linux it may be something like `~/.config/io.datasette.llm/keys.json`.
132 |
133 | ### Passing keys using the --key option
134 |
135 | Keys can be passed directly using the `--key` option, like this:
136 |
137 | ```bash
138 | llm "Five names for pet weasels" --key sk-my-key-goes-here
139 | ```
140 | You can also pass the alias of a key stored in the `keys.json` file. For example, if you want to maintain a personal API key you could add that like this:
141 | ```bash
142 | llm keys set personal
143 | ```
144 | And then use it for prompts like so:
145 |
146 | ```bash
147 | llm "Five friendly names for a pet skunk" --key personal
148 | ```
149 |
150 | ### Keys in environment variables
151 |
152 | Keys can also be set using an environment variable. These are different for different models.
153 |
154 | For OpenAI models the key will be read from the `OPENAI_API_KEY` environment variable.
155 |
156 | The environment variable will be used if no `--key` option is passed to the command and there is no key configured in `keys.json`.
157 |
158 | To use an environment variable in place of the `keys.json` key run the prompt like this:
159 | ```bash
160 | llm 'my prompt' --key $OPENAI_API_KEY
161 | ```
162 |
163 | ## Configuration
164 |
165 | You can configure LLM in a number of different ways.
166 |
167 | (setup-default-model)=
168 | ### Setting a custom default model
169 |
170 | The model used when calling `llm` without the `-m/--model` option defaults to `gpt-4o-mini` - the fastest and least expensive OpenAI model.
171 |
172 | You can use the `llm models default` command to set a different default model. For GPT-4o (slower and more expensive, but more capable) run this:
173 |
174 | ```bash
175 | llm models default gpt-4o
176 | ```
177 | You can view the current model by running this:
178 | ```
179 | llm models default
180 | ```
181 | Any of the supported aliases for a model can be passed to this command.
182 |
183 | ### Setting a custom directory location
184 |
185 | This tool stores various files - prompt templates, stored keys, preferences, a database of logs - in a directory on your computer.
186 |
187 | On macOS this is `~/Library/Application Support/io.datasette.llm/`.
188 |
189 | On Linux it may be something like `~/.config/io.datasette.llm/`.
190 |
191 | You can set a custom location for this directory by setting the `LLM_USER_PATH` environment variable:
192 |
193 | ```bash
194 | export LLM_USER_PATH=/path/to/my/custom/directory
195 | ```
196 | ### Turning SQLite logging on and off
197 |
198 | By default, LLM will log every prompt and response you make to a SQLite database - see {ref}`logging` for more details.
199 |
200 | You can turn this behavior off by default by running:
201 | ```bash
202 | llm logs off
203 | ```
204 | Or turn it back on again with:
205 | ```
206 | llm logs on
207 | ```
208 | Run `llm logs status` to see the current state of this setting.
--------------------------------------------------------------------------------
/docs/tools.md:
--------------------------------------------------------------------------------
1 | (tools)=
2 |
3 | # Tools
4 |
5 | Many Large Language Models have been trained to execute tools as part of responding to a prompt. LLM supports tool usage with both the command-line interface and the Python API.
6 |
7 | Exposing tools to LLMs **carries risks**! Be sure to read the {ref}`warning below `.
8 |
9 | (tools-how-they-work)=
10 |
11 | ## How tools work
12 |
13 | A tool is effectively a function that the model can request to be executed. Here's how that works:
14 |
15 | 1. The initial prompt to the model includes a list of available tools, containing their names, descriptions and parameters.
16 | 2. The model can choose to call one (or sometimes more than one) of those tools, returning a request for the tool to execute.
17 | 3. The code that calls the model - in this case LLM itself - then executes the specified tool with the provided arguments.
18 | 4. LLM prompts the model a second time, this time including the output of the tool execution.
19 | 5. The model can then use that output to generate its next response.
20 |
21 | This sequence can run several times in a loop, allowing the LLM to access data, act on that data and then pass that data off to other tools for further processing.
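Here is a minimal sketch of that loop using the Python API. The `multiply` function and the `gpt-4.1-mini` model choice are illustrative; `model.chain()` is the helper that keeps re-prompting the model until it stops requesting tool calls (the Python API documentation covers this in more detail):

```python
import llm

def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b

model = llm.get_model("gpt-4.1-mini")
# chain() re-prompts the model with each tool result until the model
# stops requesting tool calls, then returns the final response
response = model.chain(
    "What is 1337 multiplied by 42?",
    tools=[multiply],
)
print(response.text())
```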
22 |
23 | :::{admonition} Tools can be dangerous
24 | :class: danger
25 |
26 | (tools-warning)=
27 |
28 | ## Warning: Tools can be dangerous
29 |
30 | Applications built on top of LLMs suffer from a class of attacks called [prompt injection](https://simonwillison.net/tags/prompt-injection/) attacks. These occur when a malicious third party injects content into the LLM which causes it to take tool-based actions that act against the interests of the user of that application.
31 |
32 | Be very careful about which tools you enable when you potentially might be exposed to untrusted sources of content - web pages, GitHub issues posted by other people, email and messages that have been sent to you that could come from an attacker.
33 |
34 | Watch out for the **lethal trifecta** of prompt injection exfiltration attacks. If your tool-enabled LLM has the following:
35 |
36 | - access to private data
37 | - exposure to malicious instructions
38 | - the ability to exfiltrate information
39 |
40 | Anyone who can feed malicious instructions into your LLM - by leaving them on a web page it visits, or sending an email to an inbox that it monitors - could be able to trick your LLM into using other tools to access your private information and then exfiltrate (pass out) that data to somewhere the attacker can see it.
41 | :::
42 |
43 | (tools-trying-out)=
44 |
45 | ## Trying out tools
46 |
47 | LLM comes with a default tool installed, called `llm_version`. You can try that out like this:
48 |
49 | ```bash
50 | llm --tool llm_version "What version of LLM is this?" --td
51 | ```
52 | You can also use `-T llm_version` as a shortcut for `--tool llm_version`.
53 |
54 | The output should look like this:
55 | ```
56 | Tool call: llm_version({})
57 | 0.26a0
58 |
59 | The installed version of the LLM is 0.26a0.
60 | ```
61 | Further tools can be installed using plugins, or you can use the `llm --functions` option to pass tools implemented as Python functions directly, as {ref}`described here `.
62 |
63 | (tools-implementation)=
64 |
65 | ## LLM's implementation of tools
66 |
67 | In LLM, every tool is defined as a Python function. The function can take any number of arguments and can return a string or an object that can be converted to a string.
68 |
69 | Tool functions should include a docstring that describes what the function does. This docstring will become the description that is passed to the model.
70 |
71 | Tools can also be defined as {ref}`toolbox classes ` - subclasses of `llm.Toolbox` that allow multiple related tools to be bundled together. Toolbox classes can be configured when they are instantiated, and can also maintain state between multiple tool calls.
72 |
73 | The Python API can accept functions directly. The command-line interface has two ways for tools to be defined: via plugins that implement the {ref}`register_tools() plugin hook `, or directly on the command-line using the `--functions` argument to specify a block of Python code defining one or more functions - or a path to a Python file containing the same.
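For example, here is the `--functions` approach with an inline block of Python code - the `reverse_string` function is just for illustration:

```bash
llm --functions '
def reverse_string(text: str) -> str:
    """Reverse the supplied string."""
    return text[::-1]
' 'Reverse the string "pelican"' --td
```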
74 |
75 | You can use tools {ref}`with the LLM command-line tool ` or {ref}`with the Python API `.
76 |
77 | (tools-default)=
78 |
79 | ## Default tools
80 |
81 | LLM includes some default tools for you to try out:
82 |
83 | - `llm_version()` returns the current version of LLM
84 | - `llm_time()` returns the current local and UTC time
85 |
86 | Try them like this:
87 |
88 | ```bash
89 | llm -T llm_version -T llm_time 'Give me the current time and LLM version' --td
90 | ```
91 |
92 | (tools-tips)=
93 |
94 | ## Tips for implementing tools
95 |
96 | Consult the {ref}`register_tools() plugin hook ` documentation for examples of how to implement tools in plugins.
97 |
98 | If your plugin needs access to API secrets I recommend storing those using `llm keys set api-name` and then reading them using the {ref}`plugin-utilities-get-key` utility function. This avoids secrets being logged to the database as part of tool calls.
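A sketch of what that looks like inside a tool function - the `example-api` key alias, the `EXAMPLE_API_KEY` environment variable and the function itself are all hypothetical:

```python
import llm

def search_example_api(query: str) -> str:
    """Search the (hypothetical) example API for the given query."""
    # Resolve the secret at call time rather than taking it as a tool
    # argument, so it never shows up in logged tool calls
    key = llm.get_key(alias="example-api", env="EXAMPLE_API_KEY")
    if key is None:
        raise llm.ModelError("No API key found - run: llm keys set example-api")
    # ... call the API with the key here and return the result as a string
    return "Results for {!r}".format(query)
```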
99 |
100 |
101 |
--------------------------------------------------------------------------------
/llm/__main__.py:
--------------------------------------------------------------------------------
1 | from .cli import cli
2 |
3 | if __name__ == "__main__":
4 | cli()
5 |
--------------------------------------------------------------------------------
/llm/default_plugins/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simonw/llm/2292d7a56df25b7fd457b53bcc7d5cfccdf1821c/llm/default_plugins/__init__.py
--------------------------------------------------------------------------------
/llm/default_plugins/default_tools.py:
--------------------------------------------------------------------------------
1 | import llm
2 | from llm.tools import llm_time, llm_version
3 |
4 |
5 | @llm.hookimpl
6 | def register_tools(register):
7 | register(llm_version)
8 | register(llm_time)
9 |
--------------------------------------------------------------------------------
/llm/embeddings_migrations.py:
--------------------------------------------------------------------------------
1 | from sqlite_migrate import Migrations
2 | import hashlib
3 | import time
4 |
5 | embeddings_migrations = Migrations("llm.embeddings")
6 |
7 |
8 | @embeddings_migrations()
9 | def m001_create_tables(db):
10 | db["collections"].create({"id": int, "name": str, "model": str}, pk="id")
11 | db["collections"].create_index(["name"], unique=True)
12 | db["embeddings"].create(
13 | {
14 | "collection_id": int,
15 | "id": str,
16 | "embedding": bytes,
17 | "content": str,
18 | "metadata": str,
19 | },
20 | pk=("collection_id", "id"),
21 | )
22 |
23 |
24 | @embeddings_migrations()
25 | def m002_foreign_key(db):
26 | db["embeddings"].add_foreign_key("collection_id", "collections", "id")
27 |
28 |
29 | @embeddings_migrations()
30 | def m003_add_updated(db):
31 | db["embeddings"].add_column("updated", int)
32 | # Pretty-print the schema
33 | db["embeddings"].transform()
34 | # Assume anything existing was last updated right now
35 | db.query(
36 | "update embeddings set updated = ? where updated is null", [int(time.time())]
37 | )
38 |
39 |
40 | @embeddings_migrations()
41 | def m004_store_content_hash(db):
42 | db["embeddings"].add_column("content_hash", bytes)
43 | db["embeddings"].transform(
44 | column_order=(
45 | "collection_id",
46 | "id",
47 | "embedding",
48 | "content",
49 | "content_hash",
50 | "metadata",
51 | "updated",
52 | )
53 | )
54 |
55 | # Register functions manually so we can de-register later
56 | def md5(text):
57 | return hashlib.md5(text.encode("utf8")).digest()
58 |
59 | def random_md5():
60 | return hashlib.md5(str(time.time()).encode("utf8")).digest()
61 |
62 | db.conn.create_function("temp_md5", 1, md5)
63 | db.conn.create_function("temp_random_md5", 0, random_md5)
64 |
65 | with db.conn:
66 | db.execute(
67 | """
68 | update embeddings
69 | set content_hash = temp_md5(content)
70 | where content is not null
71 | """
72 | )
73 | db.execute(
74 | """
75 | update embeddings
76 | set content_hash = temp_random_md5()
77 | where content is null
78 | """
79 | )
80 |
81 | db["embeddings"].create_index(["content_hash"])
82 |
83 | # De-register functions
84 | db.conn.create_function("temp_md5", 1, None)
85 | db.conn.create_function("temp_random_md5", 0, None)
86 |
87 |
88 | @embeddings_migrations()
89 | def m005_add_content_blob(db):
90 | db["embeddings"].add_column("content_blob", bytes)
91 | db["embeddings"].transform(
92 | column_order=("collection_id", "id", "embedding", "content", "content_blob")
93 | )
94 |
--------------------------------------------------------------------------------
/llm/errors.py:
--------------------------------------------------------------------------------
1 | class ModelError(Exception):
2 | "Models can raise this error, which will be displayed to the user"
3 |
4 |
5 | class NeedsKeyException(ModelError):
6 | "Model needs an API key which has not been provided"
7 |
--------------------------------------------------------------------------------
/llm/hookspecs.py:
--------------------------------------------------------------------------------
1 | from pluggy import HookimplMarker
2 | from pluggy import HookspecMarker
3 |
4 | hookspec = HookspecMarker("llm")
5 | hookimpl = HookimplMarker("llm")
6 |
7 |
8 | @hookspec
9 | def register_commands(cli):
10 | """Register additional CLI commands, e.g. 'llm mycommand ...'"""
11 |
12 |
13 | @hookspec
14 | def register_models(register):
15 | "Register additional model instances representing LLM models that can be called"
16 |
17 |
18 | @hookspec
19 | def register_embedding_models(register):
20 | "Register additional model instances that can be used for embedding"
21 |
22 |
23 | @hookspec
24 | def register_template_loaders(register):
25 | "Register additional template loaders with prefixes"
26 |
27 |
28 | @hookspec
29 | def register_fragment_loaders(register):
30 | "Register additional fragment loaders with prefixes"
31 |
32 |
33 | @hookspec
34 | def register_tools(register):
35 | "Register functions that can be used as tools by the LLMs"
36 |
--------------------------------------------------------------------------------
/llm/migrations.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | from typing import Callable, List
3 |
4 | MIGRATIONS: List[Callable] = []
5 | migration = MIGRATIONS.append
6 |
7 |
8 | def migrate(db):
9 | ensure_migrations_table(db)
10 | already_applied = {r["name"] for r in db["_llm_migrations"].rows}
11 | for fn in MIGRATIONS:
12 | name = fn.__name__
13 | if name not in already_applied:
14 | fn(db)
15 | db["_llm_migrations"].insert(
16 | {
17 | "name": name,
18 | "applied_at": str(datetime.datetime.now(datetime.timezone.utc)),
19 | }
20 | )
21 | already_applied.add(name)
22 |
23 |
24 | def ensure_migrations_table(db):
25 | if not db["_llm_migrations"].exists():
26 | db["_llm_migrations"].create(
27 | {
28 | "name": str,
29 | "applied_at": str,
30 | },
31 | pk="name",
32 | )
33 |
34 |
35 | @migration
36 | def m001_initial(db):
37 | # Ensure the original table design exists, so other migrations can run
38 | if db["log"].exists():
39 | # It needs to have the chat_id column
40 | if "chat_id" not in db["log"].columns_dict:
41 | db["log"].add_column("chat_id")
42 | return
43 | db["log"].create(
44 | {
45 | "provider": str,
46 | "system": str,
47 | "prompt": str,
48 | "chat_id": str,
49 | "response": str,
50 | "model": str,
51 | "timestamp": str,
52 | }
53 | )
54 |
55 |
56 | @migration
57 | def m002_id_primary_key(db):
58 | db["log"].transform(pk="id")
59 |
60 |
61 | @migration
62 | def m003_chat_id_foreign_key(db):
63 | db["log"].transform(types={"chat_id": int})
64 | db["log"].add_foreign_key("chat_id", "log", "id")
65 |
66 |
67 | @migration
68 | def m004_column_order(db):
69 | db["log"].transform(
70 | column_order=(
71 | "id",
72 | "model",
73 | "timestamp",
74 | "prompt",
75 | "system",
76 | "response",
77 | "chat_id",
78 | )
79 | )
80 |
81 |
82 | @migration
83 | def m004_drop_provider(db):
84 | db["log"].transform(drop=("provider",))
85 |
86 |
87 | @migration
88 | def m005_debug(db):
89 | db["log"].add_column("debug", str)
90 | db["log"].add_column("duration_ms", int)
91 |
92 |
93 | @migration
94 | def m006_new_logs_table(db):
95 | columns = db["log"].columns_dict
96 | for column, type in (
97 | ("options_json", str),
98 | ("prompt_json", str),
99 | ("response_json", str),
100 | ("reply_to_id", int),
101 | ):
102 | # It's possible people running development code like myself
103 | # might have accidentally created these columns already
104 | if column not in columns:
105 | db["log"].add_column(column, type)
106 |
107 | # Use .transform() to rename options and timestamp_utc, and set new order
108 | db["log"].transform(
109 | column_order=(
110 | "id",
111 | "model",
112 | "prompt",
113 | "system",
114 | "prompt_json",
115 | "options_json",
116 | "response",
117 | "response_json",
118 | "reply_to_id",
119 | "chat_id",
120 | "duration_ms",
121 | "timestamp_utc",
122 | ),
123 | rename={
124 | "timestamp": "timestamp_utc",
125 | "options": "options_json",
126 | },
127 | )
128 |
129 |
130 | @migration
131 | def m007_finish_logs_table(db):
132 | db["log"].transform(
133 | drop={"debug"},
134 | rename={"timestamp_utc": "datetime_utc"},
135 | drop_foreign_keys=("chat_id",),
136 | )
137 | with db.conn:
138 | db.execute("alter table log rename to logs")
139 |
140 |
141 | @migration
142 | def m008_reply_to_id_foreign_key(db):
143 | db["logs"].add_foreign_key("reply_to_id", "logs", "id")
144 |
145 |
146 | @migration
147 | def m008_fix_column_order_in_logs(db):
148 | # reply_to_id ended up at the end after foreign key added
149 | db["logs"].transform(
150 | column_order=(
151 | "id",
152 | "model",
153 | "prompt",
154 | "system",
155 | "prompt_json",
156 | "options_json",
157 | "response",
158 | "response_json",
159 | "reply_to_id",
160 | "chat_id",
161 | "duration_ms",
162 | "timestamp_utc",
163 | ),
164 | )
165 |
166 |
167 | @migration
168 | def m009_delete_logs_table_if_empty(db):
169 | # We moved to a new table design, but we don't delete the table
170 | # if someone has put data in it
171 | if not db["logs"].count:
172 | db["logs"].drop()
173 |
174 |
175 | @migration
176 | def m010_create_new_log_tables(db):
177 | db["conversations"].create(
178 | {
179 | "id": str,
180 | "name": str,
181 | "model": str,
182 | },
183 | pk="id",
184 | )
185 | db["responses"].create(
186 | {
187 | "id": str,
188 | "model": str,
189 | "prompt": str,
190 | "system": str,
191 | "prompt_json": str,
192 | "options_json": str,
193 | "response": str,
194 | "response_json": str,
195 | "conversation_id": str,
196 | "duration_ms": int,
197 | "datetime_utc": str,
198 | },
199 | pk="id",
200 | foreign_keys=(("conversation_id", "conversations", "id"),),
201 | )
202 |
203 |
204 | @migration
205 | def m011_fts_for_responses(db):
206 | db["responses"].enable_fts(["prompt", "response"], create_triggers=True)
207 |
208 |
209 | @migration
210 | def m012_attachments_tables(db):
211 | db["attachments"].create(
212 | {
213 | "id": str,
214 | "type": str,
215 | "path": str,
216 | "url": str,
217 | "content": bytes,
218 | },
219 | pk="id",
220 | )
221 | db["prompt_attachments"].create(
222 | {
223 | "response_id": str,
224 | "attachment_id": str,
225 | "order": int,
226 | },
227 | foreign_keys=(
228 | ("response_id", "responses", "id"),
229 | ("attachment_id", "attachments", "id"),
230 | ),
231 | pk=("response_id", "attachment_id"),
232 | )
233 |
234 |
235 | @migration
236 | def m013_usage(db):
237 | db["responses"].add_column("input_tokens", int)
238 | db["responses"].add_column("output_tokens", int)
239 | db["responses"].add_column("token_details", str)
240 |
241 |
242 | @migration
243 | def m014_schemas(db):
244 | db["schemas"].create(
245 | {
246 | "id": str,
247 | "content": str,
248 | },
249 | pk="id",
250 | )
251 | db["responses"].add_column("schema_id", str, fk="schemas", fk_col="id")
252 | # Clean up SQL create table indentation
253 | db["responses"].transform()
254 | # These changes may have dropped the FTS configuration, fix that
255 | db["responses"].enable_fts(
256 | ["prompt", "response"], create_triggers=True, replace=True
257 | )
258 |
259 |
260 | @migration
261 | def m015_fragments_tables(db):
262 | db["fragments"].create(
263 | {
264 | "id": int,
265 | "hash": str,
266 | "content": str,
267 | "datetime_utc": str,
268 | "source": str,
269 | },
270 | pk="id",
271 | )
272 | db["fragments"].create_index(["hash"], unique=True)
273 | db["fragment_aliases"].create(
274 | {
275 | "alias": str,
276 | "fragment_id": int,
277 | },
278 | foreign_keys=(("fragment_id", "fragments", "id"),),
279 | pk="alias",
280 | )
281 | db["prompt_fragments"].create(
282 | {
283 | "response_id": str,
284 | "fragment_id": int,
285 | "order": int,
286 | },
287 | foreign_keys=(
288 | ("response_id", "responses", "id"),
289 | ("fragment_id", "fragments", "id"),
290 | ),
291 | pk=("response_id", "fragment_id"),
292 | )
293 | db["system_fragments"].create(
294 | {
295 | "response_id": str,
296 | "fragment_id": int,
297 | "order": int,
298 | },
299 | foreign_keys=(
300 | ("response_id", "responses", "id"),
301 | ("fragment_id", "fragments", "id"),
302 | ),
303 | pk=("response_id", "fragment_id"),
304 | )
305 |
306 |
307 | @migration
308 | def m016_fragments_table_pks(db):
309 | # The same fragment can be attached to a response multiple times
310 | # https://github.com/simonw/llm/issues/863#issuecomment-2781720064
311 | db["prompt_fragments"].transform(pk=("response_id", "fragment_id", "order"))
312 | db["system_fragments"].transform(pk=("response_id", "fragment_id", "order"))
313 |
314 |
315 | @migration
316 | def m017_tools_tables(db):
317 | db["tools"].create(
318 | {
319 | "id": int,
320 | "hash": str,
321 | "name": str,
322 | "description": str,
323 | "input_schema": str,
324 | },
325 | pk="id",
326 | )
327 | db["tools"].create_index(["hash"], unique=True)
328 | # Many-to-many relationship between tools and responses
329 | db["tool_responses"].create(
330 | {
331 | "tool_id": int,
332 | "response_id": str,
333 | },
334 | foreign_keys=(
335 | ("tool_id", "tools", "id"),
336 | ("response_id", "responses", "id"),
337 | ),
338 | pk=("tool_id", "response_id"),
339 | )
340 | # tool_calls and tool_results are one-to-many against responses
341 | db["tool_calls"].create(
342 | {
343 | "id": int,
344 | "response_id": str,
345 | "tool_id": int,
346 | "name": str,
347 | "arguments": str,
348 | "tool_call_id": str,
349 | },
350 | pk="id",
351 | foreign_keys=(
352 | ("response_id", "responses", "id"),
353 | ("tool_id", "tools", "id"),
354 | ),
355 | )
356 | db["tool_results"].create(
357 | {
358 | "id": int,
359 | "response_id": str,
360 | "tool_id": int,
361 | "name": str,
362 | "output": str,
363 | "tool_call_id": str,
364 | },
365 | pk="id",
366 | foreign_keys=(
367 | ("response_id", "responses", "id"),
368 | ("tool_id", "tools", "id"),
369 | ),
370 | )
371 |
372 |
373 | @migration
374 | def m017_tools_plugin(db):
375 | db["tools"].add_column("plugin")
376 |
377 |
378 | @migration
379 | def m018_tool_instances(db):
380 | # Used to track instances of Toolbox classes that may be
381 | # used multiple times by different tools
382 | db["tool_instances"].create(
383 | {
384 | "id": int,
385 | "plugin": str,
386 | "name": str,
387 | "arguments": str,
388 | },
389 | pk="id",
390 | )
391 | # We record which instance was used only on the results
392 | db["tool_results"].add_column("instance_id", fk="tool_instances")
393 |
394 |
395 | @migration
396 | def m019_resolved_model(db):
397 | # For models like gemini-1.5-flash-latest where we wish to record
398 | # the resolved model name in addition to the alias
399 | db["responses"].add_column("resolved_model", str)
400 |
401 |
402 | @migration
403 | def m020_tool_results_attachments(db):
404 | db["tool_results_attachments"].create(
405 | {
406 | "tool_result_id": int,
407 | "attachment_id": str,
408 | "order": int,
409 | },
410 | foreign_keys=(
411 | ("tool_result_id", "tool_results", "id"),
412 | ("attachment_id", "attachments", "id"),
413 | ),
414 | pk=("tool_result_id", "attachment_id"),
415 | )
416 |
417 |
418 | @migration
419 | def m021_tool_results_exception(db):
420 | db["tool_results"].add_column("exception", str)
421 |
--------------------------------------------------------------------------------
/llm/plugins.py:
--------------------------------------------------------------------------------
1 | import importlib
2 | from importlib import metadata
3 | import os
4 | import pluggy
5 | import sys
6 | from . import hookspecs
7 |
8 | DEFAULT_PLUGINS = (
9 | "llm.default_plugins.openai_models",
10 | "llm.default_plugins.default_tools",
11 | )
12 |
13 | pm = pluggy.PluginManager("llm")
14 | pm.add_hookspecs(hookspecs)
15 |
16 | LLM_LOAD_PLUGINS = os.environ.get("LLM_LOAD_PLUGINS", None)
17 |
18 | _loaded = False
19 |
20 |
21 | def load_plugins():
22 | global _loaded
23 | if _loaded:
24 | return
25 | _loaded = True
26 | if not hasattr(sys, "_called_from_test") and LLM_LOAD_PLUGINS is None:
27 | # Only load plugins if not running tests
28 | pm.load_setuptools_entrypoints("llm")
29 |
30 |     # Load any plugins specified in LLM_LOAD_PLUGINS
31 | if LLM_LOAD_PLUGINS is not None:
32 | for package_name in [
33 | name for name in LLM_LOAD_PLUGINS.split(",") if name.strip()
34 | ]:
35 | try:
36 | distribution = metadata.distribution(package_name) # Updated call
37 | llm_entry_points = [
38 | ep for ep in distribution.entry_points if ep.group == "llm"
39 | ]
40 | for entry_point in llm_entry_points:
41 | mod = entry_point.load()
42 | pm.register(mod, name=entry_point.name)
43 | # Ensure name can be found in plugin_to_distinfo later:
44 | pm._plugin_distinfo.append((mod, distribution)) # type: ignore
45 | except metadata.PackageNotFoundError:
46 | sys.stderr.write(f"Plugin {package_name} could not be found\n")
47 |
48 | for plugin in DEFAULT_PLUGINS:
49 | mod = importlib.import_module(plugin)
50 | pm.register(mod, plugin)
51 |
--------------------------------------------------------------------------------
/llm/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/simonw/llm/2292d7a56df25b7fd457b53bcc7d5cfccdf1821c/llm/py.typed
--------------------------------------------------------------------------------
/llm/templates.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel, ConfigDict
2 | import string
3 | from typing import Optional, Any, Dict, List, Tuple
4 |
5 |
6 | class AttachmentType(BaseModel):
7 | type: str
8 | value: str
9 |
10 |
11 | class Template(BaseModel):
12 | name: str
13 | prompt: Optional[str] = None
14 | system: Optional[str] = None
15 | attachments: Optional[List[str]] = None
16 | attachment_types: Optional[List[AttachmentType]] = None
17 | model: Optional[str] = None
18 | defaults: Optional[Dict[str, Any]] = None
19 | options: Optional[Dict[str, Any]] = None
20 | extract: Optional[bool] = None # For extracting fenced code blocks
21 | extract_last: Optional[bool] = None
22 | schema_object: Optional[dict] = None
23 | fragments: Optional[List[str]] = None
24 | system_fragments: Optional[List[str]] = None
25 | tools: Optional[List[str]] = None
26 | functions: Optional[str] = None
27 |
28 | model_config = ConfigDict(extra="forbid")
29 |
30 | class MissingVariables(Exception):
31 | pass
32 |
33 | def __init__(self, **data):
34 | super().__init__(**data)
35 | # Not a pydantic field to avoid YAML being able to set it
36 | # this controls if Python inline functions code is trusted
37 | self._functions_is_trusted = False
38 |
39 | def evaluate(
40 | self, input: str, params: Optional[Dict[str, Any]] = None
41 | ) -> Tuple[Optional[str], Optional[str]]:
42 | params = params or {}
43 | params["input"] = input
44 | if self.defaults:
45 | for k, v in self.defaults.items():
46 | if k not in params:
47 | params[k] = v
48 | prompt: Optional[str] = None
49 | system: Optional[str] = None
50 | if not self.prompt:
51 | system = self.interpolate(self.system, params)
52 | prompt = input
53 | else:
54 | prompt = self.interpolate(self.prompt, params)
55 | system = self.interpolate(self.system, params)
56 | return prompt, system
57 |
58 | def vars(self) -> set:
59 | all_vars = set()
60 | for text in [self.prompt, self.system]:
61 | if not text:
62 | continue
63 | all_vars.update(self.extract_vars(string.Template(text)))
64 | return all_vars
65 |
66 | @classmethod
67 | def interpolate(cls, text: Optional[str], params: Dict[str, Any]) -> Optional[str]:
68 | if not text:
69 | return text
70 | # Confirm all variables in text are provided
71 | string_template = string.Template(text)
72 | vars = cls.extract_vars(string_template)
73 | missing = [p for p in vars if p not in params]
74 | if missing:
75 | raise cls.MissingVariables(
76 | "Missing variables: {}".format(", ".join(missing))
77 | )
78 | return string_template.substitute(**params)
79 |
80 | @staticmethod
81 | def extract_vars(string_template: string.Template) -> List[str]:
82 | return [
83 | match.group("named")
84 | for match in string_template.pattern.finditer(string_template.template)
85 | if match.group("named")
86 | ]
87 |
--------------------------------------------------------------------------------
/llm/tools.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timezone
2 | from importlib.metadata import version
3 | import time
4 |
5 |
6 | def llm_version() -> str:
7 | "Return the installed version of llm"
8 | return version("llm")
9 |
10 |
11 | def llm_time() -> dict:
12 | "Returns the current time, as local time and UTC"
13 | # Get current times
14 | utc_time = datetime.now(timezone.utc)
15 | local_time = datetime.now()
16 |
17 | # Get timezone information
18 | local_tz_name = time.tzname[time.localtime().tm_isdst]
19 | is_dst = bool(time.localtime().tm_isdst)
20 |
21 | # Calculate offset
22 | offset_seconds = -time.timezone if not is_dst else -time.altzone
23 | offset_hours = offset_seconds // 3600
24 | offset_minutes = (offset_seconds % 3600) // 60
25 |
26 | timezone_offset = (
27 | f"UTC{'+' if offset_hours >= 0 else ''}{offset_hours:02d}:{offset_minutes:02d}"
28 | )
29 |
30 | return {
31 | "utc_time": utc_time.strftime("%Y-%m-%d %H:%M:%S UTC"),
32 | "utc_time_iso": utc_time.isoformat(),
33 | "local_timezone": local_tz_name,
34 | "local_time": local_time.strftime("%Y-%m-%d %H:%M:%S"),
35 | "timezone_offset": timezone_offset,
36 | "is_dst": is_dst,
37 | }
38 |
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 |
3 | [mypy-pluggy.*]
4 | ignore_missing_imports = True
5 |
6 | [mypy-click_default_group.*]
7 | ignore_missing_imports = True
8 |
9 | [mypy-sqlite_migrate.*]
10 | ignore_missing_imports = True
11 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "llm"
3 | version = "0.26"
4 | description = "CLI utility and Python library for interacting with Large Language Models from organizations like OpenAI, Anthropic and Gemini plus local models installed on your own machine."
5 | readme = { file = "README.md", content-type = "text/markdown" }
6 | authors = [
7 | { name = "Simon Willison" },
8 | ]
9 | license = "Apache-2.0"
10 | requires-python = ">=3.9"
11 | classifiers = [
12 | "Development Status :: 4 - Beta",
13 | "Intended Audience :: Developers",
14 | "Intended Audience :: End Users/Desktop",
15 | "Intended Audience :: Science/Research",
16 | "Programming Language :: Python :: 3",
17 | "Programming Language :: Python :: 3.9",
18 | "Programming Language :: Python :: 3.10",
19 | "Programming Language :: Python :: 3.11",
20 | "Programming Language :: Python :: 3.12",
21 | "Topic :: Scientific/Engineering :: Artificial Intelligence",
22 | "Topic :: Text Processing :: Linguistic",
23 | "Topic :: Utilities",
24 | ]
25 |
26 | dependencies = [
27 | "click",
28 | "condense-json>=0.1.3",
29 | "openai>=1.55.3",
30 | "click-default-group>=1.2.3",
31 | "sqlite-utils>=3.37",
32 | "sqlite-migrate>=0.1a2",
33 | "pydantic>=2.0.0",
34 | "PyYAML",
35 | "pluggy",
36 | "python-ulid",
37 | "setuptools",
38 | "pip",
39 | "pyreadline3; sys_platform == 'win32'",
40 | "puremagic",
41 | ]
42 |
43 | [project.urls]
44 | Homepage = "https://github.com/simonw/llm"
45 | Documentation = "https://llm.datasette.io/"
46 | Issues = "https://github.com/simonw/llm/issues"
47 | CI = "https://github.com/simonw/llm/actions"
48 | Changelog = "https://github.com/simonw/llm/releases"
49 |
50 | [project.scripts]
51 | llm = "llm.cli:cli"
52 |
53 | [project.optional-dependencies]
54 | test = [
55 | "build",
56 | "click<8.2.0", # https://github.com/simonw/llm/issues/1024
57 | "pytest",
58 | "numpy",
59 | "pytest-httpx>=0.33.0",
60 | "pytest-asyncio",
61 | "cogapp",
62 | "mypy>=1.10.0",
63 | "black>=25.1.0",
64 | "pytest-recording",
65 | "ruff",
66 | "syrupy",
67 | "types-click",
68 | "types-PyYAML",
69 | "types-setuptools",
70 | "llm-echo==0.3a3",
71 | ]
72 |
73 | [build-system]
74 | requires = ["setuptools"]
75 | build-backend = "setuptools.build_meta"
76 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | asyncio_default_fixture_loop_scope = function
--------------------------------------------------------------------------------
/ruff.toml:
--------------------------------------------------------------------------------
1 | line-length = 160
2 |
--------------------------------------------------------------------------------
/tests/cassettes/test_tools/test_tool_use_chain_of_two_calls.yaml:
--------------------------------------------------------------------------------
1 | interactions:
2 | - request:
3 | body: '{"messages":[{"role":"user","content":"Can the country of Crumpet have
4 | dragons? Answer with only YES or NO"}],"model":"gpt-4o-mini","stream":false,"tools":[{"type":"function","function":{"name":"lookup_population","description":"Returns
5 | the current population of the specified fictional country","parameters":{"properties":{"country":{"type":"string"}},"required":["country"],"type":"object"}}},{"type":"function","function":{"name":"can_have_dragons","description":"Returns
6 | True if the specified population can have dragons, False otherwise","parameters":{"properties":{"population":{"type":"integer"}},"required":["population"],"type":"object"}}}]}'
7 | headers:
8 | accept:
9 | - application/json
10 | accept-encoding:
11 | - gzip, deflate
12 | connection:
13 | - keep-alive
14 | content-length:
15 | - '650'
16 | content-type:
17 | - application/json
18 | host:
19 | - api.openai.com
20 | user-agent:
21 | - OpenAI/Python 1.78.0
22 | x-stainless-arch:
23 | - arm64
24 | x-stainless-async:
25 | - 'false'
26 | x-stainless-lang:
27 | - python
28 | x-stainless-os:
29 | - MacOS
30 | x-stainless-package-version:
31 | - 1.78.0
32 | x-stainless-read-timeout:
33 | - '600'
34 | x-stainless-retry-count:
35 | - '0'
36 | x-stainless-runtime:
37 | - CPython
38 | x-stainless-runtime-version:
39 | - 3.13.3
40 | method: POST
41 | uri: https://api.openai.com/v1/chat/completions
42 | response:
43 | body:
44 | string: !!binary |
45 | H4sIAAAAAAAAAwAAAP//jFPBjtowEL3nK6w5kyrJ0gI5slXppWzbZbdqyyoyziS4OLZrO1sQ4t+r
46 | GEjCLpWaQ2TNm/fmzYy9DwgBnkNKgK2pY5UW4fSbns1n+ee7+eLP9uPmx+N29Zu5L/fT99VCwqBh
47 | qNUvZO7MesNUpQU6rk4wM0gdNqrxaDiK390kb2MPVCpH0dBK7cKhCisueZhEyTCMRmE8PrHXijO0
48 | kJKfASGE7P2/8Slz3EJKosE5UqG1tERI2yRCwCjRRIBay62j0sGgA5mSDmVjXdZC9ACnlMgYFaIr
49 | fPz2vXM3LCpEtlh8Hz98mKuRmd/Su+nD3Imv98+fZr16R+md9oaKWrJ2SD28jacvihECklaeK5Ta
50 | 1DrTSteCXhEhBKgp6wqlaxqA/RKYqqUzuyWkS7g1daXRLeEAF7RDcO381JuLwaK2VLweGJVSOW/F
51 | T+zphBza5QhVaqNW9gUVCi65XWcGqfU990cfnI14C1BfbBe0UZV2mVMb9EUnyVEUugvYgfHoBDrl
52 | qOjFo8ngilyWo6Pcb7+9cIyyNeYdtbt4tM656gFBr/XXbq5pH9vnsvwf+Q5gDLXDPNMGc84uO+7S
53 | DDbv819p7ZC9YbBonjnDzHE0zTpyLGgtjq8G7M46rLKCyxKNNtw/HSh0Ft1MknGSRJMIgkPwFwAA
54 | //8DALof6VxIBAAA
55 | headers:
56 | CF-RAY:
57 | - 93f47072dde6f88d-IAD
58 | Connection:
59 | - keep-alive
60 | Content-Encoding:
61 | - gzip
62 | Content-Type:
63 | - application/json
64 | Date:
65 | - Tue, 13 May 2025 19:07:32 GMT
66 | Server:
67 | - cloudflare
68 | Set-Cookie:
69 | - __cf_bm=vfHkbLfwVTTGPkFT0I4U0xn5CHQZYIpOutDV4z7NRlA-1747163252-1.0.1.1-kj_JiiyNxn9AWCWisV6.pYNShKVqqT0Foicji2.ZLNaAkHm5VEwac0QjxVhCiWQs9Xp_wvkeTzrgVxmD8bkzDwTPn96U.81YERXZda3_m18;
70 | path=/; expires=Tue, 13-May-25 19:37:32 GMT; domain=.api.openai.com; HttpOnly;
71 | Secure; SameSite=None
72 | - _cfuvid=SQgXKMy2qkeOsbwwTl62blvuirTS_TkZSvEOztbYIlI-1747163252293-0.0.1.1-604800000;
73 | path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
74 | Transfer-Encoding:
75 | - chunked
76 | X-Content-Type-Options:
77 | - nosniff
78 | access-control-expose-headers:
79 | - X-Request-ID
80 | alt-svc:
81 | - h3=":443"; ma=86400
82 | cf-cache-status:
83 | - DYNAMIC
84 | openai-organization:
85 | - user-r3e61fpak04cbaokp5buoae4
86 | openai-processing-ms:
87 | - '574'
88 | openai-version:
89 | - '2020-10-01'
90 | strict-transport-security:
91 | - max-age=31536000; includeSubDomains; preload
92 | x-envoy-upstream-service-time:
93 | - '591'
94 | x-ratelimit-limit-requests:
95 | - '30000'
96 | x-ratelimit-limit-tokens:
97 | - '150000000'
98 | x-ratelimit-remaining-requests:
99 | - '29999'
100 | x-ratelimit-remaining-tokens:
101 | - '149999981'
102 | x-ratelimit-reset-requests:
103 | - 2ms
104 | x-ratelimit-reset-tokens:
105 | - 0s
106 | x-request-id:
107 | - req_1e7dabaf1f0dba1ec89a134d3bde8476
108 | status:
109 | code: 200
110 | message: OK
111 | - request:
112 | body: '{"messages":[{"role":"user","content":"Can the country of Crumpet have
113 | dragons? Answer with only YES or NO"},{"role":"assistant","tool_calls":[{"type":"function","id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","function":{"name":"lookup_population","arguments":"{\"country\":
114 | \"Crumpet\"}"}}]},{"role":"tool","tool_call_id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","content":"123124"}],"model":"gpt-4o-mini","stream":false,"tools":[{"type":"function","function":{"name":"lookup_population","description":"Returns
115 | the current population of the specified fictional country","parameters":{"properties":{"country":{"type":"string"}},"required":["country"],"type":"object"}}},{"type":"function","function":{"name":"can_have_dragons","description":"Returns
116 | True if the specified population can have dragons, False otherwise","parameters":{"properties":{"population":{"type":"integer"}},"required":["population"],"type":"object"}}}]}'
117 | headers:
118 | accept:
119 | - application/json
120 | accept-encoding:
121 | - gzip, deflate
122 | connection:
123 | - keep-alive
124 | content-length:
125 | - '906'
126 | content-type:
127 | - application/json
128 | host:
129 | - api.openai.com
130 | user-agent:
131 | - OpenAI/Python 1.78.0
132 | x-stainless-arch:
133 | - arm64
134 | x-stainless-async:
135 | - 'false'
136 | x-stainless-lang:
137 | - python
138 | x-stainless-os:
139 | - MacOS
140 | x-stainless-package-version:
141 | - 1.78.0
142 | x-stainless-read-timeout:
143 | - '600'
144 | x-stainless-retry-count:
145 | - '0'
146 | x-stainless-runtime:
147 | - CPython
148 | x-stainless-runtime-version:
149 | - 3.13.3
150 | method: POST
151 | uri: https://api.openai.com/v1/chat/completions
152 | response:
153 | body:
154 | string: !!binary |
155 | H4sIAAAAAAAAA4xTTYvbMBC9+1eIOcfFH2k+fNyWlEIPLaWkm+5itNLY0UaWVEkOzYb892J7YzvZ
156 | FOqDEfPmvXkzIx0DQkBwyAiwLfWsMjK8W5tP39a7es+i1YZvNs9fXxb4Jf7A/R1bwaRh6KdnZP7M
157 | esd0ZSR6oVUHM4vUY6Maz6fzeJYm76ctUGmOsqGVxodTHVZCiTCJkmkYzcN48creasHQQUZ+BYQQ
158 | cmz/jU/F8Q9kJJqcIxU6R0uErE8iBKyWTQSoc8J5qjxMBpBp5VE11lUt5QjwWsucUSmHwt13HJ2H
159 | YVEpc/p7+eMgvq92Lz9n68U9Z2n6UX9e3o/qddIH0xoqasX6IY3wPp5dFSMEFK2wK6jyLd1jzi0t
160 | tXJXGoQAtWVdofKNfzg+gNGmlrTRfYAsTtI4mZ7ggnQKbp0fR0OxWNSOyrfTokpp34q343p8RU79
161 | ZqQujdVP7ooKhVDCbXOL1LUNj+cenI20FqC+WC0Yqyvjc6932BaN40WnCsP1G6Fn0GtP5SieziY3
162 | 9HKOnop29/11Y5RtkQ/U4drRmgs9AoJR72/d3NLu+heq/B/5AWAMjUeeG4tcsMuOhzSLzev8V1o/
163 | 5dYwOLR7wTD3Am2zD44FrWX3ZsAdnMcqL4Qq0Ror2ocDhcmjdJkskiRaRhCcgr8AAAD//wMAmw02
164 | QkYEAAA=
165 | headers:
166 | CF-RAY:
167 | - 93f47082ba71d640-IAD
168 | Connection:
169 | - keep-alive
170 | Content-Encoding:
171 | - gzip
172 | Content-Type:
173 | - application/json
174 | Date:
175 | - Tue, 13 May 2025 19:07:35 GMT
176 | Server:
177 | - cloudflare
178 | Set-Cookie:
179 | - __cf_bm=LL6YtOWVW4fA687_GIMcuJC7CM2I.uKx1vGaNkjFTgo-1747163255-1.0.1.1-qML6IsLM49e2bg7zp0uGqn3.JTJP5KlFYfb8o3v9LzyLb.cYoFBXn5te83Wxl5kVjDiXU2vH.QTFQu953KNx87LwsMkI2ZxTvH58oZWAawg;
180 | path=/; expires=Tue, 13-May-25 19:37:35 GMT; domain=.api.openai.com; HttpOnly;
181 | Secure; SameSite=None
182 | - _cfuvid=QOa3sx0F4_nAYKtjmx9ux7qfIsyipGZq94AL_SWd2ac-1747163255176-0.0.1.1-604800000;
183 | path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
184 | Transfer-Encoding:
185 | - chunked
186 | X-Content-Type-Options:
187 | - nosniff
188 | access-control-expose-headers:
189 | - X-Request-ID
190 | alt-svc:
191 | - h3=":443"; ma=86400
192 | cf-cache-status:
193 | - DYNAMIC
194 | openai-organization:
195 | - user-r3e61fpak04cbaokp5buoae4
196 | openai-processing-ms:
197 | - '575'
198 | openai-version:
199 | - '2020-10-01'
200 | strict-transport-security:
201 | - max-age=31536000; includeSubDomains; preload
202 | x-envoy-upstream-service-time:
203 | - '587'
204 | x-ratelimit-limit-requests:
205 | - '30000'
206 | x-ratelimit-limit-tokens:
207 | - '150000000'
208 | x-ratelimit-remaining-requests:
209 | - '29999'
210 | x-ratelimit-remaining-tokens:
211 | - '149999976'
212 | x-ratelimit-reset-requests:
213 | - 2ms
214 | x-ratelimit-reset-tokens:
215 | - 0s
216 | x-request-id:
217 | - req_66cc3b2bbe3be82a37d29fba7672d82b
218 | status:
219 | code: 200
220 | message: OK
221 | - request:
222 | body: '{"messages":[{"role":"user","content":"Can the country of Crumpet have
223 | dragons? Answer with only YES or NO"},{"role":"assistant","tool_calls":[{"type":"function","id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","function":{"name":"lookup_population","arguments":"{\"country\":
224 | \"Crumpet\"}"}}]},{"role":"tool","tool_call_id":"call_TTY8UFNo7rNCaOBUNtlRSvMG","content":"123124"},{"role":"assistant","tool_calls":[{"type":"function","id":"call_aq9UyiSFkzX6W8Ydc33DoI9Y","function":{"name":"can_have_dragons","arguments":"{\"population\":
225 | 123124}"}}]},{"role":"tool","tool_call_id":"call_aq9UyiSFkzX6W8Ydc33DoI9Y","content":"true"}],"model":"gpt-4o-mini","stream":false,"tools":[{"type":"function","function":{"name":"lookup_population","description":"Returns
226 | the current population of the specified fictional country","parameters":{"properties":{"country":{"type":"string"}},"required":["country"],"type":"object"}}},{"type":"function","function":{"name":"can_have_dragons","description":"Returns
227 | True if the specified population can have dragons, False otherwise","parameters":{"properties":{"population":{"type":"integer"}},"required":["population"],"type":"object"}}}]}'
228 | headers:
229 | accept:
230 | - application/json
231 | accept-encoding:
232 | - gzip, deflate
233 | connection:
234 | - keep-alive
235 | content-length:
236 | - '1157'
237 | content-type:
238 | - application/json
239 | host:
240 | - api.openai.com
241 | user-agent:
242 | - OpenAI/Python 1.78.0
243 | x-stainless-arch:
244 | - arm64
245 | x-stainless-async:
246 | - 'false'
247 | x-stainless-lang:
248 | - python
249 | x-stainless-os:
250 | - MacOS
251 | x-stainless-package-version:
252 | - 1.78.0
253 | x-stainless-read-timeout:
254 | - '600'
255 | x-stainless-retry-count:
256 | - '0'
257 | x-stainless-runtime:
258 | - CPython
259 | x-stainless-runtime-version:
260 | - 3.13.3
261 | method: POST
262 | uri: https://api.openai.com/v1/chat/completions
263 | response:
264 | body:
265 | string: !!binary |
266 | H4sIAAAAAAAAAwAAAP//jJJBb9swDIXv/hUCz/HgOGmd5NYW2447bNjQDIWhSLSjThYFiS42FPnv
267 | g+w0drcO2EUHfXzUexSfMyHAaNgJUEfJqvM2v/3mP37Z31ebq69xb/zdp+Jw8/Sh2lf8qG9gkRR0
268 | eETFL6p3ijpvkQ25EauAkjF1XVbranm9Kq+qAXSk0SZZ6zlfU94ZZ/KyKNd5UeXLzVl9JKMwwk58
269 | z4QQ4nk4k0+n8SfsRLF4uekwRtki7C5FQkAgm25AxmgiS8ewmKAix+gG6/fvP89JwKaPMrlzvbUz
270 | IJ0jlind4OnhTE4XF5ZaH+gQ/5BCY5yJxzqgjOTSi5HJw0BPmRAPQ9r+VQDwgTrPNdMPHJ5brq/H
271 | fjANeaKrM2Niaeei7eKNdrVGlsbG2bhASXVEPUmn2cpeG5qBbBb6bzNv9R6DG9f+T/sJKIWeUdc+
272 | oDbqdeCpLGBawX+VXYY8GIaI4ckorNlgSB+hsZG9HRcD4q/I2NWNcS0GH8y4HY2vi9W23JRlsS0g
273 | O2W/AQAA//8DAFbEZUIrAwAA
274 | headers:
275 | CF-RAY:
276 | - 93f47096cf15d6e9-IAD
277 | Connection:
278 | - keep-alive
279 | Content-Encoding:
280 | - gzip
281 | Content-Type:
282 | - application/json
283 | Date:
284 | - Tue, 13 May 2025 19:07:37 GMT
285 | Server:
286 | - cloudflare
287 | Set-Cookie:
288 | - __cf_bm=EDR.bZeRmrWVNTWef5aAJ2C5NT7yIBHq_6NzNGXNlX0-1747163257-1.0.1.1-YuS4Hj.Ncp4eOrYNT5L7AncdqT5Xn8a2DTxCka1HKKBGKdT8k70yvNTA3wMlQyVPxGD3HSCysY0a1n1zCkNs._TQe9hWOuoIDG9LtD9MBr4;
289 | path=/; expires=Tue, 13-May-25 19:37:37 GMT; domain=.api.openai.com; HttpOnly;
290 | Secure; SameSite=None
291 | - _cfuvid=3Xqq8l5nvU4mfyEz4.llgkHC3jY.IBLFTJrD76P7UsY-1747163257692-0.0.1.1-604800000;
292 | path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
293 | Transfer-Encoding:
294 | - chunked
295 | X-Content-Type-Options:
296 | - nosniff
297 | access-control-expose-headers:
298 | - X-Request-ID
299 | alt-svc:
300 | - h3=":443"; ma=86400
301 | cf-cache-status:
302 | - DYNAMIC
303 | openai-organization:
304 | - user-r3e61fpak04cbaokp5buoae4
305 | openai-processing-ms:
306 | - '222'
307 | openai-version:
308 | - '2020-10-01'
309 | strict-transport-security:
310 | - max-age=31536000; includeSubDomains; preload
311 | x-envoy-upstream-service-time:
312 | - '227'
313 | x-ratelimit-limit-requests:
314 | - '30000'
315 | x-ratelimit-limit-tokens:
316 | - '150000000'
317 | x-ratelimit-remaining-requests:
318 | - '29999'
319 | x-ratelimit-remaining-tokens:
320 | - '149999974'
321 | x-ratelimit-reset-requests:
322 | - 2ms
323 | x-ratelimit-reset-tokens:
324 | - 0s
325 | x-request-id:
326 | - req_d157a5a0f4b64776bc387ccab624e664
327 | status:
328 | code: 200
329 | message: OK
330 | version: 1
331 |
--------------------------------------------------------------------------------
/tests/test-llm-load-plugins.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This should only run in environments where both
3 | # llm-cluster and llm-mistral are installed
4 |
5 | PLUGINS=$(llm plugins)
6 | echo "$PLUGINS" | jq 'any(.[]; .name == "llm-mistral")' | \
7 | grep -q true || ( \
8 | echo "Test failed: llm-mistral not found" && \
9 | exit 1 \
10 | )
11 | # With LLM_LOAD_PLUGINS=llm-cluster set, we should not see llm-mistral
12 | PLUGINS2=$(LLM_LOAD_PLUGINS=llm-cluster llm plugins)
13 | echo "$PLUGINS2" | jq 'any(.[]; .name == "llm-mistral")' | \
14 | grep -q false || ( \
15 | echo "Test failed: llm-mistral should not have been loaded" && \
16 | exit 1 \
17 | )
18 | echo "$PLUGINS2" | jq 'any(.[]; .name == "llm-cluster")' | \
19 | grep -q true || ( \
20 | echo "Test llm-cluster should have been loaded" && \
21 | exit 1 \
22 | )
23 | # With LLM_LOAD_PLUGINS='' we should see no plugins
24 | PLUGINS3=$(LLM_LOAD_PLUGINS='' llm plugins)
25 | echo "$PLUGINS3"| \
26 | grep -q '\[\]' || ( \
27 | echo "Test failed: plugins should have returned []" && \
28 | exit 1 \
29 | )
30 |
--------------------------------------------------------------------------------
/tests/test_aliases.py:
--------------------------------------------------------------------------------
1 | from click.testing import CliRunner
2 | from llm.cli import cli
3 | import llm
4 | import json
5 | import pytest
6 | import re
7 |
8 |
9 | @pytest.mark.parametrize("model_id_or_alias", ("gpt-3.5-turbo", "chatgpt"))
10 | def test_set_alias(model_id_or_alias):
11 | with pytest.raises(llm.UnknownModelError):
12 | llm.get_model("this-is-a-new-alias")
13 | llm.set_alias("this-is-a-new-alias", model_id_or_alias)
14 | assert llm.get_model("this-is-a-new-alias").model_id == "gpt-3.5-turbo"
15 |
16 |
17 | def test_remove_alias():
18 | with pytest.raises(KeyError):
19 | llm.remove_alias("some-other-alias")
20 | llm.set_alias("some-other-alias", "gpt-3.5-turbo")
21 | assert llm.get_model("some-other-alias").model_id == "gpt-3.5-turbo"
22 | llm.remove_alias("some-other-alias")
23 | with pytest.raises(llm.UnknownModelError):
24 | llm.get_model("some-other-alias")
25 |
26 |
27 | @pytest.mark.parametrize("args", (["aliases", "list"], ["aliases"]))
28 | def test_cli_aliases_list(args):
29 | llm.set_alias("e-demo", "embed-demo")
30 | runner = CliRunner()
31 | result = runner.invoke(cli, args)
32 | assert result.exit_code == 0
33 | for line in (
34 | "3.5 : gpt-3.5-turbo\n"
35 | "chatgpt : gpt-3.5-turbo\n"
36 | "chatgpt-16k : gpt-3.5-turbo-16k\n"
37 | "3.5-16k : gpt-3.5-turbo-16k\n"
38 | "4 : gpt-4\n"
39 | "gpt4 : gpt-4\n"
40 | "4-32k : gpt-4-32k\n"
41 | "e-demo : embed-demo (embedding)\n"
42 | "ada : text-embedding-ada-002 (embedding)\n"
43 | ).split("\n"):
44 | line = line.strip()
45 | if not line:
46 | continue
47 | # Turn the whitespace into a regex
48 | regex = r"\s+".join(re.escape(part) for part in line.split())
49 | assert re.search(regex, result.output)
50 |
51 |
52 | @pytest.mark.parametrize("args", (["aliases", "list"], ["aliases"]))
53 | def test_cli_aliases_list_json(args):
54 | llm.set_alias("e-demo", "embed-demo")
55 | runner = CliRunner()
56 | result = runner.invoke(cli, args + ["--json"])
57 | assert result.exit_code == 0
58 | assert (
59 | json.loads(result.output).items()
60 | >= {
61 | "3.5": "gpt-3.5-turbo",
62 | "chatgpt": "gpt-3.5-turbo",
63 | "chatgpt-16k": "gpt-3.5-turbo-16k",
64 | "3.5-16k": "gpt-3.5-turbo-16k",
65 | "4": "gpt-4",
66 | "gpt4": "gpt-4",
67 | "4-32k": "gpt-4-32k",
68 | "ada": "text-embedding-ada-002",
69 | "e-demo": "embed-demo",
70 | }.items()
71 | )
72 |
73 |
74 | @pytest.mark.parametrize(
75 | "args,expected,expected_error",
76 | (
77 | (["foo", "bar"], {"foo": "bar"}, None),
78 | (["foo", "-q", "mo"], {"foo": "mock"}, None),
79 | (["foo", "-q", "mog"], None, "No model found matching query: mog"),
80 | ),
81 | )
82 | def test_cli_aliases_set(user_path, args, expected, expected_error):
83 | # Should be no aliases.json at start
84 | assert not (user_path / "aliases.json").exists()
85 | runner = CliRunner()
86 | result = runner.invoke(cli, ["aliases", "set"] + args)
87 | if not expected_error:
88 | assert result.exit_code == 0
89 | assert (user_path / "aliases.json").exists()
90 | assert json.loads((user_path / "aliases.json").read_text("utf-8")) == expected
91 | else:
92 | assert result.exit_code == 1
93 | assert result.output.strip() == f"Error: {expected_error}"
94 |
95 |
96 | def test_cli_aliases_path(user_path):
97 | runner = CliRunner()
98 | result = runner.invoke(cli, ["aliases", "path"])
99 | assert result.exit_code == 0
100 | assert result.output.strip() == str(user_path / "aliases.json")
101 |
102 |
103 | def test_cli_aliases_remove(user_path):
104 | (user_path / "aliases.json").write_text(json.dumps({"foo": "bar"}), "utf-8")
105 | runner = CliRunner()
106 | result = runner.invoke(cli, ["aliases", "remove", "foo"])
107 | assert result.exit_code == 0
108 | assert json.loads((user_path / "aliases.json").read_text("utf-8")) == {}
109 |
110 |
111 | def test_cli_aliases_remove_invalid(user_path):
112 | (user_path / "aliases.json").write_text(json.dumps({"foo": "bar"}), "utf-8")
113 | runner = CliRunner()
114 | result = runner.invoke(cli, ["aliases", "remove", "invalid"])
115 | assert result.exit_code == 1
116 | assert result.output == "Error: No such alias: invalid\n"
117 |
118 |
119 | @pytest.mark.parametrize("args", (["models"], ["models", "list"]))
120 | def test_cli_aliases_are_registered(user_path, args):
121 | (user_path / "aliases.json").write_text(
122 | json.dumps({"foo": "bar", "turbo": "gpt-3.5-turbo"}), "utf-8"
123 | )
124 | runner = CliRunner()
125 | result = runner.invoke(cli, args)
126 | assert result.exit_code == 0
127 | # Check for model line only, without keys, as --options is not used
128 | assert "gpt-3.5-turbo (aliases: 3.5, chatgpt, turbo)" in result.output
129 |
--------------------------------------------------------------------------------
/tests/test_async.py:
--------------------------------------------------------------------------------
1 | import llm
2 | import pytest
3 |
4 |
5 | @pytest.mark.asyncio
6 | async def test_async_model(async_mock_model):
7 | gathered = []
8 | async_mock_model.enqueue(["hello world"])
9 | async for chunk in async_mock_model.prompt("hello"):
10 | gathered.append(chunk)
11 | assert gathered == ["hello world"]
12 | # Not as an iterator
13 | async_mock_model.enqueue(["hello world"])
14 | response = await async_mock_model.prompt("hello")
15 | text = await response.text()
16 | assert text == "hello world"
17 | assert isinstance(response, llm.AsyncResponse)
18 | usage = await response.usage()
19 | assert usage.input == 1
20 | assert usage.output == 1
21 | assert usage.details is None
22 |
23 |
24 | @pytest.mark.asyncio
25 | async def test_async_model_conversation(async_mock_model):
26 | async_mock_model.enqueue(["joke 1"])
27 | conversation = async_mock_model.conversation()
28 | response = await conversation.prompt("joke")
29 | text = await response.text()
30 | assert text == "joke 1"
31 | async_mock_model.enqueue(["joke 2"])
32 | response2 = await conversation.prompt("again")
33 | text2 = await response2.text()
34 | assert text2 == "joke 2"
35 |
36 |
37 | @pytest.mark.asyncio
38 | async def test_async_on_done(async_mock_model):
39 | async_mock_model.enqueue(["hello world"])
40 | response = await async_mock_model.prompt(prompt="hello")
41 | caught = []
42 |
43 | def done(response):
44 | caught.append(response)
45 |
46 | assert len(caught) == 0
47 | await response.on_done(done)
48 | await response.text()
49 | assert response._done
50 | assert len(caught) == 1
51 |
52 |
53 | @pytest.mark.asyncio
54 | async def test_async_conversation(async_mock_model):
55 | async_mock_model.enqueue(["one"])
56 | conversation = async_mock_model.conversation()
57 | response1 = await conversation.prompt("hi").text()
58 | async_mock_model.enqueue(["two"])
59 | response2 = await conversation.prompt("hi").text()
60 | assert response1 == "one"
61 | assert response2 == "two"
62 |
--------------------------------------------------------------------------------
/tests/test_attachments.py:
--------------------------------------------------------------------------------
1 | from click.testing import CliRunner
2 | from unittest.mock import ANY
3 | import llm
4 | from llm import cli
5 | import pytest
6 |
7 | TINY_PNG = (
8 | b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\xa6\x00\x00\x01\x1a"
9 | b"\x02\x03\x00\x00\x00\xe6\x99\xc4^\x00\x00\x00\tPLTE\xff\xff\xff"
10 | b"\x00\xff\x00\xfe\x01\x00\x12t\x01J\x00\x00\x00GIDATx\xda\xed\xd81\x11"
11 | b"\x000\x08\xc0\xc0.]\xea\xaf&Q\x89\x04V\xe0>\xf3+\xc8\x91Z\xf4\xa2\x08EQ\x14E"
12 | b"Q\x14EQ\x14EQ\xd4B\x91$I3\xbb\xbf\x08EQ\x14EQ\x14EQ\x14E\xd1\xa5"
13 | b"\xd4\x17\x91\xc6\x95\x05\x15\x0f\x9f\xc5\t\x9f\xa4\x00\x00\x00\x00IEND\xaeB`"
14 | b"\x82"
15 | )
16 |
17 | TINY_WAV = b"RIFF$\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00D\xac\x00\x00"
18 |
19 |
20 | @pytest.mark.parametrize(
21 | "attachment_type,attachment_content",
22 | [
23 | ("image/png", TINY_PNG),
24 | ("audio/wav", TINY_WAV),
25 | ],
26 | )
27 | def test_prompt_attachment(mock_model, logs_db, attachment_type, attachment_content):
28 | runner = CliRunner()
29 | mock_model.enqueue(["two boxes"])
30 | result = runner.invoke(
31 | cli.cli,
32 | ["prompt", "-m", "mock", "describe file", "-a", "-"],
33 | input=attachment_content,
34 | catch_exceptions=False,
35 | )
36 | assert result.exit_code == 0, result.output
37 | assert result.output == "two boxes\n"
38 | assert mock_model.history[0][0].attachments[0] == llm.Attachment(
39 | type=attachment_type, path=None, url=None, content=attachment_content, _id=ANY
40 | )
41 |
42 | # Check it was logged correctly
43 | conversations = list(logs_db["conversations"].rows)
44 | assert len(conversations) == 1
45 | conversation = conversations[0]
46 | assert conversation["model"] == "mock"
47 | assert conversation["name"] == "describe file"
48 | response = list(logs_db["responses"].rows)[0]
49 | attachment = list(logs_db["attachments"].rows)[0]
50 | assert attachment == {
51 | "id": ANY,
52 | "type": attachment_type,
53 | "path": None,
54 | "url": None,
55 | "content": attachment_content,
56 | }
57 | prompt_attachment = list(logs_db["prompt_attachments"].rows)[0]
58 | assert prompt_attachment["attachment_id"] == attachment["id"]
59 | assert prompt_attachment["response_id"] == response["id"]
60 |
--------------------------------------------------------------------------------
/tests/test_cli_openai_models.py:
--------------------------------------------------------------------------------
1 | from click.testing import CliRunner
2 | from llm.cli import cli
3 | import pytest
4 | import sqlite_utils
5 |
6 |
7 | @pytest.fixture
8 | def mocked_models(httpx_mock):
9 | httpx_mock.add_response(
10 | method="GET",
11 | url="https://api.openai.com/v1/models",
12 | json={
13 | "data": [
14 | {
15 | "id": "ada:2020-05-03",
16 | "object": "model",
17 | "created": 1588537600,
18 | "owned_by": "openai",
19 | },
20 | {
21 | "id": "babbage:2020-05-03",
22 | "object": "model",
23 | "created": 1588537600,
24 | "owned_by": "openai",
25 | },
26 | ]
27 | },
28 | headers={"Content-Type": "application/json"},
29 | )
30 | return httpx_mock
31 |
32 |
33 | def test_openai_models(mocked_models):
34 | runner = CliRunner()
35 | result = runner.invoke(cli, ["openai", "models", "--key", "x"])
36 | assert result.exit_code == 0
37 | assert result.output == (
38 | "id owned_by created \n"
39 | "ada:2020-05-03 openai 2020-05-03T20:26:40+00:00\n"
40 | "babbage:2020-05-03 openai 2020-05-03T20:26:40+00:00\n"
41 | )
42 |
43 |
44 | def test_openai_options_min_max():
45 | options = {
46 | "temperature": [0, 2],
47 | "top_p": [0, 1],
48 | "frequency_penalty": [-2, 2],
49 | "presence_penalty": [-2, 2],
50 | }
51 | runner = CliRunner()
52 |
53 | for option, [min_val, max_val] in options.items():
54 | result = runner.invoke(cli, ["-m", "chatgpt", "-o", option, "-10"])
55 | assert result.exit_code == 1
56 | assert f"greater than or equal to {min_val}" in result.output
57 | result2 = runner.invoke(cli, ["-m", "chatgpt", "-o", option, "10"])
58 | assert result2.exit_code == 1
59 | assert f"less than or equal to {max_val}" in result2.output
60 |
61 |
62 | @pytest.mark.parametrize("model", ("gpt-4o-mini", "gpt-4o-audio-preview"))
63 | @pytest.mark.parametrize("filetype", ("mp3", "wav"))
64 | def test_only_gpt4_audio_preview_allows_mp3_or_wav(httpx_mock, model, filetype):
65 | httpx_mock.add_response(
66 | method="HEAD",
67 | url=f"https://www.example.com/example.{filetype}",
68 | content=b"binary-data",
69 | headers={"Content-Type": "audio/mpeg" if filetype == "mp3" else "audio/wav"},
70 | )
71 | if model == "gpt-4o-audio-preview":
72 | httpx_mock.add_response(
73 | method="POST",
74 | # chat completion request
75 | url="https://api.openai.com/v1/chat/completions",
76 | json={
77 | "id": "chatcmpl-AQT9a30kxEaM1bqxRPepQsPlCyGJh",
78 | "object": "chat.completion",
79 | "created": 1730871958,
80 | "model": "gpt-4o-audio-preview-2024-10-01",
81 | "choices": [
82 | {
83 | "index": 0,
84 | "message": {
85 | "role": "assistant",
86 | "content": "Why did the pelican get kicked out of the restaurant?\n\nBecause he had a big bill and no way to pay it!",
87 | "refusal": None,
88 | },
89 | "finish_reason": "stop",
90 | }
91 | ],
92 | "usage": {
93 | "prompt_tokens": 55,
94 | "completion_tokens": 25,
95 | "total_tokens": 80,
96 | "prompt_tokens_details": {
97 | "cached_tokens": 0,
98 | "audio_tokens": 44,
99 | "text_tokens": 11,
100 | "image_tokens": 0,
101 | },
102 | "completion_tokens_details": {
103 | "reasoning_tokens": 0,
104 | "audio_tokens": 0,
105 | "text_tokens": 25,
106 | "accepted_prediction_tokens": 0,
107 | "rejected_prediction_tokens": 0,
108 | },
109 | },
110 | "system_fingerprint": "fp_49254d0e9b",
111 | },
112 | headers={"Content-Type": "application/json"},
113 | )
114 | httpx_mock.add_response(
115 | method="GET",
116 | url=f"https://www.example.com/example.{filetype}",
117 | content=b"binary-data",
118 | headers={
119 | "Content-Type": "audio/mpeg" if filetype == "mp3" else "audio/wav"
120 | },
121 | )
122 | runner = CliRunner()
123 | result = runner.invoke(
124 | cli,
125 | [
126 | "-m",
127 | model,
128 | "-a",
129 | f"https://www.example.com/example.{filetype}",
130 | "--no-stream",
131 | "--key",
132 | "x",
133 | ],
134 | )
135 | if model == "gpt-4o-audio-preview":
136 | assert result.exit_code == 0
137 | assert result.output == (
138 | "Why did the pelican get kicked out of the restaurant?\n\n"
139 | "Because he had a big bill and no way to pay it!\n"
140 | )
141 | else:
142 | assert result.exit_code == 1
143 | long = "audio/mpeg" if filetype == "mp3" else "audio/wav"
144 | assert (
145 | f"This model does not support attachments of type '{long}'" in result.output
146 | )
147 |
148 |
149 | @pytest.mark.parametrize("async_", (False, True))
150 | @pytest.mark.parametrize("usage", (None, "-u", "--usage"))
151 | def test_gpt4o_mini_sync_and_async(monkeypatch, tmpdir, httpx_mock, async_, usage):
152 | user_path = tmpdir / "user_dir"
153 | log_db = user_path / "logs.db"
154 | monkeypatch.setenv("LLM_USER_PATH", str(user_path))
155 | assert not log_db.exists()
156 | httpx_mock.add_response(
157 | method="POST",
158 | # chat completion request
159 | url="https://api.openai.com/v1/chat/completions",
160 | json={
161 | "id": "chatcmpl-AQT9a30kxEaM1bqxRPepQsPlCyGJh",
162 | "object": "chat.completion",
163 | "created": 1730871958,
164 | "model": "gpt-4o-mini",
165 | "choices": [
166 | {
167 | "index": 0,
168 | "message": {
169 | "role": "assistant",
170 | "content": "Ho ho ho",
171 | "refusal": None,
172 | },
173 | "finish_reason": "stop",
174 | }
175 | ],
176 | "usage": {
177 | "prompt_tokens": 1000,
178 | "completion_tokens": 2000,
179 | "total_tokens": 12,
180 | },
181 | "system_fingerprint": "fp_49254d0e9b",
182 | },
183 | headers={"Content-Type": "application/json"},
184 | )
185 | runner = CliRunner(mix_stderr=False)
186 | args = ["-m", "gpt-4o-mini", "--key", "x", "--no-stream"]
187 | if usage:
188 | args.append(usage)
189 | if async_:
190 | args.append("--async")
191 | result = runner.invoke(cli, args, catch_exceptions=False)
192 | assert result.exit_code == 0
193 | assert result.output == "Ho ho ho\n"
194 | if usage:
195 | assert result.stderr == "Token usage: 1,000 input, 2,000 output\n"
196 | # Confirm it was correctly logged
197 | assert log_db.exists()
198 | db = sqlite_utils.Database(str(log_db))
199 | assert db["responses"].count == 1
200 | row = next(db["responses"].rows)
201 | assert row["response"] == "Ho ho ho"
202 |
--------------------------------------------------------------------------------
/tests/test_cli_options.py:
--------------------------------------------------------------------------------
1 | from click.testing import CliRunner
2 | from llm.cli import cli
3 | import pytest
4 | import json
5 |
6 |
7 | @pytest.mark.parametrize(
8 | "args,expected_options,expected_error",
9 | (
10 | (
11 | ["gpt-4o-mini", "temperature", "0.5"],
12 | {"gpt-4o-mini": {"temperature": "0.5"}},
13 | None,
14 | ),
15 | (
16 | ["gpt-4o-mini", "temperature", "invalid"],
17 | {},
18 | "Error: temperature\n Input should be a valid number",
19 | ),
20 | (
21 | ["gpt-4o-mini", "not-an-option", "invalid"],
22 | {},
23 | "Extra inputs are not permitted",
24 | ),
25 | ),
26 | )
27 | def test_set_model_default_options(user_path, args, expected_options, expected_error):
28 | path = user_path / "model_options.json"
29 | assert not path.exists()
30 | runner = CliRunner()
31 | result = runner.invoke(cli, ["models", "options", "set"] + args)
32 | if not expected_error:
33 | assert result.exit_code == 0
34 | assert path.exists()
35 | data = json.loads(path.read_text("utf-8"))
36 | assert data == expected_options
37 | else:
38 | assert result.exit_code == 1
39 | assert expected_error in result.output
40 |
41 |
42 | def test_model_options_list_and_show(user_path):
43 | (user_path / "model_options.json").write_text(
44 | json.dumps(
45 | {"gpt-4o-mini": {"temperature": 0.5}, "gpt-4o": {"temperature": 0.7}}
46 | ),
47 | "utf-8",
48 | )
49 | runner = CliRunner()
50 | result = runner.invoke(cli, ["models", "options", "list"])
51 | assert result.exit_code == 0
52 | assert (
53 | result.output
54 | == "gpt-4o-mini:\n temperature: 0.5\ngpt-4o:\n temperature: 0.7\n"
55 | )
56 | result = runner.invoke(cli, ["models", "options", "show", "gpt-4o-mini"])
57 | assert result.exit_code == 0
58 | assert result.output == "temperature: 0.5\n"
59 |
60 |
61 | def test_model_options_clear(user_path):
62 | path = user_path / "model_options.json"
63 | path.write_text(
64 | json.dumps(
65 | {
66 | "gpt-4o-mini": {"temperature": 0.5},
67 | "gpt-4o": {"temperature": 0.7, "top_p": 0.9},
68 | }
69 | ),
70 | "utf-8",
71 | )
72 | assert path.exists()
73 | runner = CliRunner()
74 | # Clear all for gpt-4o-mini
75 | result = runner.invoke(cli, ["models", "options", "clear", "gpt-4o-mini"])
76 | assert result.exit_code == 0
77 | # Clear just top_p for gpt-4o
78 | result2 = runner.invoke(cli, ["models", "options", "clear", "gpt-4o", "top_p"])
79 | assert result2.exit_code == 0
80 | data = json.loads(path.read_text("utf-8"))
81 | assert data == {"gpt-4o": {"temperature": 0.7}}
82 |
83 |
84 | def test_prompt_uses_model_options(user_path):
85 | path = user_path / "model_options.json"
86 | path.write_text("{}", "utf-8")
87 | # Prompt should not use an option
88 | runner = CliRunner()
89 | result = runner.invoke(cli, ["-m", "echo", "prompt"])
90 | assert result.exit_code == 0
91 | assert json.loads(result.output) == {
92 | "prompt": "prompt",
93 | "system": "",
94 | "attachments": [],
95 | "stream": True,
96 | "previous": [],
97 | }
98 |
99 | # Now set an option
100 | path.write_text(json.dumps({"echo": {"example_bool": True}}), "utf-8")
101 |
102 | result2 = runner.invoke(cli, ["-m", "echo", "prompt"])
103 | assert result2.exit_code == 0
104 | assert json.loads(result2.output) == {
105 | "prompt": "prompt",
106 | "system": "",
107 | "attachments": [],
108 | "stream": True,
109 | "previous": [],
110 | "options": {"example_bool": True},
111 | }
112 |
113 | # Option can be overridden
114 | result3 = runner.invoke(
115 | cli, ["-m", "echo", "prompt", "-o", "example_bool", "false"]
116 | )
117 | assert result3.exit_code == 0
118 | assert json.loads(result3.output) == {
119 | "prompt": "prompt",
120 | "system": "",
121 | "attachments": [],
122 | "stream": True,
123 | "previous": [],
124 | "options": {"example_bool": False},
125 | }
126 | # Using an alias should also pick up that option
127 | aliases_path = user_path / "aliases.json"
128 | aliases_path.write_text('{"e": "echo"}', "utf-8")
129 | result4 = runner.invoke(cli, ["-m", "e", "prompt"])
130 | assert result4.exit_code == 0
131 | assert json.loads(result4.output) == {
132 | "prompt": "prompt",
133 | "system": "",
134 | "attachments": [],
135 | "stream": True,
136 | "previous": [],
137 | "options": {"example_bool": True},
138 | }
139 |
--------------------------------------------------------------------------------
/tests/test_embed.py:
--------------------------------------------------------------------------------
1 | import json
2 | import llm
3 | from llm.embeddings import Entry
4 | import pytest
5 | import sqlite_utils
6 | from unittest.mock import ANY
7 |
8 |
9 | def test_demo_plugin():
10 | model = llm.get_embedding_model("embed-demo")
11 | assert model.embed("hello world") == [5, 5] + [0] * 14
12 |
13 |
14 | @pytest.mark.parametrize(
15 | "batch_size,expected_batches",
16 | (
17 | (None, 100),
18 | (10, 100),
19 | ),
20 | )
21 | def test_embed_huge_list(batch_size, expected_batches):
22 | model = llm.get_embedding_model("embed-demo")
23 | huge_list = ("hello {}".format(i) for i in range(1000))
24 | kwargs = {}
25 | if batch_size:
26 | kwargs["batch_size"] = batch_size
27 | results = model.embed_multi(huge_list, **kwargs)
28 | assert repr(type(results)) == "<class 'generator'>"
29 | first_twos = {}
30 | for result in results:
31 | key = (result[0], result[1])
32 | first_twos[key] = first_twos.get(key, 0) + 1
33 | assert first_twos == {(5, 1): 10, (5, 2): 90, (5, 3): 900}
34 | assert model.batch_count == expected_batches
35 |
36 |
37 | def test_embed_store(collection):
38 | collection.embed("3", "hello world again", store=True)
39 | assert collection.db["embeddings"].count == 3
40 | assert (
41 | next(collection.db["embeddings"].rows_where("id = ?", ["3"]))["content"]
42 | == "hello world again"
43 | )
44 |
45 |
46 | def test_embed_metadata(collection):
47 | collection.embed("3", "hello yet again", metadata={"foo": "bar"}, store=True)
48 | assert collection.db["embeddings"].count == 3
49 | assert json.loads(
50 | next(collection.db["embeddings"].rows_where("id = ?", ["3"]))["metadata"]
51 | ) == {"foo": "bar"}
52 | entry = collection.similar("hello yet again")[0]
53 | assert entry.id == "3"
54 | assert entry.metadata == {"foo": "bar"}
55 | assert entry.content == "hello yet again"
56 |
57 |
58 | def test_collection(collection):
59 | assert collection.id == 1
60 | assert collection.count() == 2
61 | # Check that the embeddings are there
62 | rows = list(collection.db["embeddings"].rows)
63 | assert rows == [
64 | {
65 | "collection_id": 1,
66 | "id": "1",
67 | "embedding": llm.encode([5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
68 | "content": None,
69 | "content_blob": None,
70 | "content_hash": collection.content_hash("hello world"),
71 | "metadata": None,
72 | "updated": ANY,
73 | },
74 | {
75 | "collection_id": 1,
76 | "id": "2",
77 | "embedding": llm.encode([7, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
78 | "content": None,
79 | "content_blob": None,
80 | "content_hash": collection.content_hash("goodbye world"),
81 | "metadata": None,
82 | "updated": ANY,
83 | },
84 | ]
85 | assert isinstance(rows[0]["updated"], int) and rows[0]["updated"] > 0
86 |
87 |
88 | def test_similar(collection):
89 | results = list(collection.similar("hello world"))
90 | assert results == [
91 | Entry(id="1", score=pytest.approx(0.9999999999999999)),
92 | Entry(id="2", score=pytest.approx(0.9863939238321437)),
93 | ]
94 |
95 |
96 | def test_similar_prefixed(collection):
97 | results = list(collection.similar("hello world", prefix="2"))
98 | assert results == [
99 | Entry(id="2", score=pytest.approx(0.9863939238321437)),
100 | ]
101 |
102 |
103 | def test_similar_by_id(collection):
104 | results = list(collection.similar_by_id("1"))
105 | assert results == [
106 | Entry(id="2", score=pytest.approx(0.9863939238321437)),
107 | ]
108 |
109 |
110 | @pytest.mark.parametrize(
111 | "batch_size,expected_batches",
112 | (
113 | (None, 100),
114 | (5, 200),
115 | ),
116 | )
117 | @pytest.mark.parametrize("with_metadata", (False, True))
118 | def test_embed_multi(with_metadata, batch_size, expected_batches):
119 | db = sqlite_utils.Database(memory=True)
120 | collection = llm.Collection("test", db, model_id="embed-demo")
121 | model = collection.model()
122 | assert getattr(model, "batch_count", 0) == 0
123 | ids_and_texts = ((str(i), "hello {}".format(i)) for i in range(1000))
124 | kwargs = {}
125 | if batch_size is not None:
126 | kwargs["batch_size"] = batch_size
127 | if with_metadata:
128 | ids_and_texts = ((id, text, {"meta": id}) for id, text in ids_and_texts)
129 | collection.embed_multi_with_metadata(ids_and_texts, **kwargs)
130 | else:
131 | # Exercise store=True here too
132 | collection.embed_multi(ids_and_texts, store=True, **kwargs)
133 | rows = list(db["embeddings"].rows)
134 | assert len(rows) == 1000
135 | rows_with_metadata = [row for row in rows if row["metadata"] is not None]
136 | rows_with_content = [row for row in rows if row["content"] is not None]
137 | if with_metadata:
138 | assert len(rows_with_metadata) == 1000
139 | assert len(rows_with_content) == 0
140 | else:
141 | assert len(rows_with_metadata) == 0
142 | assert len(rows_with_content) == 1000
143 | # Every row should have content_hash set
144 | assert all(row["content_hash"] is not None for row in rows)
145 | # Check batch count
146 | assert collection.model().batch_count == expected_batches
147 |
148 |
149 | def test_collection_delete(collection):
150 | db = collection.db
151 | assert db["embeddings"].count == 2
152 | assert db["collections"].count == 1
153 | collection.delete()
154 | assert db["embeddings"].count == 0
155 | assert db["collections"].count == 0
156 |
157 |
158 | def test_binary_only_and_text_only_embedding_models():
159 | binary_only = llm.get_embedding_model("embed-binary-only")
160 | text_only = llm.get_embedding_model("embed-text-only")
161 |
162 | assert binary_only.supports_binary
163 | assert not binary_only.supports_text
164 | assert not text_only.supports_binary
165 | assert text_only.supports_text
166 |
167 | with pytest.raises(ValueError):
168 | binary_only.embed("hello world")
169 |
170 | binary_only.embed(b"hello world")
171 |
172 | with pytest.raises(ValueError):
173 | text_only.embed(b"hello world")
174 |
175 | text_only.embed("hello world")
176 |
177 | # Try the multi versions too
178 | # Have to call list() on this or the generator is not evaluated
179 | with pytest.raises(ValueError):
180 | list(binary_only.embed_multi(["hello world"]))
181 |
182 | list(binary_only.embed_multi([b"hello world"]))
183 |
184 | with pytest.raises(ValueError):
185 | list(text_only.embed_multi([b"hello world"]))
186 |
187 | list(text_only.embed_multi(["hello world"]))
188 |
--------------------------------------------------------------------------------
/tests/test_encode_decode.py:
--------------------------------------------------------------------------------
1 | import llm
2 | import pytest
3 | import numpy as np
4 |
5 |
6 | @pytest.mark.parametrize(
7 | "array",
8 | (
9 | (0.0, 1.0, 1.5),
10 | (3423.0, 222.0, -1234.5),
11 | ),
12 | )
13 | def test_roundtrip(array):
14 | encoded = llm.encode(array)
15 | decoded = llm.decode(encoded)
16 | assert decoded == array
17 | # Try with numpy as well
18 | numpy_decoded = np.frombuffer(encoded, "