├── .gitignore
├── tests
│   └── test_llama_server.py
├── .github
│   └── workflows
│       ├── test.yml
│       └── publish.yml
├── pyproject.toml
├── llm_llama_server.py
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
.venv
__pycache__/
*.py[cod]
*$py.class
venv
.eggs
.pytest_cache
*.egg-info
.DS_Store
.vscode
dist
build
--------------------------------------------------------------------------------
/tests/test_llama_server.py:
--------------------------------------------------------------------------------
from llm import get_models, get_async_models


def test_plugin_is_installed():
    models = [model.model_id for model in get_models()]
    async_models = [model.model_id for model in get_async_models()]
    assert "llama-server" in models
    assert "llama-server-vision" in models
    assert "llama-server" in async_models
    assert "llama-server-vision" in async_models
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
name: Test

on: [push, pull_request]

permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
          cache-dependency-path: pyproject.toml
      - name: Install dependencies
        run: |
          pip install -e '.[test]'
      - name: Run tests
        run: |
          python -m pytest
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[project]
name = "llm-llama-server"
version = "0.2"
description = "Interact with llama-server models"
readme = "README.md"
authors = [{name = "Simon Willison"}]
license = "Apache-2.0"
classifiers = []
requires-python = ">=3.9"
dependencies = [
    "llm>=0.26"
]

[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project.urls]
Homepage = "https://github.com/simonw/llm-llama-server"
Changelog = "https://github.com/simonw/llm-llama-server/releases"
Issues = "https://github.com/simonw/llm-llama-server/issues"
CI = "https://github.com/simonw/llm-llama-server/actions"

[project.entry-points.llm]
llama_server = "llm_llama_server"

[project.optional-dependencies]
test = ["pytest"]
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
name: Publish Python Package

on:
  release:
    types: [created]

permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
          cache-dependency-path: pyproject.toml
      - name: Install dependencies
        run: |
          pip install -e '.[test]'
      - name: Run tests
        run: |
          python -m pytest
  deploy:
    runs-on: ubuntu-latest
    needs: [test]
    environment: release
    permissions:
      id-token: write
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"
          cache: pip
          cache-dependency-path: pyproject.toml
      - name: Install dependencies
        run: |
          pip install setuptools wheel build
      - name: Build
        run: |
          python -m build
      - name: Publish
        uses: pypa/gh-action-pypi-publish@release/v1
--------------------------------------------------------------------------------
/llm_llama_server.py:
--------------------------------------------------------------------------------
import llm
from llm.default_plugins.openai_models import Chat, AsyncChat


class LlamaServer(Chat):
    model_id = "llama-server"
    key = "sk-llama-server"

    def __init__(self, **kwargs):
        super().__init__(
            model_name="llama-server",
            model_id=self.model_id,
            api_base="http://localhost:8080/v1",
            **kwargs,
        )

    def __str__(self):
        return "llama-server: {}".format(self.model_id)


class AsyncLlamaServer(AsyncChat):
    model_id = "llama-server"
    key = "sk-llama-server"

    def __init__(self, **kwargs):
        super().__init__(
            model_name="llama-server",
            model_id=self.model_id,
            api_base="http://localhost:8080/v1",
            **kwargs,
        )

    def __str__(self):
        return f"llama-server (async): {self.model_id}"


class LlamaServerVision(LlamaServer):
    model_id = "llama-server-vision"


class AsyncLlamaServerVision(AsyncLlamaServer):
    model_id = "llama-server-vision"


class LlamaServerTools(LlamaServer):
    model_id = "llama-server-tools"


class AsyncLlamaServerTools(AsyncLlamaServer):
    model_id = "llama-server-tools"


@llm.hookimpl
def register_models(register):
    register(
        LlamaServer(),
        AsyncLlamaServer(),
    )
    register(
        LlamaServerVision(vision=True),
        AsyncLlamaServerVision(vision=True),
    )
    register(
        LlamaServerTools(vision=True, can_stream=False, supports_tools=True),
        AsyncLlamaServerTools(vision=True, can_stream=False, supports_tools=True),
    )
--------------------------------------------------------------------------------
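The `api_base` in the classes above is hard-coded to `http://localhost:8080/v1`. For a server listening on a different port, one option is a small local plugin that mirrors the same pattern. The sketch below is illustrative only; the module name, class name, model ID and port are hypothetical and not part of this repository:

```python
# llm_llama_server_alt.py (hypothetical local plugin, not a file in this repo)
import llm
from llm.default_plugins.openai_models import Chat


class LlamaServerAlt(Chat):
    model_id = "llama-server-8081"
    key = "sk-llama-server"  # placeholder key, mirroring the classes above

    def __init__(self, **kwargs):
        super().__init__(
            model_name="llama-server",
            model_id=self.model_id,
            api_base="http://localhost:8081/v1",  # assumed alternative port
            **kwargs,
        )


@llm.hookimpl
def register_models(register):
    register(LlamaServerAlt())
```

Like this plugin, such a module would need an `llm` entry point in its own `pyproject.toml` and would have to be installed into the same environment (for example with `llm install -e .`) before `llm -m llama-server-8081` would resolve.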
/README.md:
--------------------------------------------------------------------------------
# llm-llama-server

[![PyPI](https://img.shields.io/pypi/v/llm-llama-server.svg)](https://pypi.org/project/llm-llama-server/)
[![Changelog](https://img.shields.io/github/v/release/simonw/llm-llama-server?include_prereleases&label=changelog)](https://github.com/simonw/llm-llama-server/releases)
[![Tests](https://github.com/simonw/llm-llama-server/actions/workflows/test.yml/badge.svg)](https://github.com/simonw/llm-llama-server/actions/workflows/test.yml)
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/llm-llama-server/blob/main/LICENSE)

Interact with llama-server models

## Installation

Install this plugin in the same environment as [LLM](https://llm.datasette.io/):
```bash
llm install llm-llama-server
```
## Usage

You'll need to be running a [llama-server](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) on port 8080 to use this plugin.

You can `brew install llama.cpp` to obtain that binary. Then run it like this:
```bash
llama-server -hf unsloth/gemma-3-4b-it-GGUF:Q4_K_XL
```
This loads and serves the [unsloth/gemma-3-4b-it-GGUF](https://huggingface.co/unsloth/gemma-3-4b-it-GGUF) GGUF version of [Gemma 3 4B](https://ai.google.dev/gemma/docs/core) - a 3.2GB download.

To access regular models from LLM, use the `llama-server` model:
```bash
llm -m llama-server "say hi"
```
For vision models, use `llama-server-vision`:
```bash
llm -m llama-server-vision describe -a path/to/image.png
```
For models with [tools](https://llm.datasette.io/en/stable/tools.html) (which also support vision), use `llama-server-tools`:
```bash
llm -m llama-server-tools -T llm_time 'time?' --td
```
You'll need to run `llama-server` with the `--jinja` flag for tools to work:
```bash
llama-server --jinja -hf unsloth/gemma-3-4b-it-GGUF:Q4_K_XL
```
Or for a slightly stronger [7.3GB model](https://huggingface.co/unsloth/gemma-3-12b-it-qat-GGUF):
```bash
llama-server --jinja -hf unsloth/gemma-3-12b-it-qat-GGUF:Q4_K_M
```
## Development

To set up this plugin locally, first check out the code. Then create a new virtual environment:
```bash
cd llm-llama-server
python -m venv venv
source venv/bin/activate
```
Now install the dependencies and test dependencies:
```bash
python -m pip install -e '.[test]'
```
To run the tests:
```bash
python -m pytest
```
--------------------------------------------------------------------------------
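The registered models can also be driven from LLM's [Python API](https://llm.datasette.io/en/stable/python-api.html) rather than the CLI. A minimal sketch, assuming a llama-server is already running on port 8080 as described in the Usage section above:

```python
import llm

# Resolve the model this plugin registers under the "llama-server" ID
model = llm.get_model("llama-server")

# The plugin supplies a placeholder key, so no API key configuration is needed;
# the request goes to the OpenAI-compatible endpoint at http://localhost:8080/v1
response = model.prompt("Say hi in five words")
print(response.text())
```

The async variants registered by the plugin are available through `llm.get_async_model("llama-server")`, and the vision model should accept images via the `attachments=` parameter to `prompt()`, mirroring the CLI's `-a` option.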