├── .gitignore
├── tests
│   └── test_llama_server.py
├── .github
│   └── workflows
│       ├── test.yml
│       └── publish.yml
├── pyproject.toml
├── llm_llama_server.py
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
.venv
__pycache__/
*.py[cod]
*$py.class
venv
.eggs
.pytest_cache
*.egg-info
.DS_Store
.vscode
dist
build
--------------------------------------------------------------------------------
/tests/test_llama_server.py:
--------------------------------------------------------------------------------
from llm import get_models, get_async_models


def test_plugin_is_installed():
    models = [model.model_id for model in get_models()]
    async_models = [model.model_id for model in get_async_models()]
    assert "llama-server" in models
    assert "llama-server-vision" in models
    assert "llama-server" in async_models
    assert "llama-server-vision" in async_models
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
name: Test

on: [push, pull_request]

permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
          cache-dependency-path: pyproject.toml
      - name: Install dependencies
        run: |
          pip install -e '.[test]'
      - name: Run tests
        run: |
          python -m pytest
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[project]
name = "llm-llama-server"
version = "0.2"
description = "Interact with llama-server models"
readme = "README.md"
authors = [{name = "Simon Willison"}]
license = "Apache-2.0"
classifiers = []
requires-python = ">=3.9"
dependencies = [
    "llm>=0.26"
]

[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project.urls]
Homepage = "https://github.com/simonw/llm-llama-server"
Changelog = "https://github.com/simonw/llm-llama-server/releases"
Issues = "https://github.com/simonw/llm-llama-server/issues"
CI = "https://github.com/simonw/llm-llama-server/actions"

[project.entry-points.llm]
llama_server = "llm_llama_server"

[project.optional-dependencies]
test = ["pytest"]
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
name: Publish Python Package

on:
  release:
    types: [created]

permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
          cache-dependency-path: pyproject.toml
      - name: Install dependencies
        run: |
          pip install -e '.[test]'
      - name: Run tests
        run: |
          python -m pytest
  deploy:
    runs-on: ubuntu-latest
    needs: [test]
    environment: release
    permissions:
      id-token: write
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"
          cache: pip
          cache-dependency-path: pyproject.toml
      - name: Install dependencies
        run: |
          pip install setuptools wheel build
      - name: Build
        run: |
          python -m build
      - name: Publish
        uses: pypa/gh-action-pypi-publish@release/v1
--------------------------------------------------------------------------------
/llm_llama_server.py:
--------------------------------------------------------------------------------
import llm
from llm.default_plugins.openai_models import Chat, AsyncChat


class LlamaServer(Chat):
    model_id = "llama-server"
    key = "sk-llama-server"

    def __init__(self, **kwargs):
        super().__init__(
            model_name="llama-server",
            model_id=self.model_id,
            api_base="http://localhost:8080/v1",
            **kwargs,
        )

    def __str__(self):
        return "llama-server: {}".format(self.model_id)


class AsyncLlamaServer(AsyncChat):
    model_id = "llama-server"
    key = "sk-llama-server"

    def __init__(self, **kwargs):
        super().__init__(
            model_name="llama-server",
            model_id=self.model_id,
            api_base="http://localhost:8080/v1",
            **kwargs,
        )

    def __str__(self):
        return f"llama-server (async): {self.model_id}"


class LlamaServerVision(LlamaServer):
    model_id = "llama-server-vision"


class AsyncLlamaServerVision(AsyncLlamaServer):
    model_id = "llama-server-vision"


class LlamaServerTools(LlamaServer):
    model_id = "llama-server-tools"


class AsyncLlamaServerTools(AsyncLlamaServer):
    model_id = "llama-server-tools"


@llm.hookimpl
def register_models(register):
    register(
        LlamaServer(),
        AsyncLlamaServer(),
    )
    register(
        LlamaServerVision(vision=True),
        AsyncLlamaServerVision(vision=True),
    )
    register(
        LlamaServerTools(vision=True, can_stream=False, supports_tools=True),
        AsyncLlamaServerTools(vision=True, can_stream=False, supports_tools=True),
    )
--------------------------------------------------------------------------------
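The `api_base` in the classes above is hard-coded to `http://localhost:8080/v1`. For a server listening on a different port, one option is a small local plugin that mirrors the same pattern. The sketch below is illustrative only; the module name, class name, model ID and port are hypothetical and not part of this repository:

```python
# llm_llama_server_alt.py (hypothetical local plugin, not a file in this repo)
import llm
from llm.default_plugins.openai_models import Chat


class LlamaServerAlt(Chat):
    model_id = "llama-server-8081"
    key = "sk-llama-server"  # placeholder key, mirroring the classes above

    def __init__(self, **kwargs):
        super().__init__(
            model_name="llama-server",
            model_id=self.model_id,
            api_base="http://localhost:8081/v1",  # assumed alternative port
            **kwargs,
        )


@llm.hookimpl
def register_models(register):
    register(LlamaServerAlt())
```

Like this plugin, such a module would need an `llm` entry point in its own `pyproject.toml` and would have to be installed into the same environment (for example with `llm install -e .`) before `llm -m llama-server-8081` would resolve.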
/README.md:
--------------------------------------------------------------------------------
# llm-llama-server

[![PyPI](https://img.shields.io/pypi/v/llm-llama-server.svg)](https://pypi.org/project/llm-llama-server/)
[![Changelog](https://img.shields.io/github/v/release/simonw/llm-llama-server?include_prereleases&label=changelog)](https://github.com/simonw/llm-llama-server/releases)
[![Tests](https://github.com/simonw/llm-llama-server/actions/workflows/test.yml/badge.svg)](https://github.com/simonw/llm-llama-server/actions/workflows/test.yml)
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/llm-llama-server/blob/main/LICENSE)

Interact with llama-server models

## Installation

Install this plugin in the same environment as [LLM](https://llm.datasette.io/):
```bash
llm install llm-llama-server
```
## Usage

You'll need to be running a [llama-server](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) on port 8080 to use this plugin.

You can `brew install llama.cpp` to obtain that binary. Then run it like this:
```bash
llama-server -hf unsloth/gemma-3-4b-it-GGUF:Q4_K_XL
```
This loads and serves the [unsloth/gemma-3-4b-it-GGUF](https://huggingface.co/unsloth/gemma-3-4b-it-GGUF) GGUF version of [Gemma 3 4B](https://ai.google.dev/gemma/docs/core) - a 3.2GB download.

To access regular models from LLM, use the `llama-server` model:
```bash
llm -m llama-server "say hi"
```
For vision models, use `llama-server-vision`:
```bash
llm -m llama-server-vision describe -a path/to/image.png
```
For models with [tools](https://llm.datasette.io/en/stable/tools.html) (which also support vision), use `llama-server-tools`:
```bash
llm -m llama-server-tools -T llm_time 'time?' --td
```
You'll need to run `llama-server` with the `--jinja` flag for tools to work:
```bash
llama-server --jinja -hf unsloth/gemma-3-4b-it-GGUF:Q4_K_XL
```
Or for a slightly stronger [7.3GB model](https://huggingface.co/unsloth/gemma-3-12b-it-qat-GGUF):
```bash
llama-server --jinja -hf unsloth/gemma-3-12b-it-qat-GGUF:Q4_K_M
```
## Development

To set up this plugin locally, first check out the code. Then create a new virtual environment:
```bash
cd llm-llama-server
python -m venv venv
source venv/bin/activate
```
Now install the dependencies and test dependencies:
```bash
python -m pip install -e '.[test]'
```
To run the tests:
```bash
python -m pytest
```
--------------------------------------------------------------------------------
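The registered models can also be driven from LLM's [Python API](https://llm.datasette.io/en/stable/python-api.html) rather than the CLI. A minimal sketch, assuming a llama-server is already running on port 8080 as described in the Usage section above:

```python
import llm

# Resolve the model this plugin registers under the "llama-server" ID
model = llm.get_model("llama-server")

# The plugin supplies a placeholder key, so no API key configuration is needed;
# the request goes to the OpenAI-compatible endpoint at http://localhost:8080/v1
response = model.prompt("Say hi in five words")
print(response.text())
```

The async variants registered by the plugin are available through `llm.get_async_model("llama-server")`, and the vision model should accept images via the `attachments=` parameter to `prompt()`, mirroring the CLI's `-a` option.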