├── .devcontainer ├── devcontainer.json ├── postCreateCommand.sh └── postStartCommand.sh ├── .github └── workflows │ ├── python-ci.yml │ └── python-publish.yml ├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── CONTRIBUTING.md ├── README.md ├── llm_github_models.py ├── pyproject.toml ├── tests ├── files │ ├── kick.wav │ └── salmon.jpeg ├── test_llm_github_embeddings.py ├── test_llm_github_models.py └── test_tool_support.py └── tools ├── README.md ├── download_models_json.py ├── models.fragment.md ├── models.json └── parse_models_json.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Python 3.13", 3 | "image": "mcr.microsoft.com/devcontainers/python:3.13-bullseye", 4 | "customizations": { 5 | "vscode": { 6 | "settings": { 7 | "python.defaultInterpreterPath": "/usr/local/bin/python", 8 | "python.linting.enabled": true 9 | }, 10 | "extensions": [ 11 | "ms-python.python", 12 | "ms-python.vscode-pylance", 13 | "ms-python.vscode-python-envs", 14 | "charliermarsh.ruff" 15 | ] 16 | } 17 | }, 18 | "postCreateCommand": ".devcontainer/postCreateCommand.sh", 19 | "postStartCommand": ".devcontainer/postStartCommand.sh" 20 | } -------------------------------------------------------------------------------- /.devcontainer/postCreateCommand.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | pip install . 6 | 7 | llm install -e . -------------------------------------------------------------------------------- /.devcontainer/postStartCommand.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | llm keys set github --value $GITHUB_TOKEN -------------------------------------------------------------------------------- /.github/workflows/python-ci.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | models: read 13 | strategy: 14 | matrix: 15 | python-version: [3.9, "3.10", 3.11, 3.12, 3.13] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install ".[test]" 27 | - name: Run ruff format 28 | run: | 29 | ruff check 30 | - name: Run pyright 31 | run: | 32 | pyright llm_github_models.py 33 | - name: Run tests 34 | run: | 35 | pytest 36 | env: 37 | GITHUB_MODELS_KEY: ${{ secrets.GITHUB_TOKEN }} 38 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package to PyPI when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | release-build: 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - uses: actions/setup-python@v5 26 | with: 27 | python-version: "3.x" 28 | 29 | - name: Build release distributions 30 | run: | 31 | # NOTE: put your own distribution build steps here. 32 | python -m pip install build 33 | python -m build 34 | 35 | - name: Upload distributions 36 | uses: actions/upload-artifact@v4 37 | with: 38 | name: release-dists 39 | path: dist/ 40 | 41 | pypi-publish: 42 | runs-on: ubuntu-latest 43 | needs: 44 | - release-build 45 | permissions: 46 | # IMPORTANT: this permission is mandatory for trusted publishing 47 | id-token: write 48 | 49 | # Dedicated environments with protections for publishing are strongly recommended. 50 | # For more information, see: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment#deployment-protection-rules 51 | environment: 52 | name: pypi 53 | # OPTIONAL: uncomment and update to include your PyPI project URL in the deployment status: 54 | # url: https://pypi.org/p/YOURPROJECT 55 | # 56 | # ALTERNATIVE: if your GitHub Release name is the PyPI project version string 57 | # ALTERNATIVE: exactly, uncomment the following line instead: 58 | # url: https://pypi.org/project/YOURPROJECT/${{ github.event.release.name }} 59 | 60 | steps: 61 | - name: Retrieve release distributions 62 | uses: actions/download-artifact@v4 63 | with: 64 | name: release-dists 65 | path: dist/ 66 | 67 | - name: Publish release distributions to PyPI 68 | uses: pypa/gh-action-pypi-publish@release/v1 69 | with: 70 | packages-dir: dist/ 71 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | [.]venv/ 2 | env/ 3 | *.py[co] 4 | __pycache__/ 5 | *.egg-info/ 6 | build/ -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "GPT-4o with audio", 5 | "type": "debugpy", 6 | "request": "launch", 7 | "module": "llm", 8 | "args": [ 9 | "what is happening in this audio?", 10 | "-m", 11 | "github/gpt-4o", 12 | "-a", 13 | "tests/files/kick.wav" 14 | ], 15 | }, 16 | { 17 | "name": "GPT-4o with function", 18 | "type": "debugpy", 19 | "request": "launch", 20 | "module": "llm", 21 | // This needs to be string instead of an array so the the whitespace is preserved 22 | "args": "'what is 34234 * 213345?' 
-m github/gpt-4o --functions 'def multiply(x: int, y: int) -> int:\n \"\"\"Multiply two numbers.\"\"\"\n return x * y\n' --td" 23 | } 24 | ] 25 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": [ 3 | "tests" 4 | ], 5 | "python.testing.unittestEnabled": false, 6 | "python.testing.pytestEnabled": true, 7 | "[python]": { 8 | "editor.formatOnSave": true, 9 | "editor.codeActionsOnSave": { 10 | "source.fixAll": "explicit", 11 | "source.unusedImports": "explicit", 12 | "source.organizeImports": "explicit" 13 | }, 14 | "editor.defaultFormatter": "charliermarsh.ruff" 15 | } 16 | } -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING 2 | 3 | ## Required tools 4 | 5 | - [Python 3.9+](https://docs.python.org/3/using/index.html) 6 | 7 | ## Setup 8 | 9 | 1. Set up a virtual environment at `//.venv` and activate it 10 | (see [the docs](https://docs.python.org/3/library/venv.html) for more information) 11 | 1. `llm install -e '.[test]'` to install all dependencies 12 | 13 | ## Running tests 14 | 15 | 1. `pytest` to run tests 16 | 17 | ## Code formatting and type checks 18 | 19 | Pull-requests will only pass in CI/CD if the following are met: 20 | 21 | 1. `ruff check` 22 | 2. `pyright llm_github_models.py` 23 | 3. `ruff format --check` 24 | 25 | Run `ruff check --fix` to resort imports before submitting PRs, or commit another change. Run `ruff format` to bring the code file up to our style guidelines. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GitHub Models Plugin for LLM 2 | [![PyPI](https://img.shields.io/pypi/v/llm-github-models.svg)](https://pypi.org/project/llm-github-models/) 3 | [![Changelog](https://img.shields.io/github/v/release/tonybaloney/llm-github-models?include_prereleases&label=changelog)](https://github.com/tonybaloney/llm-github-models/releases) 4 | 5 | This is a plugin for [llm](https://llm.datasette.io) that uses [GitHub Models](https://github.blog/news-insights/product-news/introducing-github-models/) via the Azure AI Inference SDK. GitHub Models is available to all GitHub users and offers **free** usage of many AI LLMs. 6 | 7 | ## Features 8 | 9 | - Support for all >30 models, including GPT-4o, 4.1, o3, DeepSeek-R1, Llama3.x and more 10 | - Support for [schemas](https://llm.datasette.io/en/stable/schemas.html) 11 | - Output token usage 12 | - Support for [Embedding Models](https://llm.datasette.io/en/stable/embeddings/index.html) 13 | - Async and streaming outputs (model dependent) 14 | - Support for model attachments 15 | - Support for [tools](https://llm.datasette.io/en/stable/tools.html) 16 | 17 | ## Installation 18 | 19 | ```default 20 | $ llm install llm-github-models 21 | ``` 22 | 23 | or `pip install llm-github-models` 24 | 25 | ## Usage 26 | 27 | To set the API key, use the `llm keys set github` command or use the `GITHUB_MODELS_KEY` environment variable. 28 | 29 | To get an API key, create a personal access token (PAT) inside [GitHub Settings](https://github.com/settings/tokens). 
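
Once a key is configured, the models can also be called from Python through llm's API. A minimal sketch (the key is resolved from the stored `github` key or the `GITHUB_MODELS_KEY` environment variable; the prompt is only illustrative):

```python
import llm

# Models registered by this plugin use the "github/" prefix
model = llm.get_model("github/gpt-4.1-mini")

response = model.prompt("Top facts about cheese")
print(response.text())
```
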
30 | 31 | Learn about [rate limits here](https://docs.github.com/github-models/prototyping-with-ai-models#rate-limits) 32 | 33 | All model names are affixed with `github/` to distinguish the OpenAI ones from the builtin models. 34 | 35 | ## Example 36 | 37 | ```default 38 | $ llm prompt 'top facts about cheese' -m github/gpt-4.1-mini 39 | Sure! Here are some top facts about cheese: 40 | 41 | 1. **Ancient Origins**: Cheese is one of the oldest man-made foods, with evidence of cheese-making dating back over 7,000 years. 42 | 43 | 2. **Variety**: There are over 1,800 distinct types of cheese worldwide, varying by texture, flavor, milk source, and production methods. 44 | ``` 45 | 46 | ### Image attachments 47 | 48 | Multi-modal vision models can accept image attachments using the [LLM attachments](https://llm.datasette.io/en/stable/usage.html#attachments) options: 49 | 50 | ```bash 51 | llm -m github/Llama-3.2-11B-Vision-Instruct "Describe this image" -a https://static.simonwillison.net/static/2024/pelicans.jpg 52 | ``` 53 | 54 | Produces 55 | ```bash 56 | This image depicts a dense gathering of pelicans, with the largest birds situated in the center, showcasing their light brown plumage and long, pointed beaks. The pelicans are standing on a rocky shoreline, with a serene body of water behind them, characterized by its pale blue hue and gentle ripples. In the background, a dark, rocky cliff rises, adding depth to the scene. 57 | 58 | The overall atmosphere of the image exudes tranquility, with the pelicans seemingly engaging in a social gathering or feeding activity. The photograph's clarity and focus on the pelicans' behavior evoke a sense of observation and appreciation for the natural world. 59 | ``` 60 | 61 | ## Supported Models 62 | 63 | ### Chat Models 64 | 65 | | Model Name | Streaming | Schemas | Tools | Input Modalities | Output Modalities | 66 | |------------|-----------|---------|-------|------------------|-------------------| 67 | | AI21-Jamba-1.5-Large | ✅ | ❌ | ❌ | text | text | 68 | | AI21-Jamba-1.5-Mini | ✅ | ❌ | ❌ | text | text | 69 | | Codestral-2501 | ✅ | ❌ | ✅ | text | text | 70 | | Cohere-command-r | ✅ | ❌ | ✅ | text | text | 71 | | Cohere-command-r-08-2024 | ✅ | ❌ | ✅ | text | text | 72 | | Cohere-command-r-plus | ✅ | ❌ | ✅ | text | text | 73 | | Cohere-command-r-plus-08-2024 | ✅ | ❌ | ✅ | text | text | 74 | | DeepSeek-R1 | ✅ | ❌ | ❌ | text | text | 75 | | DeepSeek-V3 | ✅ | ❌ | ❌ | text | text | 76 | | DeepSeek-V3-0324 | ✅ | ❌ | ❌ | text | text | 77 | | Llama-3.2-11B-Vision-Instruct | ✅ | ❌ | ❌ | text, image, audio | text | 78 | | Llama-3.2-90B-Vision-Instruct | ✅ | ❌ | ❌ | text, image, audio | text | 79 | | Llama-3.3-70B-Instruct | ✅ | ❌ | ❌ | text | text | 80 | | Llama-4-Maverick-17B-128E-Instruct-FP8 | ✅ | ❌ | ❌ | text, image | text | 81 | | Llama-4-Scout-17B-16E-Instruct | ✅ | ❌ | ❌ | text, image | text | 82 | | MAI-DS-R1 | ✅ | ❌ | ❌ | text | text | 83 | | Meta-Llama-3-70B-Instruct | ✅ | ❌ | ❌ | text | text | 84 | | Meta-Llama-3-8B-Instruct | ✅ | ❌ | ❌ | text | text | 85 | | Meta-Llama-3.1-405B-Instruct | ✅ | ❌ | ❌ | text | text | 86 | | Meta-Llama-3.1-70B-Instruct | ✅ | ❌ | ❌ | text | text | 87 | | Meta-Llama-3.1-8B-Instruct | ✅ | ❌ | ❌ | text | text | 88 | | Ministral-3B | ✅ | ❌ | ✅ | text | text | 89 | | Mistral-Large-2411 | ✅ | ❌ | ✅ | text | text | 90 | | Mistral-Nemo | ✅ | ❌ | ✅ | text | text | 91 | | Mistral-large | ✅ | ❌ | ✅ | text | text | 92 | | Mistral-large-2407 | ✅ | ❌ | ✅ | text | text | 93 | | Mistral-small | ✅ | ❌ | ✅ | text | text | 94 | | 
Phi-3-medium-128k-instruct | ✅ | ❌ | ❌ | text | text | 95 | | Phi-3-medium-4k-instruct | ✅ | ❌ | ❌ | text | text | 96 | | Phi-3-mini-128k-instruct | ✅ | ❌ | ❌ | text | text | 97 | | Phi-3-mini-4k-instruct | ✅ | ❌ | ❌ | text | text | 98 | | Phi-3-small-128k-instruct | ✅ | ❌ | ❌ | text | text | 99 | | Phi-3-small-8k-instruct | ✅ | ❌ | ❌ | text | text | 100 | | Phi-3.5-MoE-instruct | ✅ | ❌ | ❌ | text | text | 101 | | Phi-3.5-mini-instruct | ✅ | ❌ | ❌ | text | text | 102 | | Phi-3.5-vision-instruct | ✅ | ❌ | ❌ | text, image | text | 103 | | Phi-4 | ✅ | ❌ | ❌ | text | text | 104 | | Phi-4-mini-instruct | ✅ | ❌ | ❌ | text | text | 105 | | Phi-4-mini-reasoning | ✅ | ❌ | ❌ | text | text | 106 | | Phi-4-multimodal-instruct | ✅ | ❌ | ❌ | audio, image, text | text | 107 | | Phi-4-reasoning | ✅ | ❌ | ❌ | text | text | 108 | | cohere-command-a | ✅ | ❌ | ✅ | text | text | 109 | | gpt-4.1 | ✅ | ✅ | ✅ | text, image | text | 110 | | gpt-4.1-mini | ✅ | ✅ | ✅ | text, image | text | 111 | | gpt-4.1-nano | ✅ | ✅ | ✅ | text, image | text | 112 | | gpt-4o | ✅ | ✅ | ✅ | text, image, audio | text | 113 | | gpt-4o-mini | ✅ | ✅ | ✅ | text, image, audio | text | 114 | | grok-3 | ✅ | ❌ | ✅ | text | text | 115 | | grok-3-mini | ✅ | ❌ | ✅ | text | text | 116 | | jais-30b-chat | ✅ | ❌ | ❌ | text | text | 117 | | mistral-medium-2505 | ✅ | ❌ | ✅ | text, image | text | 118 | | mistral-small-2503 | ✅ | ❌ | ✅ | text, image | text | 119 | | o1 | ❌ | ✅ | ✅ | text, image | text | 120 | | o1-mini | ❌ | ❌ | ❌ | text | text | 121 | | o1-preview | ❌ | ❌ | ❌ | text | text | 122 | | o3 | ✅ | ❌ | ✅ | text, image | text | 123 | | o3-mini | ❌ | ✅ | ✅ | text | text | 124 | | o4-mini | ✅ | ❌ | ✅ | text, image | text | 125 | 126 | ### AI21 Jamba 1.5 Large 127 | 128 | Usage: `llm -m github/AI21-Jamba-1.5-Large` 129 | 130 | **Publisher:** AI21 Labs 131 | 132 | **Description:** A 398B parameters (94B active) multilingual model, offering a 256K long context window, function calling, structured output, and grounded generation. 133 | 134 | ### AI21 Jamba 1.5 Mini 135 | 136 | Usage: `llm -m github/AI21-Jamba-1.5-Mini` 137 | 138 | **Publisher:** AI21 Labs 139 | 140 | **Description:** A 52B parameters (12B active) multilingual model, offering a 256K long context window, function calling, structured output, and grounded generation. 141 | 142 | ### Codestral 25.01 143 | 144 | Usage: `llm -m github/Codestral-2501` 145 | 146 | **Publisher:** Mistral AI 147 | 148 | **Description:** Codestral 25.01 by Mistral AI is designed for code generation, supporting 80+ programming languages, and optimized for tasks like code completion and fill-in-the-middle 149 | 150 | ### Cohere Command R 151 | 152 | Usage: `llm -m github/Cohere-command-r` 153 | 154 | **Publisher:** Cohere 155 | 156 | **Description:** Command R is a scalable generative model targeting RAG and Tool Use to enable production-scale AI for enterprise. 157 | 158 | ### Cohere Command R 08-2024 159 | 160 | Usage: `llm -m github/Cohere-command-r-08-2024` 161 | 162 | **Publisher:** Cohere 163 | 164 | **Description:** Command R is a scalable generative model targeting RAG and Tool Use to enable production-scale AI for enterprise. 165 | 166 | ### Cohere Command R+ 167 | 168 | Usage: `llm -m github/Cohere-command-r-plus` 169 | 170 | **Publisher:** Cohere 171 | 172 | **Description:** Command R+ is a state-of-the-art RAG-optimized model designed to tackle enterprise-grade workloads. 
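
Models with a ✅ in the *Schemas* column of the table above (the GPT-4.1/GPT-4o families, o1 and o3-mini) can return structured output. A minimal sketch using llm's schema support with a Pydantic model (`CheeseFact` is just an illustrative schema):

```python
import json

import llm
from pydantic import BaseModel


class CheeseFact(BaseModel):
    name: str
    country_of_origin: str


model = llm.get_model("github/gpt-4.1-mini")
# The plugin converts the Pydantic model (or a plain dict schema)
# into a JSON schema response format for the request
response = model.prompt("Invent an interesting cheese", schema=CheeseFact)
print(json.loads(response.text()))
```
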
173 | 174 | ### Cohere Command R+ 08-2024 175 | 176 | Usage: `llm -m github/Cohere-command-r-plus-08-2024` 177 | 178 | **Publisher:** Cohere 179 | 180 | **Description:** Command R+ is a state-of-the-art RAG-optimized model designed to tackle enterprise-grade workloads. 181 | 182 | ### Cohere Embed v3 English 183 | 184 | Usage: `llm -m github/Cohere-embed-v3-english` 185 | 186 | **Publisher:** Cohere 187 | 188 | **Description:** Cohere Embed English is the market's leading text representation model used for semantic search, retrieval-augmented generation (RAG), classification, and clustering. 189 | 190 | ### Cohere Embed v3 Multilingual 191 | 192 | Usage: `llm -m github/Cohere-embed-v3-multilingual` 193 | 194 | **Publisher:** Cohere 195 | 196 | **Description:** Cohere Embed Multilingual is the market's leading text representation model used for semantic search, retrieval-augmented generation (RAG), classification, and clustering. 197 | 198 | ### DeepSeek-R1 199 | 200 | Usage: `llm -m github/DeepSeek-R1` 201 | 202 | **Publisher:** DeepSeek 203 | 204 | **Description:** DeepSeek-R1 excels at reasoning tasks using a step-by-step training process, such as language, scientific reasoning, and coding tasks. 205 | 206 | ### DeepSeek-V3 207 | 208 | Usage: `llm -m github/DeepSeek-V3` 209 | 210 | **Publisher:** DeepSeek 211 | 212 | **Description:** A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. 213 | 214 | ### DeepSeek-V3-0324 215 | 216 | Usage: `llm -m github/DeepSeek-V3-0324` 217 | 218 | **Publisher:** DeepSeek 219 | 220 | **Description:** DeepSeek-V3-0324 demonstrates notable improvements over its predecessor, DeepSeek-V3, in several key aspects, including enhanced reasoning, improved function calling, and superior code generation capabilities. 221 | 222 | ### Llama-3.2-11B-Vision-Instruct 223 | 224 | Usage: `llm -m github/Llama-3.2-11B-Vision-Instruct` 225 | 226 | **Publisher:** Meta 227 | 228 | **Description:** Excels in image reasoning capabilities on high-res images for visual understanding apps. 229 | 230 | ### Llama-3.2-90B-Vision-Instruct 231 | 232 | Usage: `llm -m github/Llama-3.2-90B-Vision-Instruct` 233 | 234 | **Publisher:** Meta 235 | 236 | **Description:** Advanced image reasoning capabilities for visual understanding agentic apps. 237 | 238 | ### Llama-3.3-70B-Instruct 239 | 240 | Usage: `llm -m github/Llama-3.3-70B-Instruct` 241 | 242 | **Publisher:** Meta 243 | 244 | **Description:** Llama 3.3 70B Instruct offers enhanced reasoning, math, and instruction following with performance comparable to Llama 3.1 405B. 245 | 246 | ### Llama 4 Maverick 17B 128E Instruct FP8 247 | 248 | Usage: `llm -m github/Llama-4-Maverick-17B-128E-Instruct-FP8` 249 | 250 | **Publisher:** Meta 251 | 252 | **Description:** Llama 4 Maverick 17B 128E Instruct FP8 is great at precise image understanding and creative writing, offering high quality at a lower price compared to Llama 3.3 70B 253 | 254 | ### Llama 4 Scout 17B 16E Instruct 255 | 256 | Usage: `llm -m github/Llama-4-Scout-17B-16E-Instruct` 257 | 258 | **Publisher:** Meta 259 | 260 | **Description:** Llama 4 Scout 17B 16E Instruct is great at multi-document summarization, parsing extensive user activity for personalized tasks, and reasoning over vast codebases. 
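
Vision-capable models such as Llama 4 Scout (see the *Input Modalities* column above) also accept image attachments from the Python API, mirroring the CLI example earlier in this README; a short sketch, assuming `llm.Attachment`:

```python
import llm

model = llm.get_model("github/Llama-4-Scout-17B-16E-Instruct")
# Attachments can reference a URL or a local file path
response = model.prompt(
    "Describe this image",
    attachments=[llm.Attachment(url="https://static.simonwillison.net/static/2024/pelicans.jpg")],
)
print(response.text())
```
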
261 | 262 | ### MAI-DS-R1 263 | 264 | Usage: `llm -m github/MAI-DS-R1` 265 | 266 | **Publisher:** Microsoft 267 | 268 | **Description:** MAI-DS-R1 is a DeepSeek-R1 reasoning model that has been post-trained by the Microsoft AI team to fill in information gaps in the previous version of the model and improve its harm protections while maintaining R1 reasoning capabilities. 269 | 270 | ### Meta-Llama-3-70B-Instruct 271 | 272 | Usage: `llm -m github/Meta-Llama-3-70B-Instruct` 273 | 274 | **Publisher:** Meta 275 | 276 | **Description:** A powerful 70-billion parameter model excelling in reasoning, coding, and broad language applications. 277 | 278 | ### Meta-Llama-3-8B-Instruct 279 | 280 | Usage: `llm -m github/Meta-Llama-3-8B-Instruct` 281 | 282 | **Publisher:** Meta 283 | 284 | **Description:** A versatile 8-billion parameter model optimized for dialogue and text generation tasks. 285 | 286 | ### Meta-Llama-3.1-405B-Instruct 287 | 288 | Usage: `llm -m github/Meta-Llama-3.1-405B-Instruct` 289 | 290 | **Publisher:** Meta 291 | 292 | **Description:** The Llama 3.1 instruction tuned text only models are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks. 293 | 294 | ### Meta-Llama-3.1-70B-Instruct 295 | 296 | Usage: `llm -m github/Meta-Llama-3.1-70B-Instruct` 297 | 298 | **Publisher:** Meta 299 | 300 | **Description:** The Llama 3.1 instruction tuned text only models are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks. 301 | 302 | ### Meta-Llama-3.1-8B-Instruct 303 | 304 | Usage: `llm -m github/Meta-Llama-3.1-8B-Instruct` 305 | 306 | **Publisher:** Meta 307 | 308 | **Description:** The Llama 3.1 instruction tuned text only models are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks. 309 | 310 | ### Ministral 3B 311 | 312 | Usage: `llm -m github/Ministral-3B` 313 | 314 | **Publisher:** Mistral AI 315 | 316 | **Description:** Ministral 3B is a state-of-the-art Small Language Model (SLM) optimized for edge computing and on-device applications. As it is designed for low-latency and compute-efficient inference, it it also the perfect model for standard GenAI applications that have 317 | 318 | ### Mistral Large 24.11 319 | 320 | Usage: `llm -m github/Mistral-Large-2411` 321 | 322 | **Publisher:** Mistral AI 323 | 324 | **Description:** Mistral Large 24.11 offers enhanced system prompts, advanced reasoning and function calling capabilities. 325 | 326 | ### Mistral Nemo 327 | 328 | Usage: `llm -m github/Mistral-Nemo` 329 | 330 | **Publisher:** Mistral AI 331 | 332 | **Description:** Mistral Nemo is a cutting-edge Language Model (LLM) boasting state-of-the-art reasoning, world knowledge, and coding capabilities within its size category. 333 | 334 | ### Mistral Large 335 | 336 | Usage: `llm -m github/Mistral-large` 337 | 338 | **Publisher:** Mistral AI 339 | 340 | **Description:** Mistral's flagship model that's ideal for complex tasks that require large reasoning capabilities or are highly specialized (Synthetic Text Generation, Code Generation, RAG, or Agents). 
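
Mistral Large is one of the tool-capable models (✅ in the *Tools* column). A rough sketch of tool calling from Python, assuming the tools API from llm 0.26+ and reusing the `multiply` function from this repo's `.vscode/launch.json` example:

```python
import llm


def multiply(x: int, y: int) -> int:
    """Multiply two numbers."""
    return x * y


model = llm.get_model("github/Mistral-large")
# chain() executes any tool calls the model makes and feeds the results
# back to the model until a final answer is produced
response = model.chain("What is 34234 * 213345?", tools=[multiply])
print(response.text())
```
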
341 | 342 | ### Mistral Large (2407) 343 | 344 | Usage: `llm -m github/Mistral-large-2407` 345 | 346 | **Publisher:** Mistral AI 347 | 348 | **Description:** Mistral Large (2407) is an advanced Large Language Model (LLM) with state-of-the-art reasoning, knowledge and coding capabilities. 349 | 350 | ### Mistral Small 351 | 352 | Usage: `llm -m github/Mistral-small` 353 | 354 | **Publisher:** Mistral AI 355 | 356 | **Description:** Mistral Small can be used on any language-based task that requires high efficiency and low latency. 357 | 358 | ### Phi-3-medium instruct (128k) 359 | 360 | Usage: `llm -m github/Phi-3-medium-128k-instruct` 361 | 362 | **Publisher:** Microsoft 363 | 364 | **Description:** Same Phi-3-medium model, but with a larger context size for RAG or few shot prompting. 365 | 366 | ### Phi-3-medium instruct (4k) 367 | 368 | Usage: `llm -m github/Phi-3-medium-4k-instruct` 369 | 370 | **Publisher:** Microsoft 371 | 372 | **Description:** A 14B parameters model, proves better quality than Phi-3-mini, with a focus on high-quality, reasoning-dense data. 373 | 374 | ### Phi-3-mini instruct (128k) 375 | 376 | Usage: `llm -m github/Phi-3-mini-128k-instruct` 377 | 378 | **Publisher:** Microsoft 379 | 380 | **Description:** Same Phi-3-mini model, but with a larger context size for RAG or few shot prompting. 381 | 382 | ### Phi-3-mini instruct (4k) 383 | 384 | Usage: `llm -m github/Phi-3-mini-4k-instruct` 385 | 386 | **Publisher:** Microsoft 387 | 388 | **Description:** Tiniest member of the Phi-3 family. Optimized for both quality and low latency. 389 | 390 | ### Phi-3-small instruct (128k) 391 | 392 | Usage: `llm -m github/Phi-3-small-128k-instruct` 393 | 394 | **Publisher:** Microsoft 395 | 396 | **Description:** Same Phi-3-small model, but with a larger context size for RAG or few shot prompting. 397 | 398 | ### Phi-3-small instruct (8k) 399 | 400 | Usage: `llm -m github/Phi-3-small-8k-instruct` 401 | 402 | **Publisher:** Microsoft 403 | 404 | **Description:** A 7B parameters model, proves better quality than Phi-3-mini, with a focus on high-quality, reasoning-dense data. 405 | 406 | ### Phi-3.5-MoE instruct (128k) 407 | 408 | Usage: `llm -m github/Phi-3.5-MoE-instruct` 409 | 410 | **Publisher:** Microsoft 411 | 412 | **Description:** A new mixture of experts model 413 | 414 | ### Phi-3.5-mini instruct (128k) 415 | 416 | Usage: `llm -m github/Phi-3.5-mini-instruct` 417 | 418 | **Publisher:** Microsoft 419 | 420 | **Description:** Refresh of Phi-3-mini model. 421 | 422 | ### Phi-3.5-vision instruct (128k) 423 | 424 | Usage: `llm -m github/Phi-3.5-vision-instruct` 425 | 426 | **Publisher:** Microsoft 427 | 428 | **Description:** Refresh of Phi-3-vision model. 429 | 430 | ### Phi-4 431 | 432 | Usage: `llm -m github/Phi-4` 433 | 434 | **Publisher:** Microsoft 435 | 436 | **Description:** Phi-4 14B, a highly capable model for low latency scenarios. 
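
Phi-4, like most chat models in the table, supports streaming, and the plugin registers an async variant of every model. A rough sketch of streaming tokens from the async Python API, assuming `llm.get_async_model` and that the async response can be iterated directly:

```python
import asyncio

import llm


async def main() -> None:
    model = llm.get_async_model("github/Phi-4")
    # Tokens are yielded as they arrive when the model supports streaming
    async for chunk in model.prompt("Explain beam search in two sentences"):
        print(chunk, end="", flush=True)
    print()


asyncio.run(main())
```
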
437 | 438 | ### Phi-4-mini-instruct 439 | 440 | Usage: `llm -m github/Phi-4-mini-instruct` 441 | 442 | **Publisher:** Microsoft 443 | 444 | **Description:** 3.8B parameters Small Language Model outperforming larger models in reasoning, math, coding, and function-calling 445 | 446 | ### Phi-4-mini-reasoning 447 | 448 | Usage: `llm -m github/Phi-4-mini-reasoning` 449 | 450 | **Publisher:** Microsoft 451 | 452 | **Description:** Lightweight math reasoning model optimized for multi-step problem solving 453 | 454 | ### Phi-4-multimodal-instruct 455 | 456 | Usage: `llm -m github/Phi-4-multimodal-instruct` 457 | 458 | **Publisher:** Microsoft 459 | 460 | **Description:** First small multimodal model to have 3 modality inputs (text, audio, image), excelling in quality and efficiency 461 | 462 | ### Phi-4-Reasoning 463 | 464 | Usage: `llm -m github/Phi-4-reasoning` 465 | 466 | **Publisher:** Microsoft 467 | 468 | **Description:** State-of-the-art open-weight reasoning model. 469 | 470 | ### Cohere Command A 471 | 472 | Usage: `llm -m github/cohere-command-a` 473 | 474 | **Publisher:** Cohere 475 | 476 | **Description:** Command A is a highly efficient generative model that excels at agentic and multilingual use cases. 477 | 478 | ### Cohere Embed 4 479 | 480 | Usage: `llm -m github/embed-v-4-0` 481 | 482 | **Publisher:** Cohere 483 | 484 | **Description:** Embed 4 transforms texts and images into numerical vectors 485 | 486 | ### OpenAI GPT-4.1 487 | 488 | Usage: `llm -m github/gpt-4.1` 489 | 490 | **Publisher:** OpenAI 491 | 492 | **Description:** gpt-4.1 outperforms gpt-4o across the board, with major gains in coding, instruction following, and long-context understanding 493 | 494 | ### OpenAI GPT-4.1-mini 495 | 496 | Usage: `llm -m github/gpt-4.1-mini` 497 | 498 | **Publisher:** OpenAI 499 | 500 | **Description:** gpt-4.1-mini outperform gpt-4o-mini across the board, with major gains in coding, instruction following, and long-context handling 501 | 502 | ### OpenAI GPT-4.1-nano 503 | 504 | Usage: `llm -m github/gpt-4.1-nano` 505 | 506 | **Publisher:** OpenAI 507 | 508 | **Description:** gpt-4.1-nano provides gains in coding, instruction following, and long-context handling along with lower latency and cost 509 | 510 | ### OpenAI GPT-4o 511 | 512 | Usage: `llm -m github/gpt-4o` 513 | 514 | **Publisher:** OpenAI 515 | 516 | **Description:** OpenAI's most advanced multimodal model in the gpt-4o family. Can handle both text and image inputs. 517 | 518 | ### OpenAI GPT-4o mini 519 | 520 | Usage: `llm -m github/gpt-4o-mini` 521 | 522 | **Publisher:** OpenAI 523 | 524 | **Description:** An affordable, efficient AI solution for diverse text and image tasks. 525 | 526 | ### Grok 3 527 | 528 | Usage: `llm -m github/grok-3` 529 | 530 | **Publisher:** xAI 531 | 532 | **Description:** Grok 3 is xAI's debut model, pretrained by Colossus at supermassive scale to excel in specialized domains like finance, healthcare, and the law. 533 | 534 | ### Grok 3 Mini 535 | 536 | Usage: `llm -m github/grok-3-mini` 537 | 538 | **Publisher:** xAI 539 | 540 | **Description:** Grok 3 Mini is a lightweight model that thinks before responding. Trained on mathematic and scientific problems, it is great for logic-based tasks. 541 | 542 | ### JAIS 30b Chat 543 | 544 | Usage: `llm -m github/jais-30b-chat` 545 | 546 | **Publisher:** Core42 547 | 548 | **Description:** JAIS 30b Chat is an auto-regressive bilingual LLM for Arabic & English with state-of-the-art capabilities in Arabic. 
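
Multi-turn chat works through llm's conversation API, which this plugin supports by replaying earlier prompts, attachments and tool results (see `build_messages` in `llm_github_models.py`). A short sketch using the bilingual JAIS model above (the prompts are only illustrative):

```python
import llm

model = llm.get_model("github/jais-30b-chat")
conversation = model.conversation()

# Each prompt in the conversation is sent along with the prior exchanges
print(conversation.prompt("Greet me in Arabic").text())
print(conversation.prompt("Now translate that greeting into English").text())
```
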
549 | 550 | ### Mistral Medium 3 (25.05) 551 | 552 | Usage: `llm -m github/mistral-medium-2505` 553 | 554 | **Publisher:** Mistral AI 555 | 556 | **Description:** Mistral Medium 3 is an advanced Large Language Model (LLM) with state-of-the-art reasoning, knowledge, coding and vision capabilities. 557 | 558 | ### Mistral Small 3.1 559 | 560 | Usage: `llm -m github/mistral-small-2503` 561 | 562 | **Publisher:** Mistral AI 563 | 564 | **Description:** Enhanced Mistral Small 3 with multimodal capabilities and a 128k context length. 565 | 566 | ### OpenAI o1 567 | 568 | Usage: `llm -m github/o1` 569 | 570 | **Publisher:** OpenAI 571 | 572 | **Description:** Focused on advanced reasoning and solving complex problems, including math and science tasks. Ideal for applications that require deep contextual understanding and agentic workflows. 573 | 574 | ### OpenAI o1-mini 575 | 576 | Usage: `llm -m github/o1-mini` 577 | 578 | **Publisher:** OpenAI 579 | 580 | **Description:** Smaller, faster, and 80% cheaper than o1-preview, performs well at code generation and small context operations. 581 | 582 | ### OpenAI o1-preview 583 | 584 | Usage: `llm -m github/o1-preview` 585 | 586 | **Publisher:** OpenAI 587 | 588 | **Description:** Focused on advanced reasoning and solving complex problems, including math and science tasks. Ideal for applications that require deep contextual understanding and agentic workflows. 589 | 590 | ### OpenAI o3 591 | 592 | Usage: `llm -m github/o3` 593 | 594 | **Publisher:** OpenAI 595 | 596 | **Description:** o3 includes significant improvements on quality and safety while supporting the existing features of o1 and delivering comparable or better performance. 597 | 598 | ### OpenAI o3-mini 599 | 600 | Usage: `llm -m github/o3-mini` 601 | 602 | **Publisher:** OpenAI 603 | 604 | **Description:** o3-mini includes the o1 features with significant cost-efficiencies for scenarios requiring high performance. 605 | 606 | ### OpenAI o4-mini 607 | 608 | Usage: `llm -m github/o4-mini` 609 | 610 | **Publisher:** OpenAI 611 | 612 | **Description:** o4-mini includes significant improvements on quality and safety while supporting the existing features of o3-mini and delivering comparable or better performance. 613 | 614 | ### OpenAI Text Embedding 3 (large) 615 | 616 | Usage: `llm -m github/text-embedding-3-large` 617 | 618 | **Publisher:** OpenAI 619 | 620 | **Description:** Text-embedding-3 series models are the latest and most capable embedding model from OpenAI. 621 | 622 | ### OpenAI Text Embedding 3 (small) 623 | 624 | Usage: `llm -m github/text-embedding-3-small` 625 | 626 | **Publisher:** OpenAI 627 | 628 | **Description:** Text-embedding-3 series models are the latest and most capable embedding model from OpenAI. 
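
The embedding models can also be used from Python. A minimal sketch, assuming llm's embedding API; note that the `text-embedding-3-*` models additionally register reduced-dimension variants such as `github/text-embedding-3-small-512`:

```python
import llm

model = llm.get_embedding_model("github/text-embedding-3-small")

# embed() returns a plain list of floats
vector = model.embed("A short sentence to embed")
print(len(vector), vector[:5])
```
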
629 | 630 | -------------------------------------------------------------------------------- /llm_github_models.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import AsyncGenerator, Dict, Iterable, Iterator, List, Optional, Union 3 | 4 | import llm 5 | from azure.ai.inference import ChatCompletionsClient, EmbeddingsClient 6 | from azure.ai.inference.aio import ChatCompletionsClient as AsyncChatCompletionsClient 7 | from azure.ai.inference.models import ( 8 | AssistantMessage, 9 | AudioContentFormat, 10 | AudioContentItem, 11 | ChatCompletionsToolCall, 12 | ChatCompletionsToolDefinition, 13 | ChatRequestMessage, 14 | CompletionsUsage, 15 | ContentItem, 16 | FunctionCall, 17 | FunctionDefinition, 18 | ImageContentItem, 19 | ImageDetailLevel, 20 | ImageUrl, 21 | InputAudio, 22 | JsonSchemaFormat, 23 | StreamingChatResponseMessageUpdate, 24 | StreamingChatResponseToolCallUpdate, 25 | SystemMessage, 26 | TextContentItem, 27 | ToolMessage, 28 | UserMessage, 29 | ) 30 | from azure.core.credentials import AzureKeyCredential 31 | from llm.models import ( 32 | AsyncConversation, 33 | AsyncModel, 34 | AsyncResponse, 35 | Attachment, 36 | Conversation, 37 | EmbeddingModel, 38 | Prompt, 39 | Response, 40 | ) 41 | from pydantic import BaseModel 42 | 43 | INFERENCE_ENDPOINT = "https://models.inference.ai.azure.com" 44 | 45 | CHAT_MODELS = [ 46 | ("AI21-Jamba-1.5-Large", True, False, False, False, ["text"], ["text"]), 47 | ("AI21-Jamba-1.5-Mini", True, False, False, False, ["text"], ["text"]), 48 | ("Codestral-2501", True, False, False, True, ["text"], ["text"]), 49 | ("Cohere-command-r", True, False, False, True, ["text"], ["text"]), 50 | ("Cohere-command-r-08-2024", True, False, False, True, ["text"], ["text"]), 51 | ("Cohere-command-r-plus", True, False, False, True, ["text"], ["text"]), 52 | ("Cohere-command-r-plus-08-2024", True, False, False, True, ["text"], ["text"]), 53 | ("DeepSeek-R1", True, False, False, False, ["text"], ["text"]), 54 | ("DeepSeek-V3", True, False, False, False, ["text"], ["text"]), 55 | ("DeepSeek-V3-0324", True, False, False, False, ["text"], ["text"]), 56 | ( 57 | "Llama-3.2-11B-Vision-Instruct", 58 | True, 59 | False, 60 | False, 61 | False, 62 | ["text", "image", "audio"], 63 | ["text"], 64 | ), 65 | ( 66 | "Llama-3.2-90B-Vision-Instruct", 67 | True, 68 | False, 69 | False, 70 | False, 71 | ["text", "image", "audio"], 72 | ["text"], 73 | ), 74 | ("Llama-3.3-70B-Instruct", True, False, False, False, ["text"], ["text"]), 75 | ( 76 | "Llama-4-Maverick-17B-128E-Instruct-FP8", 77 | True, 78 | False, 79 | False, 80 | False, 81 | ["text", "image"], 82 | ["text"], 83 | ), 84 | ("Llama-4-Scout-17B-16E-Instruct", True, False, False, False, ["text", "image"], ["text"]), 85 | ("MAI-DS-R1", True, False, False, False, ["text"], ["text"]), 86 | ("Meta-Llama-3-70B-Instruct", True, False, False, False, ["text"], ["text"]), 87 | ("Meta-Llama-3-8B-Instruct", True, False, False, False, ["text"], ["text"]), 88 | ("Meta-Llama-3.1-405B-Instruct", True, False, False, False, ["text"], ["text"]), 89 | ("Meta-Llama-3.1-70B-Instruct", True, False, False, False, ["text"], ["text"]), 90 | ("Meta-Llama-3.1-8B-Instruct", True, False, False, False, ["text"], ["text"]), 91 | ("Ministral-3B", True, False, False, True, ["text"], ["text"]), 92 | ("Mistral-Large-2411", True, False, False, True, ["text"], ["text"]), 93 | ("Mistral-Nemo", True, False, False, True, ["text"], ["text"]), 94 | ("Mistral-large", True, False, False, True, ["text"], 
["text"]), 95 | ("Mistral-large-2407", True, False, False, True, ["text"], ["text"]), 96 | ("Mistral-small", True, False, False, True, ["text"], ["text"]), 97 | ("Phi-3-medium-128k-instruct", True, False, False, False, ["text"], ["text"]), 98 | ("Phi-3-medium-4k-instruct", True, False, False, False, ["text"], ["text"]), 99 | ("Phi-3-mini-128k-instruct", True, False, False, False, ["text"], ["text"]), 100 | ("Phi-3-mini-4k-instruct", True, False, False, False, ["text"], ["text"]), 101 | ("Phi-3-small-128k-instruct", True, False, False, False, ["text"], ["text"]), 102 | ("Phi-3-small-8k-instruct", True, False, False, False, ["text"], ["text"]), 103 | ("Phi-3.5-MoE-instruct", True, False, False, False, ["text"], ["text"]), 104 | ("Phi-3.5-mini-instruct", True, False, False, False, ["text"], ["text"]), 105 | ("Phi-3.5-vision-instruct", True, False, False, False, ["text", "image"], None), 106 | ("Phi-4", True, False, False, False, ["text"], ["text"]), 107 | ("Phi-4-mini-instruct", True, False, False, False, ["text"], ["text"]), 108 | ("Phi-4-mini-reasoning", True, False, False, False, ["text"], ["text"]), 109 | ("Phi-4-multimodal-instruct", True, False, False, False, ["audio", "image", "text"], ["text"]), 110 | ("Phi-4-reasoning", True, False, False, False, ["text"], ["text"]), 111 | ("cohere-command-a", True, False, False, True, ["text"], ["text"]), 112 | ("gpt-4.1", True, True, True, True, ["text", "image"], ["text"]), 113 | ("gpt-4.1-mini", True, True, True, True, ["text", "image"], ["text"]), 114 | ("gpt-4.1-nano", True, True, True, True, ["text", "image"], ["text"]), 115 | ("gpt-4o", True, True, True, True, ["text", "image", "audio"], ["text"]), 116 | ("gpt-4o-mini", True, True, True, True, ["text", "image", "audio"], ["text"]), 117 | ("grok-3", True, False, False, True, ["text"], ["text"]), 118 | ("grok-3-mini", True, False, False, True, ["text"], ["text"]), 119 | ("jais-30b-chat", True, False, False, False, ["text"], ["text"]), 120 | ("mistral-medium-2505", True, False, False, True, ["text", "image"], ["text"]), 121 | ("mistral-small-2503", True, False, False, True, ["text", "image"], ["text"]), 122 | ("o1", False, True, False, True, ["text", "image"], ["text"]), 123 | ("o1-mini", False, False, False, False, ["text"], ["text"]), 124 | ("o1-preview", False, False, False, False, ["text"], ["text"]), 125 | ("o3", True, False, True, True, ["text", "image"], ["text"]), 126 | ("o3-mini", False, True, False, True, ["text"], ["text"]), 127 | ("o4-mini", True, False, True, True, ["text", "image"], ["text"]), 128 | ] 129 | 130 | EMBEDDING_MODELS = [ 131 | ("Cohere-embed-v3-english", []), 132 | ("Cohere-embed-v3-multilingual", []), 133 | ("text-embedding-3-large", [1024, 256]), 134 | ("text-embedding-3-small", [512]), 135 | ] 136 | 137 | 138 | @llm.hookimpl 139 | def register_models(register): 140 | # Register both sync and async versions of each model 141 | # TODO: Dynamically fetch this list 142 | for ( 143 | model_id, 144 | can_stream, 145 | supports_schema, 146 | requires_usage_stream_option, 147 | supports_tools, 148 | input_modalities, 149 | output_modalities, 150 | ) in CHAT_MODELS: 151 | register( 152 | GitHubModels( 153 | model_id, 154 | can_stream=can_stream, 155 | supports_schema=supports_schema, 156 | requires_usage_stream_option=requires_usage_stream_option, 157 | supports_tools=supports_tools, 158 | input_modalities=input_modalities, 159 | output_modalities=output_modalities, 160 | ), 161 | GitHubAsyncModels( 162 | model_id, 163 | can_stream=can_stream, 164 | 
supports_schema=supports_schema, 165 | requires_usage_stream_option=requires_usage_stream_option, 166 | supports_tools=supports_tools, 167 | input_modalities=input_modalities, 168 | output_modalities=output_modalities, 169 | ), 170 | ) 171 | 172 | 173 | @llm.hookimpl 174 | def register_embedding_models(register): 175 | # Register embedding models 176 | for model_id, supported_dimensions in EMBEDDING_MODELS: 177 | register(GitHubEmbeddingModel(model_id)) 178 | for dimensions in supported_dimensions: 179 | register(GitHubEmbeddingModel(model_id, dimensions=dimensions)) 180 | 181 | 182 | IMAGE_ATTACHMENTS = { 183 | "image/png", 184 | "image/jpeg", 185 | "image/webp", 186 | "image/gif", 187 | } 188 | 189 | AUDIO_ATTACHMENTS = { 190 | "audio/wav", 191 | "audio/mpeg", 192 | } 193 | 194 | 195 | def attachment_as_content_item(attachment: Attachment) -> ContentItem: 196 | if attachment is None or attachment.resolve_type() is None: 197 | raise ValueError("Attachment cannot be None or empty") 198 | 199 | attachment_type: str = attachment.resolve_type() # type: ignore 200 | 201 | if attachment_type.startswith("audio/"): 202 | audio_format = ( 203 | AudioContentFormat.WAV if attachment_type == "audio/wav" else AudioContentFormat.MP3 204 | ) 205 | if attachment.path is None: 206 | raise ValueError("Audio attachment must have a path for audio content") 207 | 208 | return AudioContentItem( 209 | input_audio=InputAudio.load(audio_file=attachment.path, audio_format=audio_format) 210 | ) 211 | if attachment_type.startswith("image/"): 212 | if attachment.url: 213 | return ImageContentItem( 214 | image_url=ImageUrl( 215 | url=attachment.url, 216 | detail=ImageDetailLevel.AUTO, 217 | ), 218 | ) 219 | if attachment.path: 220 | return ImageContentItem( 221 | image_url=ImageUrl.load( 222 | image_file=attachment.path, 223 | image_format=attachment_type.split("/")[1], 224 | detail=ImageDetailLevel.AUTO, 225 | ), 226 | ) 227 | 228 | raise ValueError(f"Unsupported attachment type: {attachment_type}") 229 | 230 | 231 | def build_messages( 232 | prompt: Prompt, conversation: Optional[Union[Conversation, AsyncConversation]] = None 233 | ) -> List[ChatRequestMessage]: 234 | messages: List[ChatRequestMessage] = [] 235 | current_system = None 236 | if conversation is not None: 237 | for prev_response in conversation.responses: 238 | if prev_response.prompt.system and prev_response.prompt.system != current_system: 239 | messages.append(SystemMessage(prev_response.prompt.system)) 240 | current_system = prev_response.prompt.system 241 | if prev_response.attachments: 242 | attachment_message: list[ContentItem] = [] 243 | if prev_response.prompt.prompt: 244 | attachment_message.append(TextContentItem(text=prev_response.prompt.prompt)) 245 | for attachment in prev_response.attachments: 246 | attachment_message.append(attachment_as_content_item(attachment)) 247 | messages.append(UserMessage(attachment_message)) 248 | else: 249 | messages.append(UserMessage(prev_response.prompt.prompt)) 250 | 251 | # Add any tool results from the previous prompt 252 | for tool_result in prev_response.prompt.tool_results: 253 | messages.append( 254 | ToolMessage( 255 | tool_call_id=tool_result.tool_call_id or "", content=tool_result.output 256 | ) 257 | ) 258 | 259 | # Add the assistant's response 260 | assistant_msg = AssistantMessage(prev_response.text_or_raise()) # type: ignore 261 | 262 | tool_calls = prev_response.tool_calls_or_raise() # type: ignore 263 | if tool_calls: 264 | assistant_tool_calls = [] 265 | for tool_call in tool_calls: 266 | 
assistant_tool_calls.append( 267 | ChatCompletionsToolCall( 268 | id=tool_call.tool_call_id, 269 | function=FunctionCall( 270 | name=tool_call.name, arguments=json.dumps(tool_call.arguments) 271 | ), 272 | ) 273 | ) 274 | 275 | # Set tool_calls on the assistant message 276 | assistant_msg.tool_calls = assistant_tool_calls 277 | 278 | messages.append(assistant_msg) 279 | 280 | if prompt.system and prompt.system != current_system: 281 | messages.append(SystemMessage(prompt.system)) 282 | if prompt.attachments: 283 | attachment_message = [] 284 | if prompt.prompt: 285 | attachment_message.append(TextContentItem(text=prompt.prompt)) 286 | for attachment in prompt.attachments: 287 | attachment_message.append(attachment_as_content_item(attachment)) 288 | messages.append(UserMessage(attachment_message)) 289 | elif prompt.prompt: 290 | messages.append(UserMessage(content=prompt.prompt)) 291 | 292 | # Add any tool results for the current prompt 293 | for tool_result in prompt.tool_results: 294 | messages.append( 295 | ToolMessage(tool_call_id=tool_result.tool_call_id or "", content=tool_result.output) 296 | ) 297 | 298 | return messages 299 | 300 | 301 | def set_usage(usage: CompletionsUsage, response: Union[Response, AsyncResponse]) -> None: 302 | # Recursively remove keys with value 0 and empty dictionaries 303 | def remove_empty_and_zero(obj): 304 | if isinstance(obj, dict): 305 | cleaned = {k: remove_empty_and_zero(v) for k, v in obj.items() if v != 0 and v != {}} 306 | return {k: v for k, v in cleaned.items() if v is not None and v != {}} 307 | return obj 308 | 309 | details = usage.as_dict() 310 | details.pop("prompt_tokens", None) 311 | details.pop("completion_tokens", None) 312 | details.pop("total_tokens", None) 313 | 314 | response.set_usage( 315 | input=usage.prompt_tokens, 316 | output=usage.completion_tokens, 317 | details=remove_empty_and_zero(details), 318 | ) 319 | 320 | 321 | def append_streaming_tool_calls( 322 | tool_calls: Dict[str, StreamingChatResponseToolCallUpdate], 323 | delta: StreamingChatResponseMessageUpdate, 324 | ): 325 | if not delta.tool_calls: 326 | return 327 | 328 | for tool_call in delta.tool_calls: 329 | index = tool_call.get("index") 330 | if index not in tool_calls: 331 | tool_calls[index] = tool_call 332 | else: 333 | tool_calls[index].function.arguments += tool_call.function.arguments 334 | 335 | 336 | def add_tool_calls( 337 | tool_calls: Iterable[Union[ChatCompletionsToolCall, StreamingChatResponseToolCallUpdate]], 338 | response: Union[Response, AsyncResponse], 339 | ): 340 | for tool_call in tool_calls: 341 | try: 342 | arguments = json.loads(tool_call.function.arguments) 343 | except json.JSONDecodeError: 344 | arguments = {"error": "Invalid JSON in arguments"} 345 | 346 | response.add_tool_call( 347 | llm.ToolCall( 348 | tool_call_id=tool_call.id, 349 | name=tool_call.function.name, 350 | arguments=arguments, 351 | ) 352 | ) 353 | 354 | 355 | class _Shared: 356 | needs_key = "github" 357 | key_env_var = "GITHUB_MODELS_KEY" 358 | 359 | def __init__( 360 | self, 361 | model_id: str, 362 | can_stream: bool = True, 363 | supports_schema: bool = False, 364 | requires_usage_stream_option: bool = True, 365 | supports_tools: bool = False, 366 | input_modalities: Optional[List[str]] = None, 367 | output_modalities: Optional[List[str]] = None, 368 | ): 369 | self.model_id = f"github/{model_id}" 370 | self.model_name = model_id 371 | self.can_stream = can_stream 372 | self.supports_schema = supports_schema 373 | self.supports_tools = supports_tools 374 | 
self.attachment_types = set() 375 | if input_modalities and "image" in input_modalities: 376 | self.attachment_types.update(IMAGE_ATTACHMENTS) 377 | if input_modalities and "audio" in input_modalities: 378 | self.attachment_types.update(AUDIO_ATTACHMENTS) 379 | 380 | self.input_modalities = input_modalities 381 | self.output_modalities = output_modalities 382 | 383 | self.client_kwargs = {} 384 | # Use latest version 385 | self.client_kwargs["api_version"] = "2025-03-01-preview" 386 | 387 | self.streaming_model_extras = {} 388 | if requires_usage_stream_option: 389 | self.streaming_model_extras["stream_options"] = { 390 | "include_usage": True, 391 | } 392 | 393 | # Using the same display string for both the sync and async models 394 | # makes them not show up twice in `llm models` 395 | def __str__(self) -> str: 396 | return f"GitHub Models: {self.model_id}" 397 | 398 | def get_tools(self, prompt: Prompt) -> Optional[List[ChatCompletionsToolDefinition]]: 399 | if not self.supports_tools or not prompt.tools: 400 | return None 401 | 402 | return [ 403 | ChatCompletionsToolDefinition( 404 | function=FunctionDefinition( 405 | name=t.name, 406 | description=t.description or None, 407 | parameters=t.input_schema, 408 | ), 409 | ) 410 | for t in prompt.tools 411 | ] 412 | 413 | 414 | class GitHubModels(_Shared, llm.Model): 415 | def execute( 416 | self, 417 | prompt: Prompt, 418 | stream: bool, 419 | response: Response, 420 | conversation: Optional[Conversation], 421 | ) -> Iterator[str]: 422 | # unset keys are handled by llm.Model.get_key() 423 | key: str = self.get_key() # type: ignore 424 | 425 | with ChatCompletionsClient( 426 | endpoint=INFERENCE_ENDPOINT, 427 | credential=AzureKeyCredential(key), 428 | model=self.model_name, 429 | **self.client_kwargs, 430 | ) as client: 431 | response_format = "text" 432 | if prompt.schema: 433 | if not isinstance(prompt.schema, dict) and issubclass(prompt.schema, BaseModel): 434 | response_format = JsonSchemaFormat( 435 | name="output", schema=prompt.schema.model_json_schema() 436 | ) 437 | else: 438 | response_format = JsonSchemaFormat( 439 | name="output", 440 | schema=prompt.schema, # type: ignore[variable] 441 | ) 442 | 443 | usage: Optional[CompletionsUsage] = None 444 | messages = build_messages(prompt, conversation) 445 | 446 | tools = self.get_tools(prompt) 447 | 448 | if stream: 449 | completion = client.complete( 450 | messages=messages, 451 | stream=True, 452 | response_format=response_format, 453 | model_extras=self.streaming_model_extras, 454 | tools=tools, 455 | ) 456 | tool_calls = {} 457 | 458 | for chunk in completion: 459 | usage = usage or chunk.usage 460 | 461 | if len(chunk.choices) == 0: 462 | continue 463 | 464 | delta = chunk.choices[0].delta 465 | content = delta.content 466 | append_streaming_tool_calls(tool_calls, delta) 467 | 468 | if content is not None: 469 | yield content 470 | 471 | add_tool_calls( 472 | tool_calls.values(), 473 | response, 474 | ) 475 | 476 | response.response_json = None # TODO 477 | else: 478 | completion = client.complete( 479 | messages=messages, 480 | stream=False, 481 | response_format=response_format, 482 | tools=tools, 483 | ) 484 | usage = completion.usage 485 | 486 | tool_calls = completion.choices[0].message.tool_calls or [] 487 | add_tool_calls(tool_calls, response) 488 | 489 | response.response_json = None # TODO 490 | if completion.choices[0].message.content: 491 | yield completion.choices[0].message.content 492 | 493 | if usage is not None: 494 | set_usage(usage, response) 495 | 496 | 497 | 
class GitHubAsyncModels(_Shared, AsyncModel): 498 | async def execute( 499 | self, 500 | prompt: Prompt, 501 | stream: bool, 502 | response: AsyncResponse, 503 | conversation: Optional[AsyncConversation], 504 | ) -> AsyncGenerator[str, None]: 505 | key = self.get_key() 506 | 507 | async with AsyncChatCompletionsClient( 508 | endpoint=INFERENCE_ENDPOINT, 509 | credential=AzureKeyCredential(key), # type: ignore[variable] 510 | model=self.model_name, 511 | **self.client_kwargs, 512 | ) as client: 513 | response_format = "text" 514 | if prompt.schema: 515 | if not isinstance(prompt.schema, dict) and issubclass(prompt.schema, BaseModel): 516 | response_format = JsonSchemaFormat( 517 | name="output", schema=prompt.schema.model_json_schema() 518 | ) 519 | else: 520 | response_format = JsonSchemaFormat( 521 | name="output", 522 | schema=prompt.schema, # type: ignore[variable] 523 | ) 524 | 525 | usage: Optional[CompletionsUsage] = None 526 | messages = build_messages(prompt, conversation) 527 | 528 | tools = self.get_tools(prompt) 529 | 530 | if stream: 531 | completion = await client.complete( 532 | messages=messages, 533 | stream=True, 534 | response_format=response_format, 535 | model_extras=self.streaming_model_extras, 536 | tools=tools, 537 | ) 538 | 539 | tool_calls = {} 540 | async for chunk in completion: 541 | usage = usage or chunk.usage 542 | 543 | if len(chunk.choices) == 0: 544 | continue 545 | 546 | delta = chunk.choices[0].delta 547 | content = delta.content 548 | append_streaming_tool_calls(tool_calls, delta) 549 | 550 | if content is not None: 551 | yield content 552 | 553 | add_tool_calls( 554 | tool_calls.values(), 555 | response, 556 | ) 557 | 558 | response.response_json = None # TODO 559 | else: 560 | completion = await client.complete( 561 | messages=messages, 562 | stream=False, 563 | response_format=response_format, 564 | tools=tools, 565 | ) 566 | usage = usage or completion.usage 567 | 568 | tool_calls = completion.choices[0].message.tool_calls or [] 569 | add_tool_calls(tool_calls, response) 570 | 571 | response.response_json = None # TODO 572 | if completion.choices[0].message.content: 573 | yield completion.choices[0].message.content 574 | 575 | if usage is not None: 576 | set_usage(usage, response) 577 | 578 | 579 | class GitHubEmbeddingModel(EmbeddingModel): 580 | needs_key = "github" 581 | key_env_var = "GITHUB_MODELS_KEY" 582 | batch_size = 100 583 | 584 | def __init__(self, model_id: str, dimensions: Optional[int] = None): 585 | self.model_id = f"github/{model_id}" 586 | if dimensions is not None: 587 | self.model_id += f"-{dimensions}" 588 | 589 | self.model_name = model_id 590 | self.dimensions = dimensions 591 | 592 | def embed_batch(self, items: Iterable[Union[str, bytes]]) -> Iterator[List[float]]: 593 | if not items: 594 | return iter([]) 595 | 596 | key = self.get_key() 597 | client = EmbeddingsClient( 598 | endpoint=INFERENCE_ENDPOINT, 599 | credential=AzureKeyCredential(key), # type: ignore 600 | ) 601 | 602 | # TODO: Handle iterable of bytes 603 | 604 | kwargs = { 605 | "input": items, 606 | "model": self.model_name, 607 | } 608 | if self.dimensions: 609 | kwargs["dimensions"] = self.dimensions 610 | 611 | response = client.embed(**kwargs) 612 | return ([float(x) for x in item.embedding] for item in response.data) 613 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "llm-github-models" 3 | version = 
"0.15" 4 | description = "LLM plugin to access GitHub Models API" 5 | readme = "README.md" 6 | authors = [{name = "Anthony Shaw"}] 7 | license = {text = "Apache-2.0"} 8 | classifiers = [ 9 | "License :: OSI Approved :: Apache Software License" 10 | ] 11 | dependencies = [ 12 | "aiohttp>=3.11.18", 13 | "llm>=0.26", 14 | "azure-ai-inference>=1.0.0b8", 15 | ] 16 | 17 | [project.urls] 18 | Homepage = "https://github.com/tonybaloney/llm-github-models" 19 | Changelog = "https://github.com/tonybaloney/llm-github-models/releases" 20 | Issues = "https://github.com/tonybaloney/llm-github-models/issues" 21 | CI = "https://github.com/tonybaloney/llm-github-models/actions" 22 | 23 | [project.entry-points.llm] 24 | github = "llm_github_models" 25 | 26 | [project.optional-dependencies] 27 | test = ["pytest", "pytest-recording", "pytest-asyncio", "ruff", "pyright"] 28 | 29 | [tool.ruff] 30 | line-length = 100 31 | 32 | [tool.ruff.lint] 33 | select = ["E", "F", "I"] 34 | -------------------------------------------------------------------------------- /tests/files/kick.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonybaloney/llm-github-models/84db4b65a8c1cdb2b619a4ef3c972b4380923a9f/tests/files/kick.wav -------------------------------------------------------------------------------- /tests/files/salmon.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonybaloney/llm-github-models/84db4b65a8c1cdb2b619a4ef3c972b4380923a9f/tests/files/salmon.jpeg -------------------------------------------------------------------------------- /tests/test_llm_github_embeddings.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | from azure.ai.inference.models import EmbeddingItem, EmbeddingsResult 5 | 6 | from llm_github_models import EMBEDDING_MODELS, GitHubEmbeddingModel 7 | 8 | EMBEDDING_MODEL_IDS = [f"github/{model}" for model in EMBEDDING_MODELS] 9 | 10 | 11 | @pytest.mark.parametrize("model_id", EMBEDDING_MODELS) 12 | def test_embedding_model_initialization(model_id: str): 13 | """Test that embedding models are initialized correctly.""" 14 | embedding_model = GitHubEmbeddingModel(model_id) 15 | assert embedding_model.model_id == f"github/{model_id}" 16 | assert embedding_model.model_name == model_id 17 | 18 | 19 | @patch("llm_github_models.EmbeddingsClient", autospec=True) 20 | def test_embed_single_text(MockEmbeddingsClient): 21 | """Test embedding a single text.""" 22 | # Setup mock 23 | mock_instance = MockEmbeddingsClient.return_value 24 | 25 | # Mock the response 26 | mock_embedding = [0.1, 0.2, 0.3, 0.4, 0.5] 27 | mock_embedding_item = EmbeddingItem(embedding=mock_embedding, index=0) 28 | mock_result = EmbeddingsResult(data=[mock_embedding_item]) 29 | mock_instance.embed.return_value = mock_result 30 | 31 | # Create model and call embed 32 | model = GitHubEmbeddingModel("test-model") 33 | # Patch the get_key method to avoid actual key retrieval 34 | with patch.object(model, "get_key", return_value="test-key"): 35 | result = model.embed_batch(["This is a test text"]) 36 | 37 | # Assertions 38 | MockEmbeddingsClient.assert_called_once() 39 | mock_instance.embed.assert_called_once_with( 40 | model="test-model", 41 | input=["This is a test text"], 42 | ) 43 | 44 | result = list(result) 45 | assert len(result) == 1 46 | assert result[0] == [0.1, 0.2, 0.3, 0.4, 0.5] 47 | 48 | 49 | 
@patch("llm_github_models.EmbeddingsClient", autospec=True) 50 | def test_embed_with_dimensions(MockEmbeddingsClient): 51 | """Test embedding with a custom dimensions.""" 52 | # Setup mock 53 | mock_instance = MockEmbeddingsClient.return_value 54 | 55 | # Mock the response 56 | mock_embedding = [0.1, 0.2, 0.3, 0.4, 0.5] 57 | mock_embedding_item = EmbeddingItem(embedding=mock_embedding, index=0) 58 | mock_result = EmbeddingsResult(data=[mock_embedding_item]) 59 | mock_instance.embed.return_value = mock_result 60 | 61 | # Create model and call embed 62 | model = GitHubEmbeddingModel("test-model", 1234) 63 | # Patch the get_key method to avoid actual key retrieval 64 | with patch.object(model, "get_key", return_value="test-key"): 65 | result = model.embed_batch(["This is a test text"]) 66 | 67 | # Assertions 68 | MockEmbeddingsClient.assert_called_once() 69 | mock_instance.embed.assert_called_once_with( 70 | model="test-model", 71 | input=["This is a test text"], 72 | dimensions=1234, 73 | ) 74 | 75 | result = list(result) 76 | assert len(result) == 1 77 | assert result[0] == [0.1, 0.2, 0.3, 0.4, 0.5] 78 | 79 | 80 | @patch("llm_github_models.EmbeddingsClient", autospec=True) 81 | def test_embed_multiple_texts(MockEmbeddingsClient): 82 | """Test embedding multiple texts.""" 83 | # Setup mock 84 | mock_instance = MockEmbeddingsClient.return_value 85 | 86 | # Mock the response for multiple embeddings 87 | mock_embedding1 = [0.1, 0.2, 0.3] 88 | mock_embedding2 = [0.4, 0.5, 0.6] 89 | 90 | mock_embedding_item1 = EmbeddingItem(embedding=mock_embedding1, index=0) 91 | mock_embedding_item2 = EmbeddingItem(embedding=mock_embedding2, index=1) 92 | 93 | mock_result = EmbeddingsResult(data=[mock_embedding_item1, mock_embedding_item2]) 94 | 95 | mock_instance.embed.return_value = mock_result 96 | 97 | # Create model and call embed 98 | model = GitHubEmbeddingModel("test-model") 99 | # Patch the get_key method to avoid actual key retrieval 100 | with patch.object(model, "get_key", return_value="test-key"): 101 | texts = ["First text", "Second text"] 102 | result = model.embed_batch(texts) 103 | 104 | # Assertions 105 | MockEmbeddingsClient.assert_called_once() 106 | mock_instance.embed.assert_called_once_with( 107 | model="test-model", 108 | input=texts, 109 | ) 110 | result = list(result) 111 | assert len(result) == 2 112 | assert result[0] == [0.1, 0.2, 0.3] 113 | assert result[1] == [0.4, 0.5, 0.6] 114 | 115 | 116 | @patch("llm_github_models.EmbeddingsClient", autospec=True) 117 | def test_embed_empty_list(MockEmbeddingsClient): 118 | model = GitHubEmbeddingModel("text-embedding-3-small") 119 | with patch.object(model, "get_key", return_value="key"): 120 | result = model.embed_batch([]) 121 | assert list(result) == [] 122 | 123 | MockEmbeddingsClient.assert_not_called() 124 | 125 | 126 | def test_register_embedding_models(): 127 | registered = [] 128 | 129 | def fake_register(instance): 130 | registered.append(instance) 131 | 132 | from llm_github_models import register_embedding_models 133 | 134 | register_embedding_models(fake_register) 135 | 136 | def check_model(model_id, dimensions=None): 137 | suffix = f"-{dimensions}" if dimensions else "" 138 | m = next(m for m in registered if m.model_id == f"github/{model_id}{suffix}") 139 | 140 | assert isinstance(m, GitHubEmbeddingModel) 141 | assert m.model_name == model_id 142 | assert m.dimensions == dimensions 143 | 144 | registered.remove(m) 145 | 146 | for model_id, supported_dimensions in EMBEDDING_MODELS: 147 | check_model(model_id) 148 | 149 | for 
dims in supported_dimensions: 150 | check_model(model_id, dims) 151 | 152 | assert not registered, "More models registered than expected" 153 | -------------------------------------------------------------------------------- /tests/test_llm_github_models.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pathlib 3 | from unittest.mock import Mock, patch 4 | 5 | import pytest 6 | from azure.ai.inference.models import ( 7 | AudioContentItem, 8 | CompletionsUsage, 9 | ImageContentItem, 10 | ImageUrl, 11 | InputAudio, 12 | StreamingChatChoiceUpdate, 13 | StreamingChatCompletionsUpdate, 14 | SystemMessage, 15 | UserMessage, 16 | ) 17 | from llm import get_async_model, get_model 18 | from llm.models import Attachment, Conversation, Prompt, Response 19 | from pydantic import BaseModel 20 | 21 | from llm_github_models import GitHubModels, build_messages, set_usage 22 | 23 | MODELS = ["github/gpt-4.1-mini", "github/gpt-4o-mini", "github/Llama-3.2-11B-Vision-Instruct"] 24 | 25 | 26 | @pytest.mark.parametrize("model", MODELS) 27 | def test_build_messages_no_conversation(model: str): 28 | # Test build_messages with conversation=None and a basic prompt without system. 29 | dummy_prompt = Prompt(prompt="Hello from prompt", system=None, attachments=[], model=model) 30 | messages = build_messages(dummy_prompt, None) 31 | # Should add one UserMessage from prompt since conversation is None. 32 | assert isinstance(messages, list) 33 | # Expecting only one message: UserMessage with content "Hello from prompt" 34 | assert len(messages) == 1 35 | msg = messages[0] 36 | assert isinstance(msg, UserMessage) 37 | # For a simple user message, content is stored in 'content' 38 | # Compare against expected message content. 39 | assert msg.content == "Hello from prompt" 40 | 41 | 42 | @pytest.mark.parametrize("model", MODELS) 43 | def test_build_messages_with_conversation_no_prompt_system(model: str): 44 | # Create a dummy conversation with one response. 45 | dummy_prompt = Prompt(prompt="Hello from prompt", system=None, attachments=[], model=model) 46 | _model = get_model(model) 47 | # The response has a system message and a user message. 48 | dummy_response = Response( 49 | prompt=Prompt(prompt="Hello from last time", system=None, attachments=[], model=model), 50 | model=_model, 51 | stream=False, 52 | ) 53 | dummy_convo = Conversation(responses=[dummy_response], model=_model) 54 | # Create a prompt with no system and without attachments. 55 | messages = build_messages(dummy_prompt, dummy_convo) 56 | assert len(messages) == 3 57 | 58 | 59 | @pytest.mark.parametrize("model", MODELS) 60 | def test_build_messages_with_conversation_prompt_system(model: str): 61 | # Create a dummy conversation with one response. 62 | dummy_prompt = Prompt( 63 | prompt="Hello from prompt", system="You are a hawk", attachments=[], model=model 64 | ) 65 | _model = get_model(model) 66 | # The response has a system message and a user message. 67 | dummy_response = Response( 68 | prompt=Prompt( 69 | prompt="Hello from last time", 70 | system="You are a hawk", 71 | attachments=[], 72 | model=model, 73 | ), 74 | model=_model, 75 | stream=False, 76 | ) 77 | dummy_convo = Conversation(responses=[dummy_response], model=_model) 78 | # Create a prompt with no system and without attachments. 79 | messages = build_messages(dummy_prompt, dummy_convo) 80 | assert len(messages) == 4 81 | # First message should be a system message. 
82 | assert isinstance(messages[0], SystemMessage) 83 | assert messages[0].content == "You are a hawk" 84 | 85 | 86 | def test_build_messages_with_image_path_attachment(): 87 | # Create a dummy attachment object for an image. 88 | model: str = "gpt-4o" 89 | attachment = Attachment( 90 | path=pathlib.Path("tests/files/salmon.jpeg"), url=None, type="image/jpeg" 91 | ) 92 | dummy_attachment = attachment 93 | # Create a prompt with an attachment and prompt text. 94 | dummy_prompt = Prompt( 95 | prompt="Here is an image:", 96 | system=None, 97 | model=model, 98 | attachments=[dummy_attachment], 99 | ) 100 | # No conversation provided. 101 | messages = build_messages(dummy_prompt, None) 102 | # For a prompt with attachments, build_messages creates one UserMessage whose content is a list. 103 | assert len(messages) == 1 104 | msg = messages[0] 105 | assert isinstance(msg, UserMessage) 106 | # The content should be a list with two items: TextContentItem and ImageContentItem. 107 | # Validate type and content. 108 | content_list = msg.content 109 | assert isinstance(content_list, list) 110 | assert len(content_list) == 2 111 | image_item = content_list[1] 112 | assert isinstance(image_item, ImageContentItem) 113 | # Check that image_item.image_url is an ImageUrl with the correct url. 114 | assert isinstance(image_item.image_url, ImageUrl) 115 | assert image_item.image_url.url.startswith("data:image/jpeg;base64,") 116 | 117 | 118 | def test_build_messages_with_image_url_attachments(): 119 | # Create a dummy attachment object for an image. 120 | model: str = "gpt-4o" 121 | attachment = Attachment(path=None, url="http://dummy.image/url.png", type="image/png") 122 | dummy_attachment = attachment 123 | # Create a prompt with an attachment and prompt text. 124 | dummy_prompt = Prompt( 125 | prompt="Here is an image:", 126 | system=None, 127 | model=model, 128 | attachments=[dummy_attachment], 129 | ) 130 | # No conversation provided. 131 | messages = build_messages(dummy_prompt, None) 132 | # For a prompt with attachments, build_messages creates one UserMessage whose content is a list. 133 | assert len(messages) == 1 134 | msg = messages[0] 135 | assert isinstance(msg, UserMessage) 136 | # The content should be a list with two items: TextContentItem and ImageContentItem. 137 | # Validate type and content. 138 | content_list = msg.content 139 | assert isinstance(content_list, list) 140 | assert len(content_list) == 2 141 | image_item = content_list[1] 142 | assert isinstance(image_item, ImageContentItem) 143 | # Check that image_item.image_url is an ImageUrl with the correct url. 144 | assert isinstance(image_item.image_url, ImageUrl) 145 | assert image_item.image_url.url == "http://dummy.image/url.png" 146 | 147 | 148 | def test_build_messages_with_audio_path_attachment(): 149 | # Create a dummy attachment object for an image. 150 | model: str = "gpt-4o" 151 | attachment = Attachment(path=pathlib.Path("tests/files/kick.wav"), url=None, type="audio/wav") 152 | dummy_attachment = attachment 153 | # Create a prompt with an attachment and prompt text. 154 | dummy_prompt = Prompt( 155 | prompt="Here is an audio clip:", 156 | system=None, 157 | model=model, 158 | attachments=[dummy_attachment], 159 | ) 160 | # No conversation provided. 161 | messages = build_messages(dummy_prompt, None) 162 | # For a prompt with attachments, build_messages creates one UserMessage whose content is a list. 
163 | assert len(messages) == 1 164 | msg = messages[0] 165 | assert isinstance(msg, UserMessage) 166 | # The content should be a list with two items: TextContentItem and ImageContentItem. 167 | # Validate type and content. 168 | content_list = msg.content 169 | assert isinstance(content_list, list) 170 | assert len(content_list) == 2 171 | audio_item = content_list[1] 172 | assert isinstance(audio_item, AudioContentItem) 173 | # Check that image_item.image_url is an ImageUrl with the correct url. 174 | assert isinstance(audio_item.input_audio, InputAudio) 175 | assert audio_item.input_audio.data.startswith("UklGRuwiAAB") 176 | assert audio_item.input_audio.format == "wav" 177 | assert audio_item.input_audio.data.endswith("AAAAA=") 178 | 179 | 180 | class DogSchema(BaseModel): 181 | """ 182 | A schema for a dog with a name and age. 183 | """ 184 | 185 | name: str 186 | age: int 187 | one_sentence_bio: str 188 | 189 | 190 | def test_schema_with_unsupported_model(): 191 | """ 192 | Test that requesting a schema for an unsupported model raises an error. 193 | """ 194 | model = get_model("github/Mistral-Nemo") 195 | 196 | with pytest.raises(ValueError): 197 | model.prompt("Invent a good dog", schema=DogSchema) 198 | 199 | 200 | def test_schema_with_supported_model(): 201 | """ 202 | Test that requesting a schema for a supported model works. 203 | """ 204 | model = get_model("github/gpt-4.1-mini") 205 | 206 | response = model.prompt("Invent a good dog named Buddy", schema=DogSchema) 207 | dog = json.loads(response.text()) 208 | assert dog["name"] == "Buddy" 209 | 210 | 211 | @pytest.mark.asyncio 212 | async def test_async_model_prompt(): 213 | """ 214 | Test that the async model prompt works correctly. 215 | """ 216 | model = get_async_model("github/gpt-4.1-mini") 217 | response = await model.prompt("What is the capital of France?") 218 | assert "Paris" in await response.text() 219 | 220 | 221 | @patch("llm_github_models.ChatCompletionsClient", autospec=True) 222 | def test_doesnt_request_streaming_usage_when_not_required(MockChatCompletionsClient): 223 | # Setup mock 224 | mock_update = StreamingChatCompletionsUpdate( 225 | { 226 | "choices": [StreamingChatChoiceUpdate({"delta": {"content": "Paris"}})], 227 | } 228 | ) 229 | 230 | # `with ChatCompletionsClient(...) 
as client:` 231 | mock_instance = MockChatCompletionsClient.return_value.__enter__.return_value 232 | 233 | # `for chunk in client.complete(...)` 234 | mock_instance.complete.return_value.__iter__.return_value = [mock_update] 235 | 236 | model = GitHubModels("test-model", requires_usage_stream_option=False) 237 | 238 | # Patch the get_key method to avoid actual key retrieval 239 | with patch.object(model, "get_key", return_value="test-key"): 240 | result = model.prompt("What is the capital of France", stream=True) 241 | 242 | assert result.text() == "Paris" 243 | 244 | # Assertions 245 | call_kwargs = mock_instance.complete.call_args.kwargs 246 | assert call_kwargs["model_extras"] == {}, ( 247 | "model_extras should be empty when requires_usage_stream_option is False" 248 | ) 249 | 250 | 251 | def test_set_usage(): 252 | usage = CompletionsUsage( 253 | { 254 | "completion_tokens": 10, 255 | "prompt_tokens": 5, 256 | "extra": { 257 | "value": 123, 258 | "inner_empty": {}, 259 | "inner_zero": 0, 260 | }, 261 | "other": "data", 262 | "zero": 0, 263 | "empty": {}, 264 | } 265 | ) 266 | 267 | captured_usage = {} 268 | 269 | def usage_callback(input=None, output=None, details=None): 270 | captured_usage["input"] = input 271 | captured_usage["output"] = output 272 | captured_usage["details"] = details 273 | 274 | mock_response = Mock(spec=Response) 275 | mock_response.set_usage.side_effect = usage_callback 276 | 277 | set_usage(usage, mock_response) 278 | 279 | assert captured_usage["input"] == 5 280 | assert captured_usage["output"] == 10 281 | 282 | # Everything that is 0 or empty should be filtered out. 283 | assert captured_usage["details"] == { 284 | "extra": { 285 | "value": 123, 286 | }, 287 | "other": "data", 288 | } 289 | 290 | 291 | def test_sync_returns_usage(): 292 | """ 293 | Test that the sync model returns usage information for streaming and non-streaming. 294 | """ 295 | model = get_model("github/gpt-4.1-mini") 296 | 297 | response = model.prompt("What is the capital of France?") 298 | usage = response.usage() 299 | assert_has_usage(usage) 300 | 301 | response = model.prompt("What is the capital of France?", stream=True) 302 | usage = response.usage() 303 | assert_has_usage(usage) 304 | 305 | 306 | @pytest.mark.asyncio 307 | async def test_async_returns_usage(): 308 | """ 309 | Test that the async model returns usage information for streaming and non-streaming. 310 | """ 311 | model = get_async_model("github/gpt-4.1-mini") 312 | 313 | response = await model.prompt("What is the capital of France?") 314 | usage = await response.usage() 315 | assert_has_usage(usage) 316 | 317 | response = await model.prompt("What is the capital of France?", stream=True) 318 | usage = await response.usage() 319 | assert_has_usage(usage) 320 | 321 | 322 | def assert_has_usage(usage): 323 | """ 324 | Helper function to assert that usage has input and output tokens. 
325 | """ 326 | assert usage is not None 327 | assert usage.input is not None, "Usage input should not be None" 328 | assert usage.input > 0, "Usage input should be greater than 0" 329 | assert usage.output is not None, "Usage output should not be None" 330 | assert usage.output > 0, "Usage output should be greater than 0" 331 | -------------------------------------------------------------------------------- /tests/test_tool_support.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | from azure.ai.inference.models import StreamingChatResponseMessageUpdate 5 | from llm import get_async_model, get_model 6 | 7 | from llm_github_models import append_streaming_tool_calls 8 | 9 | 10 | def test_model_supports_tools(): 11 | model = get_model("github/gpt-4o-mini") 12 | assert model.supports_tools is True 13 | 14 | 15 | def test_append_streaming_tool_calls(): 16 | tool_calls = {} 17 | 18 | # call_1 => multiply 19 | append_streaming_tool_calls( 20 | tool_calls, 21 | StreamingChatResponseMessageUpdate( 22 | { 23 | "tool_calls": [ 24 | { 25 | "id": "call_1", 26 | "index": 0, 27 | "function": {"name": "multiply", "arguments": ""}, 28 | } 29 | ] 30 | } 31 | ), 32 | ) 33 | 34 | # call_1 => multiply(x: 35 | # call_2 => add(x: 1, 36 | append_streaming_tool_calls( 37 | tool_calls, 38 | StreamingChatResponseMessageUpdate( 39 | { 40 | "tool_calls": [ 41 | { 42 | "index": 0, 43 | "function": {"arguments": '{ "x": '}, 44 | }, 45 | { 46 | "id": "call_2", 47 | "index": 1, 48 | "function": {"name": "add", "arguments": '{ "x": 1,'}, 49 | }, 50 | ] 51 | } 52 | ), 53 | ) 54 | 55 | # call_1 => multiply(x: 2, y: 3) 56 | # call_2 => add(x: 1, y: 57 | append_streaming_tool_calls( 58 | tool_calls, 59 | StreamingChatResponseMessageUpdate( 60 | { 61 | "tool_calls": [ 62 | { 63 | "index": 0, 64 | "function": {"arguments": '2, "y": 3}'}, 65 | }, 66 | { 67 | "index": 1, 68 | "function": {"name": "add", "arguments": ' "y":'}, 69 | }, 70 | ] 71 | } 72 | ), 73 | ) 74 | 75 | # call_1 => multiply(x: 2, y: 3) 76 | # call_2 => add(x: 1, y: 3) 77 | append_streaming_tool_calls( 78 | tool_calls, 79 | StreamingChatResponseMessageUpdate( 80 | { 81 | "tool_calls": [ 82 | { 83 | "index": 1, 84 | "function": {"name": "add", "arguments": " 3 }"}, 85 | }, 86 | ] 87 | } 88 | ), 89 | ) 90 | 91 | assert len(tool_calls) == 2 92 | 93 | assert tool_calls[0].id == "call_1" 94 | assert tool_calls[0].function.name == "multiply" 95 | assert json.loads(tool_calls[0].function.arguments) == {"x": 2, "y": 3} 96 | 97 | assert tool_calls[1].id == "call_2" 98 | assert tool_calls[1].function.name == "add" 99 | assert json.loads(tool_calls[1].function.arguments) == {"x": 1, "y": 3} 100 | 101 | 102 | @pytest.mark.parametrize("stream", [True, False]) 103 | def test_sync_uses_tools(stream): 104 | model = get_model("github/gpt-4o-mini") 105 | 106 | # Create a prompt with a tool 107 | def multiply(x: int, y: int) -> int: 108 | """Multiply two numbers.""" 109 | return x * y 110 | 111 | chain = model.chain("What is 34234 * 213345?", tools=[multiply], stream=stream).responses() # type: ignore 112 | 113 | tool_call_resp = next(chain) 114 | 115 | tool_calls = tool_call_resp.tool_calls() 116 | assert tool_calls is not None 117 | assert len(tool_calls) == 1 118 | assert tool_calls[0].name == "multiply" 119 | assert tool_calls[0].arguments == {"x": 34234, "y": 213345} 120 | 121 | # Sometimes it likes to add commas to the output number 122 | response_text = next(chain).text().replace(",", "") 123 | 
assert "7303652730" in response_text 124 | 125 | 126 | @pytest.mark.parametrize("stream", [True, False]) 127 | @pytest.mark.asyncio 128 | async def test_async_uses_tools(stream): 129 | model = get_async_model("github/gpt-4o-mini") 130 | 131 | # Create a prompt with a tool 132 | def multiply(x: int, y: int) -> int: 133 | """Multiply two numbers.""" 134 | return x * y 135 | 136 | chain = model.chain("What is 34234 * 213345?", tools=[multiply], stream=stream).responses() # type: ignore 137 | 138 | responses = [] 139 | async for resp in chain: 140 | responses.append(resp) 141 | 142 | tool_call_resp = responses[0] 143 | 144 | tool_calls = await tool_call_resp.tool_calls() 145 | assert tool_calls is not None 146 | assert len(tool_calls) == 1 147 | assert tool_calls[0].name == "multiply" 148 | assert tool_calls[0].arguments == {"x": 34234, "y": 213345} 149 | 150 | # Sometimes it likes to add commas to the output number 151 | response_text = (await responses[1].text()).replace(",", "") 152 | assert "7303652730" in response_text 153 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | # Updating models 2 | 3 | 1. `python ./download_models_json.py` 4 | 1. `python ./parse_models_json.py` 5 | 1. Copy CHAT_MODELS and EMBEDDING_MODELS to `../llm_github_models.py` 6 | 1. Run `ruff format llm_github_models.py` 7 | 1. Move `models.fragment.md` to `../README.md` 8 | -------------------------------------------------------------------------------- /tools/download_models_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import requests 4 | 5 | url = "https://api.catalog.azureml.ms/asset-gallery/v1.0/models" 6 | headers = {"Content-Type": "application/json"} 7 | filters = { 8 | "filters": [ 9 | {"field": "freePlayground", "operator": "eq", "values": ["true"]}, 10 | {"field": "labels", "operator": "eq", "values": ["latest"]}, 11 | ], 12 | "order": [{"field": "name", "direction": "asc"}], 13 | } 14 | 15 | all_models = [] 16 | continuation_token = None 17 | 18 | while True: 19 | payload = filters.copy() 20 | if continuation_token: 21 | payload["continuationToken"] = continuation_token 22 | 23 | print("Fetching models...") 24 | response = requests.post(url, headers=headers, json=payload) 25 | response.raise_for_status() 26 | 27 | data = response.json() 28 | all_models.extend(data.get("summaries", [])) 29 | 30 | continuation_token = data.get("continuationToken") 31 | if continuation_token: 32 | print(f"Continuation token: {continuation_token}") 33 | if not continuation_token: 34 | break 35 | 36 | print("Saving models to models.json...") 37 | with open("models.json", "w") as f: 38 | json.dump(all_models, f, indent=4) 39 | 40 | print(f"Saved {len(all_models)} models to models.json") 41 | -------------------------------------------------------------------------------- /tools/models.fragment.md: -------------------------------------------------------------------------------- 1 | ## Supported Models 2 | 3 | ### Chat Models 4 | 5 | | Model Name | Streaming | Schemas | Tools | Input Modalities | Output Modalities | 6 | |------------|-----------|---------|-------|------------------|-------------------| 7 | | AI21-Jamba-1.5-Large | ✅ | ❌ | ❌ | text | text | 8 | | AI21-Jamba-1.5-Mini | ✅ | ❌ | ❌ | text | text | 9 | | Codestral-2501 | ✅ | ❌ | ✅ | text | text | 10 | | Cohere-command-r | ✅ | ❌ | ✅ | text | text | 11 | | 
Cohere-command-r-08-2024 | ✅ | ❌ | ✅ | text | text | 12 | | Cohere-command-r-plus | ✅ | ❌ | ✅ | text | text | 13 | | Cohere-command-r-plus-08-2024 | ✅ | ❌ | ✅ | text | text | 14 | | DeepSeek-R1 | ✅ | ❌ | ❌ | text | text | 15 | | DeepSeek-V3 | ✅ | ❌ | ❌ | text | text | 16 | | DeepSeek-V3-0324 | ✅ | ❌ | ❌ | text | text | 17 | | Llama-3.2-11B-Vision-Instruct | ✅ | ❌ | ❌ | text, image, audio | text | 18 | | Llama-3.2-90B-Vision-Instruct | ✅ | ❌ | ❌ | text, image, audio | text | 19 | | Llama-3.3-70B-Instruct | ✅ | ❌ | ❌ | text | text | 20 | | Llama-4-Maverick-17B-128E-Instruct-FP8 | ✅ | ❌ | ❌ | text, image | text | 21 | | Llama-4-Scout-17B-16E-Instruct | ✅ | ❌ | ❌ | text, image | text | 22 | | MAI-DS-R1 | ✅ | ❌ | ❌ | text | text | 23 | | Meta-Llama-3-70B-Instruct | ✅ | ❌ | ❌ | text | text | 24 | | Meta-Llama-3-8B-Instruct | ✅ | ❌ | ❌ | text | text | 25 | | Meta-Llama-3.1-405B-Instruct | ✅ | ❌ | ❌ | text | text | 26 | | Meta-Llama-3.1-70B-Instruct | ✅ | ❌ | ❌ | text | text | 27 | | Meta-Llama-3.1-8B-Instruct | ✅ | ❌ | ❌ | text | text | 28 | | Ministral-3B | ✅ | ❌ | ✅ | text | text | 29 | | Mistral-Large-2411 | ✅ | ❌ | ✅ | text | text | 30 | | Mistral-Nemo | ✅ | ❌ | ✅ | text | text | 31 | | Mistral-large | ✅ | ❌ | ✅ | text | text | 32 | | Mistral-large-2407 | ✅ | ❌ | ✅ | text | text | 33 | | Mistral-small | ✅ | ❌ | ✅ | text | text | 34 | | Phi-3-medium-128k-instruct | ✅ | ❌ | ❌ | text | text | 35 | | Phi-3-medium-4k-instruct | ✅ | ❌ | ❌ | text | text | 36 | | Phi-3-mini-128k-instruct | ✅ | ❌ | ❌ | text | text | 37 | | Phi-3-mini-4k-instruct | ✅ | ❌ | ❌ | text | text | 38 | | Phi-3-small-128k-instruct | ✅ | ❌ | ❌ | text | text | 39 | | Phi-3-small-8k-instruct | ✅ | ❌ | ❌ | text | text | 40 | | Phi-3.5-MoE-instruct | ✅ | ❌ | ❌ | text | text | 41 | | Phi-3.5-mini-instruct | ✅ | ❌ | ❌ | text | text | 42 | | Phi-3.5-vision-instruct | ✅ | ❌ | ❌ | text, image | text | 43 | | Phi-4 | ✅ | ❌ | ❌ | text | text | 44 | | Phi-4-mini-instruct | ✅ | ❌ | ❌ | text | text | 45 | | Phi-4-mini-reasoning | ✅ | ❌ | ❌ | text | text | 46 | | Phi-4-multimodal-instruct | ✅ | ❌ | ❌ | audio, image, text | text | 47 | | Phi-4-reasoning | ✅ | ❌ | ❌ | text | text | 48 | | cohere-command-a | ✅ | ❌ | ✅ | text | text | 49 | | gpt-4.1 | ✅ | ✅ | ✅ | text, image | text | 50 | | gpt-4.1-mini | ✅ | ✅ | ✅ | text, image | text | 51 | | gpt-4.1-nano | ✅ | ✅ | ✅ | text, image | text | 52 | | gpt-4o | ✅ | ✅ | ✅ | text, image, audio | text | 53 | | gpt-4o-mini | ✅ | ✅ | ✅ | text, image, audio | text | 54 | | grok-3 | ✅ | ❌ | ✅ | text | text | 55 | | grok-3-mini | ✅ | ❌ | ✅ | text | text | 56 | | jais-30b-chat | ✅ | ❌ | ❌ | text | text | 57 | | mistral-medium-2505 | ✅ | ❌ | ✅ | text, image | text | 58 | | mistral-small-2503 | ✅ | ❌ | ✅ | text, image | text | 59 | | o1 | ❌ | ✅ | ✅ | text, image | text | 60 | | o1-mini | ❌ | ❌ | ❌ | text | text | 61 | | o1-preview | ❌ | ❌ | ❌ | text | text | 62 | | o3 | ✅ | ❌ | ✅ | text, image | text | 63 | | o3-mini | ❌ | ✅ | ✅ | text | text | 64 | | o4-mini | ✅ | ❌ | ✅ | text, image | text | 65 | 66 | ### AI21 Jamba 1.5 Large 67 | 68 | Usage: `llm -m github/AI21-Jamba-1.5-Large` 69 | 70 | **Publisher:** AI21 Labs 71 | 72 | **Description:** A 398B parameters (94B active) multilingual model, offering a 256K long context window, function calling, structured output, and grounded generation. 
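
As with every chat model in the table above, these models can be called directly from Python through the `llm` API. A minimal sketch, using `github/gpt-4o-mini` purely as an example and assuming a key is already configured (the plugin reads the `github` llm key, or the `GITHUB_MODELS_KEY` environment variable):

```python
import llm

# Any chat model id from the table above works here; gpt-4o-mini is just an example.
model = llm.get_model("github/gpt-4o-mini")
response = model.prompt("Suggest three one-word names for a pet salmon")
print(response.text())
```
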
73 | 74 | ### AI21 Jamba 1.5 Mini 75 | 76 | Usage: `llm -m github/AI21-Jamba-1.5-Mini` 77 | 78 | **Publisher:** AI21 Labs 79 | 80 | **Description:** A 52B parameters (12B active) multilingual model, offering a 256K long context window, function calling, structured output, and grounded generation. 81 | 82 | ### Codestral 25.01 83 | 84 | Usage: `llm -m github/Codestral-2501` 85 | 86 | **Publisher:** Mistral AI 87 | 88 | **Description:** Codestral 25.01 by Mistral AI is designed for code generation, supporting 80+ programming languages, and optimized for tasks like code completion and fill-in-the-middle 89 | 90 | ### Cohere Command R 91 | 92 | Usage: `llm -m github/Cohere-command-r` 93 | 94 | **Publisher:** Cohere 95 | 96 | **Description:** Command R is a scalable generative model targeting RAG and Tool Use to enable production-scale AI for enterprise. 97 | 98 | ### Cohere Command R 08-2024 99 | 100 | Usage: `llm -m github/Cohere-command-r-08-2024` 101 | 102 | **Publisher:** Cohere 103 | 104 | **Description:** Command R is a scalable generative model targeting RAG and Tool Use to enable production-scale AI for enterprise. 105 | 106 | ### Cohere Command R+ 107 | 108 | Usage: `llm -m github/Cohere-command-r-plus` 109 | 110 | **Publisher:** Cohere 111 | 112 | **Description:** Command R+ is a state-of-the-art RAG-optimized model designed to tackle enterprise-grade workloads. 113 | 114 | ### Cohere Command R+ 08-2024 115 | 116 | Usage: `llm -m github/Cohere-command-r-plus-08-2024` 117 | 118 | **Publisher:** Cohere 119 | 120 | **Description:** Command R+ is a state-of-the-art RAG-optimized model designed to tackle enterprise-grade workloads. 121 | 122 | ### Cohere Embed v3 English 123 | 124 | Usage: `llm -m github/Cohere-embed-v3-english` 125 | 126 | **Publisher:** Cohere 127 | 128 | **Description:** Cohere Embed English is the market's leading text representation model used for semantic search, retrieval-augmented generation (RAG), classification, and clustering. 129 | 130 | ### Cohere Embed v3 Multilingual 131 | 132 | Usage: `llm -m github/Cohere-embed-v3-multilingual` 133 | 134 | **Publisher:** Cohere 135 | 136 | **Description:** Cohere Embed Multilingual is the market's leading text representation model used for semantic search, retrieval-augmented generation (RAG), classification, and clustering. 137 | 138 | ### DeepSeek-R1 139 | 140 | Usage: `llm -m github/DeepSeek-R1` 141 | 142 | **Publisher:** DeepSeek 143 | 144 | **Description:** DeepSeek-R1 excels at reasoning tasks using a step-by-step training process, such as language, scientific reasoning, and coding tasks. 145 | 146 | ### DeepSeek-V3 147 | 148 | Usage: `llm -m github/DeepSeek-V3` 149 | 150 | **Publisher:** DeepSeek 151 | 152 | **Description:** A strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. 153 | 154 | ### DeepSeek-V3-0324 155 | 156 | Usage: `llm -m github/DeepSeek-V3-0324` 157 | 158 | **Publisher:** DeepSeek 159 | 160 | **Description:** DeepSeek-V3-0324 demonstrates notable improvements over its predecessor, DeepSeek-V3, in several key aspects, including enhanced reasoning, improved function calling, and superior code generation capabilities. 161 | 162 | ### Llama-3.2-11B-Vision-Instruct 163 | 164 | Usage: `llm -m github/Llama-3.2-11B-Vision-Instruct` 165 | 166 | **Publisher:** Meta 167 | 168 | **Description:** Excels in image reasoning capabilities on high-res images for visual understanding apps. 
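
Vision-capable entries in the table (those with `image` among their input modalities, such as the model above) accept image attachments. A minimal sketch, where `photo.jpg` is a placeholder for any local image file and a `github` key is assumed to be configured:

```python
import llm

model = llm.get_model("github/Llama-3.2-11B-Vision-Instruct")
response = model.prompt(
    "Describe what is in this photo",
    # photo.jpg is a hypothetical local file; a remote image can be
    # passed instead with llm.Attachment(url="https://...").
    attachments=[llm.Attachment(path="photo.jpg")],
)
print(response.text())
```

On the command line, the same attachment can be supplied with the `-a` option.
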
169 | 170 | ### Llama-3.2-90B-Vision-Instruct 171 | 172 | Usage: `llm -m github/Llama-3.2-90B-Vision-Instruct` 173 | 174 | **Publisher:** Meta 175 | 176 | **Description:** Advanced image reasoning capabilities for visual understanding agentic apps. 177 | 178 | ### Llama-3.3-70B-Instruct 179 | 180 | Usage: `llm -m github/Llama-3.3-70B-Instruct` 181 | 182 | **Publisher:** Meta 183 | 184 | **Description:** Llama 3.3 70B Instruct offers enhanced reasoning, math, and instruction following with performance comparable to Llama 3.1 405B. 185 | 186 | ### Llama 4 Maverick 17B 128E Instruct FP8 187 | 188 | Usage: `llm -m github/Llama-4-Maverick-17B-128E-Instruct-FP8` 189 | 190 | **Publisher:** Meta 191 | 192 | **Description:** Llama 4 Maverick 17B 128E Instruct FP8 is great at precise image understanding and creative writing, offering high quality at a lower price compared to Llama 3.3 70B 193 | 194 | ### Llama 4 Scout 17B 16E Instruct 195 | 196 | Usage: `llm -m github/Llama-4-Scout-17B-16E-Instruct` 197 | 198 | **Publisher:** Meta 199 | 200 | **Description:** Llama 4 Scout 17B 16E Instruct is great at multi-document summarization, parsing extensive user activity for personalized tasks, and reasoning over vast codebases. 201 | 202 | ### MAI-DS-R1 203 | 204 | Usage: `llm -m github/MAI-DS-R1` 205 | 206 | **Publisher:** Microsoft 207 | 208 | **Description:** MAI-DS-R1 is a DeepSeek-R1 reasoning model that has been post-trained by the Microsoft AI team to fill in information gaps in the previous version of the model and improve its harm protections while maintaining R1 reasoning capabilities. 209 | 210 | ### Meta-Llama-3-70B-Instruct 211 | 212 | Usage: `llm -m github/Meta-Llama-3-70B-Instruct` 213 | 214 | **Publisher:** Meta 215 | 216 | **Description:** A powerful 70-billion parameter model excelling in reasoning, coding, and broad language applications. 217 | 218 | ### Meta-Llama-3-8B-Instruct 219 | 220 | Usage: `llm -m github/Meta-Llama-3-8B-Instruct` 221 | 222 | **Publisher:** Meta 223 | 224 | **Description:** A versatile 8-billion parameter model optimized for dialogue and text generation tasks. 225 | 226 | ### Meta-Llama-3.1-405B-Instruct 227 | 228 | Usage: `llm -m github/Meta-Llama-3.1-405B-Instruct` 229 | 230 | **Publisher:** Meta 231 | 232 | **Description:** The Llama 3.1 instruction tuned text only models are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks. 233 | 234 | ### Meta-Llama-3.1-70B-Instruct 235 | 236 | Usage: `llm -m github/Meta-Llama-3.1-70B-Instruct` 237 | 238 | **Publisher:** Meta 239 | 240 | **Description:** The Llama 3.1 instruction tuned text only models are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks. 241 | 242 | ### Meta-Llama-3.1-8B-Instruct 243 | 244 | Usage: `llm -m github/Meta-Llama-3.1-8B-Instruct` 245 | 246 | **Publisher:** Meta 247 | 248 | **Description:** The Llama 3.1 instruction tuned text only models are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks. 249 | 250 | ### Ministral 3B 251 | 252 | Usage: `llm -m github/Ministral-3B` 253 | 254 | **Publisher:** Mistral AI 255 | 256 | **Description:** Ministral 3B is a state-of-the-art Small Language Model (SLM) optimized for edge computing and on-device applications. 
As it is designed for low-latency and compute-efficient inference, it it also the perfect model for standard GenAI applications that have 257 | 258 | ### Mistral Large 24.11 259 | 260 | Usage: `llm -m github/Mistral-Large-2411` 261 | 262 | **Publisher:** Mistral AI 263 | 264 | **Description:** Mistral Large 24.11 offers enhanced system prompts, advanced reasoning and function calling capabilities. 265 | 266 | ### Mistral Nemo 267 | 268 | Usage: `llm -m github/Mistral-Nemo` 269 | 270 | **Publisher:** Mistral AI 271 | 272 | **Description:** Mistral Nemo is a cutting-edge Language Model (LLM) boasting state-of-the-art reasoning, world knowledge, and coding capabilities within its size category. 273 | 274 | ### Mistral Large 275 | 276 | Usage: `llm -m github/Mistral-large` 277 | 278 | **Publisher:** Mistral AI 279 | 280 | **Description:** Mistral's flagship model that's ideal for complex tasks that require large reasoning capabilities or are highly specialized (Synthetic Text Generation, Code Generation, RAG, or Agents). 281 | 282 | ### Mistral Large (2407) 283 | 284 | Usage: `llm -m github/Mistral-large-2407` 285 | 286 | **Publisher:** Mistral AI 287 | 288 | **Description:** Mistral Large (2407) is an advanced Large Language Model (LLM) with state-of-the-art reasoning, knowledge and coding capabilities. 289 | 290 | ### Mistral Small 291 | 292 | Usage: `llm -m github/Mistral-small` 293 | 294 | **Publisher:** Mistral AI 295 | 296 | **Description:** Mistral Small can be used on any language-based task that requires high efficiency and low latency. 297 | 298 | ### Phi-3-medium instruct (128k) 299 | 300 | Usage: `llm -m github/Phi-3-medium-128k-instruct` 301 | 302 | **Publisher:** Microsoft 303 | 304 | **Description:** Same Phi-3-medium model, but with a larger context size for RAG or few shot prompting. 305 | 306 | ### Phi-3-medium instruct (4k) 307 | 308 | Usage: `llm -m github/Phi-3-medium-4k-instruct` 309 | 310 | **Publisher:** Microsoft 311 | 312 | **Description:** A 14B parameters model, proves better quality than Phi-3-mini, with a focus on high-quality, reasoning-dense data. 313 | 314 | ### Phi-3-mini instruct (128k) 315 | 316 | Usage: `llm -m github/Phi-3-mini-128k-instruct` 317 | 318 | **Publisher:** Microsoft 319 | 320 | **Description:** Same Phi-3-mini model, but with a larger context size for RAG or few shot prompting. 321 | 322 | ### Phi-3-mini instruct (4k) 323 | 324 | Usage: `llm -m github/Phi-3-mini-4k-instruct` 325 | 326 | **Publisher:** Microsoft 327 | 328 | **Description:** Tiniest member of the Phi-3 family. Optimized for both quality and low latency. 329 | 330 | ### Phi-3-small instruct (128k) 331 | 332 | Usage: `llm -m github/Phi-3-small-128k-instruct` 333 | 334 | **Publisher:** Microsoft 335 | 336 | **Description:** Same Phi-3-small model, but with a larger context size for RAG or few shot prompting. 337 | 338 | ### Phi-3-small instruct (8k) 339 | 340 | Usage: `llm -m github/Phi-3-small-8k-instruct` 341 | 342 | **Publisher:** Microsoft 343 | 344 | **Description:** A 7B parameters model, proves better quality than Phi-3-mini, with a focus on high-quality, reasoning-dense data. 345 | 346 | ### Phi-3.5-MoE instruct (128k) 347 | 348 | Usage: `llm -m github/Phi-3.5-MoE-instruct` 349 | 350 | **Publisher:** Microsoft 351 | 352 | **Description:** A new mixture of experts model 353 | 354 | ### Phi-3.5-mini instruct (128k) 355 | 356 | Usage: `llm -m github/Phi-3.5-mini-instruct` 357 | 358 | **Publisher:** Microsoft 359 | 360 | **Description:** Refresh of Phi-3-mini model. 
361 | 362 | ### Phi-3.5-vision instruct (128k) 363 | 364 | Usage: `llm -m github/Phi-3.5-vision-instruct` 365 | 366 | **Publisher:** Microsoft 367 | 368 | **Description:** Refresh of Phi-3-vision model. 369 | 370 | ### Phi-4 371 | 372 | Usage: `llm -m github/Phi-4` 373 | 374 | **Publisher:** Microsoft 375 | 376 | **Description:** Phi-4 14B, a highly capable model for low latency scenarios. 377 | 378 | ### Phi-4-mini-instruct 379 | 380 | Usage: `llm -m github/Phi-4-mini-instruct` 381 | 382 | **Publisher:** Microsoft 383 | 384 | **Description:** 3.8B parameters Small Language Model outperforming larger models in reasoning, math, coding, and function-calling 385 | 386 | ### Phi-4-mini-reasoning 387 | 388 | Usage: `llm -m github/Phi-4-mini-reasoning` 389 | 390 | **Publisher:** Microsoft 391 | 392 | **Description:** Lightweight math reasoning model optimized for multi-step problem solving 393 | 394 | ### Phi-4-multimodal-instruct 395 | 396 | Usage: `llm -m github/Phi-4-multimodal-instruct` 397 | 398 | **Publisher:** Microsoft 399 | 400 | **Description:** First small multimodal model to have 3 modality inputs (text, audio, image), excelling in quality and efficiency 401 | 402 | ### Phi-4-Reasoning 403 | 404 | Usage: `llm -m github/Phi-4-reasoning` 405 | 406 | **Publisher:** Microsoft 407 | 408 | **Description:** State-of-the-art open-weight reasoning model. 409 | 410 | ### Cohere Command A 411 | 412 | Usage: `llm -m github/cohere-command-a` 413 | 414 | **Publisher:** Cohere 415 | 416 | **Description:** Command A is a highly efficient generative model that excels at agentic and multilingual use cases. 417 | 418 | ### Cohere Embed 4 419 | 420 | Usage: `llm -m github/embed-v-4-0` 421 | 422 | **Publisher:** Cohere 423 | 424 | **Description:** Embed 4 transforms texts and images into numerical vectors 425 | 426 | ### OpenAI GPT-4.1 427 | 428 | Usage: `llm -m github/gpt-4.1` 429 | 430 | **Publisher:** OpenAI 431 | 432 | **Description:** gpt-4.1 outperforms gpt-4o across the board, with major gains in coding, instruction following, and long-context understanding 433 | 434 | ### OpenAI GPT-4.1-mini 435 | 436 | Usage: `llm -m github/gpt-4.1-mini` 437 | 438 | **Publisher:** OpenAI 439 | 440 | **Description:** gpt-4.1-mini outperform gpt-4o-mini across the board, with major gains in coding, instruction following, and long-context handling 441 | 442 | ### OpenAI GPT-4.1-nano 443 | 444 | Usage: `llm -m github/gpt-4.1-nano` 445 | 446 | **Publisher:** OpenAI 447 | 448 | **Description:** gpt-4.1-nano provides gains in coding, instruction following, and long-context handling along with lower latency and cost 449 | 450 | ### OpenAI GPT-4o 451 | 452 | Usage: `llm -m github/gpt-4o` 453 | 454 | **Publisher:** OpenAI 455 | 456 | **Description:** OpenAI's most advanced multimodal model in the gpt-4o family. Can handle both text and image inputs. 457 | 458 | ### OpenAI GPT-4o mini 459 | 460 | Usage: `llm -m github/gpt-4o-mini` 461 | 462 | **Publisher:** OpenAI 463 | 464 | **Description:** An affordable, efficient AI solution for diverse text and image tasks. 465 | 466 | ### Grok 3 467 | 468 | Usage: `llm -m github/grok-3` 469 | 470 | **Publisher:** xAI 471 | 472 | **Description:** Grok 3 is xAI's debut model, pretrained by Colossus at supermassive scale to excel in specialized domains like finance, healthcare, and the law. 
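
Models with a ✅ in the Tools column above, including Grok 3 and the GPT-4o and GPT-4.1 families, can call Python functions passed as tools. A minimal sketch of a single tool call, assuming a configured `github` key:

```python
import llm


def multiply(x: int, y: int) -> int:
    """Multiply two numbers."""
    return x * y


model = llm.get_model("github/gpt-4o-mini")
# chain() lets the model call the tool and then continue with the tool's result.
chain = model.chain("What is 34234 * 213345?", tools=[multiply])
print(chain.text())
```
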
473 | 474 | ### Grok 3 Mini 475 | 476 | Usage: `llm -m github/grok-3-mini` 477 | 478 | **Publisher:** xAI 479 | 480 | **Description:** Grok 3 Mini is a lightweight model that thinks before responding. Trained on mathematic and scientific problems, it is great for logic-based tasks. 481 | 482 | ### JAIS 30b Chat 483 | 484 | Usage: `llm -m github/jais-30b-chat` 485 | 486 | **Publisher:** Core42 487 | 488 | **Description:** JAIS 30b Chat is an auto-regressive bilingual LLM for Arabic & English with state-of-the-art capabilities in Arabic. 489 | 490 | ### Mistral Medium 3 (25.05) 491 | 492 | Usage: `llm -m github/mistral-medium-2505` 493 | 494 | **Publisher:** Mistral AI 495 | 496 | **Description:** Mistral Medium 3 is an advanced Large Language Model (LLM) with state-of-the-art reasoning, knowledge, coding and vision capabilities. 497 | 498 | ### Mistral Small 3.1 499 | 500 | Usage: `llm -m github/mistral-small-2503` 501 | 502 | **Publisher:** Mistral AI 503 | 504 | **Description:** Enhanced Mistral Small 3 with multimodal capabilities and a 128k context length. 505 | 506 | ### OpenAI o1 507 | 508 | Usage: `llm -m github/o1` 509 | 510 | **Publisher:** OpenAI 511 | 512 | **Description:** Focused on advanced reasoning and solving complex problems, including math and science tasks. Ideal for applications that require deep contextual understanding and agentic workflows. 513 | 514 | ### OpenAI o1-mini 515 | 516 | Usage: `llm -m github/o1-mini` 517 | 518 | **Publisher:** OpenAI 519 | 520 | **Description:** Smaller, faster, and 80% cheaper than o1-preview, performs well at code generation and small context operations. 521 | 522 | ### OpenAI o1-preview 523 | 524 | Usage: `llm -m github/o1-preview` 525 | 526 | **Publisher:** OpenAI 527 | 528 | **Description:** Focused on advanced reasoning and solving complex problems, including math and science tasks. Ideal for applications that require deep contextual understanding and agentic workflows. 529 | 530 | ### OpenAI o3 531 | 532 | Usage: `llm -m github/o3` 533 | 534 | **Publisher:** OpenAI 535 | 536 | **Description:** o3 includes significant improvements on quality and safety while supporting the existing features of o1 and delivering comparable or better performance. 537 | 538 | ### OpenAI o3-mini 539 | 540 | Usage: `llm -m github/o3-mini` 541 | 542 | **Publisher:** OpenAI 543 | 544 | **Description:** o3-mini includes the o1 features with significant cost-efficiencies for scenarios requiring high performance. 545 | 546 | ### OpenAI o4-mini 547 | 548 | Usage: `llm -m github/o4-mini` 549 | 550 | **Publisher:** OpenAI 551 | 552 | **Description:** o4-mini includes significant improvements on quality and safety while supporting the existing features of o3-mini and delivering comparable or better performance. 553 | 554 | ### OpenAI Text Embedding 3 (large) 555 | 556 | Usage: `llm -m github/text-embedding-3-large` 557 | 558 | **Publisher:** OpenAI 559 | 560 | **Description:** Text-embedding-3 series models are the latest and most capable embedding model from OpenAI. 561 | 562 | ### OpenAI Text Embedding 3 (small) 563 | 564 | Usage: `llm -m github/text-embedding-3-small` 565 | 566 | **Publisher:** OpenAI 567 | 568 | **Description:** Text-embedding-3 series models are the latest and most capable embedding model from OpenAI. 
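
The embedding models above are registered as `github/<model name>`, and, where a model supports a reduced `dimensions` option, also under a `github/<model name>-<dimensions>` variant. A minimal sketch of embedding a string from Python, assuming a configured `github` key:

```python
import llm

embedding_model = llm.get_embedding_model("github/text-embedding-3-small")
vector = embedding_model.embed("A short sentence to embed")
# The result is a plain list of floats.
print(len(vector), vector[:5])
```

The command line equivalent is roughly `llm embed -m github/text-embedding-3-small -c "A short sentence to embed"`.
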
569 | 570 | -------------------------------------------------------------------------------- /tools/parse_models_json.py: -------------------------------------------------------------------------------- 1 | """ 2 | A script to parse the models.json from the github API until there is a live API to call. 3 | """ 4 | 5 | import json 6 | from pprint import pprint 7 | 8 | chat_models = [] 9 | embedding_models = [] 10 | 11 | 12 | def supports_streaming(name): 13 | if name in ["o1", "o1-mini", "o1-preview", "o3-mini"]: 14 | return False 15 | return True 16 | 17 | 18 | def supports_schemas(name): 19 | if name in [ 20 | "gpt-4o", 21 | "gpt-4o-mini", 22 | "gpt-4.1", 23 | "gpt-4.1-mini", 24 | "gpt-4.1-nano", 25 | "o1", 26 | "o3-mini", 27 | ]: 28 | return True 29 | return False 30 | 31 | 32 | def requires_usage_stream_option(name): 33 | return name in [ 34 | "gpt-4o", 35 | "gpt-4o-mini", 36 | "gpt-4.1", 37 | "gpt-4.1-mini", 38 | "gpt-4.1-nano", 39 | "o3", 40 | "o4-mini", 41 | ] 42 | 43 | 44 | def supports_tools(name): 45 | # Note: this list does not line up with the official docs at 46 | # https://learn.microsoft.com/en-us/azure/machine-learning/concept-models-featured?view=azureml-api-2 47 | # But in practice these are the models that work. 48 | tool_supporting_models = [ 49 | "o3", 50 | "o3-mini", 51 | "o4-mini", 52 | "o1", 53 | "gpt-4o", 54 | "gpt-4o-mini", 55 | "gpt-4.1", 56 | "gpt-4.1-mini", 57 | "gpt-4.1-nano", 58 | "grok-3", 59 | "grok-3-mini", 60 | "cohere-command-a", 61 | "Cohere-command-r-plus-08-2024", 62 | "Cohere-command-r-08-2024", 63 | "Cohere-command-r-plus", 64 | "Cohere-command-r", 65 | "Codestral-2501", 66 | "Ministral-3B", 67 | "Mistral-Nemo", 68 | "Mistral-Large-2411", 69 | "Mistral-large-2407", 70 | "Mistral-large", 71 | "mistral-medium-2505", 72 | "mistral-small-2503", 73 | "Mistral-small", 74 | ] 75 | return name in tool_supporting_models 76 | 77 | 78 | with open("models.json", "r", encoding="utf-8") as f: 79 | models = json.load(f) 80 | for model in models: 81 | if "chat-completion" in model["inferenceTasks"]: 82 | chat_models.append( 83 | ( 84 | model["name"], 85 | supports_streaming(model["name"]), 86 | supports_schemas(model["name"]), 87 | requires_usage_stream_option(model["name"]), 88 | supports_tools(model["name"]), 89 | model["modelLimits"]["supportedInputModalities"], 90 | model["modelLimits"]["supportedOutputModalities"], 91 | ) 92 | ) 93 | elif "embeddings" in model["inferenceTasks"]: 94 | embedding_models.append(model["name"]) 95 | else: 96 | print("Not sure what to do with this model: ", model["name"]) 97 | 98 | print("Chat models:") 99 | # sort by name 100 | chat_models = sorted(chat_models, key=lambda x: x[0]) 101 | pprint(chat_models, indent=4, width=999) 102 | print("Embedding models:") 103 | # sort by name 104 | embedding_models = sorted(embedding_models) 105 | pprint(embedding_models, indent=4) 106 | 107 | # Make a Markdown series for the models 108 | 109 | with open("models.fragment.md", "w", encoding="utf-8") as f: 110 | f.write("## Supported Models\n\n") 111 | 112 | # Add chat models table 113 | f.write("### Chat Models\n\n") 114 | f.write("| Model Name | Streaming | Schemas | Tools | Input Modalities | Output Modalities |\n") 115 | f.write("|------------|-----------|---------|-------|------------------|-------------------|\n") 116 | 117 | for ( 118 | model_name, 119 | streaming, 120 | schemas, 121 | usage_stream, 122 | tools, 123 | input_modalities, 124 | output_modalities, 125 | ) in chat_models: 126 | streaming_str = "✅" if streaming else "❌" 127 | 
schemas_str = "✅" if schemas else "❌"
128 |         tools_str = "✅" if tools else "❌"
129 |         input_str = ", ".join(input_modalities) if input_modalities else "text"
130 |         output_str = ", ".join(output_modalities) if output_modalities else "text"
131 | 
132 |         f.write(
133 |             f"| {model_name} | {streaming_str} | {schemas_str} |"
134 |             f" {tools_str} | {input_str} | {output_str} |\n"
135 |         )
136 | 
137 |     f.write("\n")
138 | 
139 |     for model in models:
140 |         f.write(f"### {model['displayName']}\n\n")
141 |         f.write(f"Usage: `llm -m github/{model['name']}`\n\n")
142 |         f.write(f"**Publisher:** {model['publisher']} \n\n")
143 |         # Build the summary outside the f-string so the backslash escapes in
144 |         # the replacement also work on Python versions older than 3.12.
145 |         summary = model["summary"].replace("\n## ", "\n#### ")
146 |         f.write(f"**Description:** {summary} \n\n")
147 | 
--------------------------------------------------------------------------------