├── lm_proxy ├── __init__.py ├── __main__.py ├── config_loaders │ ├── toml.py │ ├── json.py │ ├── __init__.py │ ├── python.py │ └── yaml.py ├── api_key_check │ ├── __init__.py │ ├── in_config.py │ ├── allow_all.py │ └── with_request.py ├── models_endpoint.py ├── app.py ├── base_types.py ├── loggers.py ├── utils.py ├── bootstrap.py ├── config.py └── core.py ├── tests ├── configs │ ├── __init__.py │ ├── no_api_key_check.yml │ ├── test_config.toml │ ├── test_config.yml │ ├── test_config.json │ ├── config_fn.py │ └── test_disabled.py ├── __init__.py ├── test_resolve_connection_and_model.py ├── test_api_key_check_allow_all.py ├── test_config_loaders.py ├── test_api_key_check_errors.py ├── conftest.py ├── test_models_endpoint.py ├── test_loggers.py ├── test_integration.py └── test_utils.py ├── .flake8 ├── .gitignore ├── .env.template ├── .github └── workflows │ ├── code-style.yml │ ├── gito-code-review.yml │ ├── tests.yml │ └── gito-react-to-comments.yml ├── Makefile ├── coverage.svg ├── examples ├── vertex-ai.toml └── load_balancer_config.py ├── LICENSE ├── multi-build.py ├── config.toml ├── pyproject.toml └── README.md /lm_proxy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # This file enables pytest to discover tests in this directory 2 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | exclude = 4 | .git, 5 | __pycache__, 6 | .venv, 7 | .aico/* 8 | -------------------------------------------------------------------------------- /lm_proxy/__main__.py: -------------------------------------------------------------------------------- 1 | """Provides the CLI entry point when the package is executed as a Python module.""" 2 | from .app import cli_app 3 | 4 | 5 | if __name__ == "__main__": 6 | cli_app() 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .idea/ 3 | venv 4 | .env* 5 | !.env.template 6 | .pytest_cache 7 | storage 8 | dist 9 | docs 10 | .aico/* 11 | !.aico/project.json 12 | .coverage 13 | coverage.xml 14 | -------------------------------------------------------------------------------- /tests/configs/no_api_key_check.yml: -------------------------------------------------------------------------------- 1 | host: "127.0.0.1" 2 | port: 8787 3 | api_key_check: "lm_proxy.api_key_check.AllowAll" 4 | connections: 5 | test: "tests.conftest.llm_ok_connection" 6 | routing: 7 | "*": "test" 8 | -------------------------------------------------------------------------------- /lm_proxy/config_loaders/toml.py: -------------------------------------------------------------------------------- 1 | """TOML configuration loader.""" 2 | import tomllib 3 | 4 | 5 | def load_toml_config(config_path: str) -> dict: 6 | """Loads configuration from a TOML file.""" 7 | with open(config_path, "rb") as f: 8 | return tomllib.load(f) 9 | 
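A minimal usage sketch for the TOML loader above (config path and keys are illustrative):

    from lm_proxy.config_loaders.toml import load_toml_config

    raw = load_toml_config("config.toml")   # plain dict, not yet validated as a Config
    print(raw["host"], raw["port"])

In normal operation the loader is not called directly: Config.load() dispatches by file extension (the config.loaders entry points in pyproject.toml map extensions to these functions).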
-------------------------------------------------------------------------------- /lm_proxy/config_loaders/json.py: -------------------------------------------------------------------------------- 1 | """JSON configuration loader.""" 2 | import json 3 | 4 | 5 | def load_json_config(config_path: str) -> dict: 6 | """Loads configuration from a JSON file.""" 7 | with open(config_path, "r", encoding="utf-8") as f: 8 | return json.load(f) 9 | -------------------------------------------------------------------------------- /lm_proxy/api_key_check/__init__.py: -------------------------------------------------------------------------------- 1 | """Collection of built-in API-key checkers for usage in the configuration.""" 2 | from .in_config import check_api_key_in_config 3 | from .with_request import CheckAPIKeyWithRequest 4 | from .allow_all import AllowAll 5 | 6 | __all__ = ["check_api_key_in_config", "CheckAPIKeyWithRequest", "AllowAll"] 7 | -------------------------------------------------------------------------------- /lm_proxy/config_loaders/__init__.py: -------------------------------------------------------------------------------- 1 | """Built-in configuration loaders for different file formats.""" 2 | from .python import load_python_config 3 | from .toml import load_toml_config 4 | from .yaml import load_yaml_config 5 | from .json import load_json_config 6 | 7 | __all__ = [ 8 | "load_python_config", 9 | "load_toml_config", 10 | "load_yaml_config", 11 | "load_json_config", 12 | ] 13 | -------------------------------------------------------------------------------- /.env.template: -------------------------------------------------------------------------------- 1 | # Can be referenced in the lm-proxy configuration files following way: 2 | # api_key = "env:" 3 | OPENAI_API_KEY=sk-u........ 4 | GOOGLE_API_KEY=AI........ 5 | ANTHROPIC_API_KEY=sk-ant-api03--vE........ 6 | 7 | # "1", "TRUE", "YES", "ON", "ENABLED", "Y", "+" are true, case-insensitive. 8 | # See https://github.com/Nayjest/ai-microcore/blob/v4.4.3/microcore/configuration.py#L36 9 | LM_PROXY_DEBUG=no 10 | -------------------------------------------------------------------------------- /.github/workflows/code-style.yml: -------------------------------------------------------------------------------- 1 | name: Code Style 2 | 3 | on: [push] 4 | 5 | jobs: 6 | cs: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3 10 | - uses: actions/setup-python@v3 11 | with: 12 | python-version: 3.12 13 | - name: Install dependencies 14 | run: pip install --upgrade pip flake8 pylint 15 | - name: Run flake8 16 | run: flake8 . 
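Following the "env:" convention documented in .env.template above, configuration values of the form "env:VAR_NAME" are expanded from environment variables. A small sketch (variable name and value are illustrative):

    import os
    from lm_proxy.utils import replace_env_strings_recursive

    os.environ["OPENAI_API_KEY"] = "sk-example"
    cfg = {"connections": {"openai": {"api_key": "env:OPENAI_API_KEY"}}}
    print(replace_env_strings_recursive(cfg))   # api_key becomes "sk-example"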
17 | -------------------------------------------------------------------------------- /lm_proxy/config_loaders/python.py: -------------------------------------------------------------------------------- 1 | """Loader for Python configuration files.""" 2 | import importlib.util 3 | from ..config import Config 4 | 5 | 6 | def load_python_config(config_path: str) -> Config: 7 | """Load configuration from a Python file.""" 8 | spec = importlib.util.spec_from_file_location("config_module", config_path) 9 | config_module = importlib.util.module_from_spec(spec) 10 | spec.loader.exec_module(config_module) 11 | return config_module.config 12 | -------------------------------------------------------------------------------- /tests/configs/test_config.toml: -------------------------------------------------------------------------------- 1 | host = "127.0.0.1" 2 | port = 8787 3 | [connections] 4 | 5 | [connections.test_openai] 6 | api_type = "open_ai" 7 | api_base = "https://api.openai.com/v1/" 8 | api_key = "env:OPENAI_API_KEY" 9 | 10 | [connections.test_google] 11 | api_type = "google_ai_studio" 12 | api_key = "env:GOOGLE_API_KEY" 13 | 14 | [connections.test_anthropic] 15 | api_type = "anthropic" 16 | api_key = "env:ANTHROPIC_API_KEY" 17 | [routing] 18 | "gpt*" = "test_openai.*" 19 | "claude*" = "test_anthropic.*" 20 | "gemini*" = "test_google.*" 21 | "*" = "test_openai.gpt-5" 22 | [groups.default] 23 | api_keys = [] -------------------------------------------------------------------------------- /tests/configs/test_config.yml: -------------------------------------------------------------------------------- 1 | host: "127.0.0.1" 2 | port: 8787 3 | 4 | connections: 5 | test_openai: 6 | api_type: "open_ai" 7 | api_base: "https://api.openai.com/v1/" 8 | api_key: "env:OPENAI_API_KEY" 9 | test_google: 10 | api_type: "google_ai_studio" 11 | api_key: "env:GOOGLE_API_KEY" 12 | test_anthropic: 13 | api_type: "anthropic" 14 | api_key: "env:ANTHROPIC_API_KEY" 15 | 16 | routing: 17 | "gpt*": "test_openai.*" 18 | "claude*": "test_anthropic.*" 19 | "gemini*": "test_google.*" 20 | "*": "test_openai.gpt-5" 21 | 22 | groups: 23 | default: 24 | api_keys: [] 25 | -------------------------------------------------------------------------------- /lm_proxy/config_loaders/yaml.py: -------------------------------------------------------------------------------- 1 | """YAML configuration loader.""" 2 | 3 | 4 | def load_yaml_config(config_path: str) -> dict: 5 | """Loads a YAML configuration file and returns its contents as a dictionary.""" 6 | try: 7 | import yaml # pylint: disable=import-outside-toplevel 8 | except ImportError as e: 9 | raise ImportError( 10 | "Missing optional dependency 'PyYAML'. " 11 | "For using YAML configuration files with LM-Proxy, " 12 | "please install it with the following command: 'pip install pyyaml'." 13 | ) from e 14 | 15 | with open(config_path, "r", encoding="utf-8") as f: 16 | return yaml.safe_load(f) 17 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | run: 2 | fastapi run lm_proxy 3 | # alternative: python -m lm_proxy 4 | start: run 5 | cs: 6 | flake8 . 7 | black: 8 | black . 9 | 10 | 11 | install: 12 | pip install -e . 
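# A typical local workflow (sketch; the config path is illustrative):
#   pip install -e .                 # same as `make install`
#   lm-proxy --config config.toml    # or `make run` / `python -m lm_proxy`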
13 | 14 | pkg: 15 | python multi-build.py 16 | build: pkg 17 | 18 | clear-dist: 19 | python -c "import shutil, os; shutil.rmtree('dist', ignore_errors=True); os.makedirs('dist', exist_ok=True)" 20 | clr-dist: clear-dist 21 | 22 | publish: 23 | python -c "import os;t=os.getenv('PYPI_TOKEN');__import__('subprocess').run(f'python -m twine upload dist/* -u __token__ -p {t}',shell=True)" 24 | 25 | upload: publish 26 | test: 27 | pytest --log-cli-level=INFO 28 | tests: test 29 | integration-test: 30 | pytest tests/test_integration.py -v -------------------------------------------------------------------------------- /tests/configs/test_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "host": "127.0.0.1", 3 | "port": 8787, 4 | "connections": { 5 | "test_openai": { 6 | "api_type": "open_ai", 7 | "api_base": "https://api.openai.com/v1/", 8 | "api_key": "env:OPENAI_API_KEY" 9 | }, 10 | "test_google": { 11 | "api_type": "google_ai_studio", 12 | "api_key": "env:GOOGLE_API_KEY" 13 | }, 14 | "test_anthropic": { 15 | "api_type": "anthropic", 16 | "api_key": "env:ANTHROPIC_API_KEY" 17 | } 18 | }, 19 | "routing": { 20 | "gpt*": "test_openai.*", 21 | "claude*": "test_anthropic.*", 22 | "gemini*": "test_google.*", 23 | "*": "test_openai.gpt-5" 24 | }, 25 | "groups": { 26 | "default": { 27 | "api_keys": [] 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /tests/configs/config_fn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | import microcore as mc 4 | 5 | root = Path(__file__).resolve().parents[3] 6 | sys.path.append(str(root)) 7 | 8 | from lm_proxy.config import Config, Group # noqa 9 | 10 | 11 | def custom_api_key_check(api_key: str) -> str | None: 12 | return "default" if api_key == "py-test" else None 13 | 14 | 15 | mc.configure( 16 | DOT_ENV_FILE=".env", 17 | EMBEDDING_DB_TYPE=mc.EmbeddingDbType.NONE, 18 | ) 19 | 20 | config = Config( 21 | port=8123, 22 | host="127.0.0.1", 23 | api_key_check=custom_api_key_check, 24 | connections={"py_oai": mc.env().llm_async_function}, 25 | routing={"*": "py_oai.gpt-3.5-turbo", "my-gpt": "py_oai.gpt-3.5-turbo"}, 26 | groups={"default": Group(connections="*")}, 27 | ) 28 | -------------------------------------------------------------------------------- /tests/test_resolve_connection_and_model.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from lm_proxy.config import Config 3 | from lm_proxy.core import resolve_connection_and_model 4 | 5 | 6 | async def test_resolve_connection_and_model(): 7 | c = Config(connections={"a": {}, "b": {}, "c": {}}) 8 | with pytest.raises(ValueError, match="matched"): 9 | resolve_connection_and_model(c, "model") 10 | c.routing = { 11 | "client-model": "a.provider-model", 12 | } 13 | assert resolve_connection_and_model(c, "client-model") == ("a", "provider-model") 14 | 15 | c.routing["gpt*"] = "c.model" 16 | assert resolve_connection_and_model(c, "gpt-8") == ("c", "model") 17 | 18 | c.routing["*"] = "b.*" 19 | assert resolve_connection_and_model(c, "client-model2") == ("b", "client-model2") 20 | -------------------------------------------------------------------------------- /tests/test_api_key_check_allow_all.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from starlette.requests import Request 4 | 5 | from 
lm_proxy.bootstrap import bootstrap 6 | from lm_proxy.core import check 7 | from lm_proxy.api_key_check import AllowAll 8 | 9 | 10 | async def test_allow_all(): 11 | root = Path(__file__).resolve().parent 12 | bootstrap(root / "configs" / "no_api_key_check.yml") 13 | assert await check(Request(scope={ 14 | "type": "http", 15 | "headers": [], 16 | })) == ("default", "", {"api_key": ""}) 17 | 18 | # Test with key 19 | assert await check(Request(scope={ 20 | "type": "http", 21 | "headers": [(b"authorization", b"Bearer 11")], 22 | })) == ("default", "11", {"api_key": "11"}) 23 | 24 | assert AllowAll()("") == ("default", {"api_key": ""}) 25 | assert AllowAll(capture_api_key=False)("") == ("default", {}) 26 | -------------------------------------------------------------------------------- /tests/test_config_loaders.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | import dotenv 5 | import pytest 6 | 7 | from lm_proxy.config import Config 8 | 9 | 10 | def test_config_loaders(): 11 | root = Path(__file__).resolve().parent 12 | dotenv.load_dotenv(root.parent / ".env.template", override=True) 13 | oai_key = os.getenv("OPENAI_API_KEY") 14 | toml = Config.load(root / "configs" / "test_config.toml") 15 | json = Config.load(root / "configs" / "test_config.json") 16 | yaml = Config.load(root / "configs" / "test_config.yml") 17 | 18 | assert json.model_dump() == yaml.model_dump() == toml.model_dump() 19 | assert json.connections["test_openai"]["api_key"] == oai_key 20 | 21 | py = Config.load(root / "configs" / "config_fn.py") 22 | assert isinstance(py, Config) 23 | 24 | # Expect an error for unsupported format 25 | with pytest.raises(ValueError): 26 | Config.load(root / "configs" / "test_config.xyz") 27 | -------------------------------------------------------------------------------- /tests/test_api_key_check_errors.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from starlette.requests import Request 4 | from fastapi import HTTPException 5 | 6 | from lm_proxy.bootstrap import bootstrap 7 | from lm_proxy.config import Config 8 | from lm_proxy.core import check 9 | 10 | 11 | async def test_disabled(): 12 | bootstrap(Config(enabled=False, connections={})) 13 | with pytest.raises(HTTPException, match="disabled"): 14 | await check(Request(scope={ 15 | "type": "http", 16 | "headers": [], 17 | })) 18 | 19 | 20 | async def test_403(): 21 | bootstrap(Config(connections={})) 22 | with pytest.raises(HTTPException) as excinfo: 23 | await check(Request(scope={ 24 | "type": "http", 25 | "headers": [ 26 | (b"authorization", b"Bearer mykey"), 27 | ], 28 | })) 29 | assert excinfo.value.status_code == 403 30 | assert "Incorrect API key" in str(excinfo.value) 31 | -------------------------------------------------------------------------------- /lm_proxy/api_key_check/in_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | API Key check implementation that validates against configured groups. 3 | 4 | Checks if a provided API key exists within any of the defined groups. 5 | For using this function, 6 | set "api_key_check" configuration value to "lm_proxy.api_key_check.check_api_key_in_config". 
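For example, in a TOML configuration (group name and key below are illustrative):

    api_key_check = "lm_proxy.api_key_check.check_api_key_in_config"

    [groups.team_a]
    api_keys = ["TEAM_A_KEY_1"]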
7 | """ 8 | from typing import Optional 9 | from ..bootstrap import env 10 | 11 | 12 | def check_api_key_in_config(api_key: Optional[str]) -> Optional[str]: 13 | """ 14 | Validates a Client API key against configured groups and returns the matching group name. 15 | 16 | Args: 17 | api_key (Optional[str]): The Virtual / Client API key to validate. 18 | Returns: 19 | Optional[str]: The group name if the API key is valid and found in a group, 20 | None otherwise. 21 | """ 22 | for group_name, group in env.config.groups.items(): 23 | if api_key in group.api_keys: 24 | return group_name 25 | return None 26 | -------------------------------------------------------------------------------- /coverage.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | coverage 17 | coverage 18 | 71% 19 | 71% 20 | 21 | 22 | -------------------------------------------------------------------------------- /examples/vertex-ai.toml: -------------------------------------------------------------------------------- 1 | # Pre-requisites: Python v3.11 / v3.12 / v3.13 2 | # Steps: 3 | # 1. Install LM-Proxy: 4 | # > pip install lm-proxy 5 | # 2. Install Vertex AI API: 6 | # > pip install vertexai 7 | # 3. Install Google Cloud SDK: https://cloud.google.com/sdk/docs/install 8 | # 4. Authenticate with Google Cloud: 9 | # > gcloud auth application-default login 10 | # 5. Set your Google Cloud project: 11 | # > gcloud config set project 12 | # 6. Save this config as `vertex-ai.toml` 13 | # 7. Fill in connections.vertex_ai.google_vertex_project_id and Virtual API Keys for usage in LLM client applications. 14 | # 8. Run LM-Proxy with this config: 15 | # > lm-proxy --config vertex-ai.toml 16 | 17 | [connections] 18 | [connections.vertex_ai] 19 | api_type = "google_vertex_ai" 20 | google_vertex_project_id = "" 21 | google_vertex_gcloud_auth = true 22 | google_vertex_response_validation = true 23 | model = "gemini-2.5-pro" 24 | 25 | [routing] 26 | "gemini-2.5-pro" = "vertex_ai.gemini-2.5-pro" 27 | 28 | [groups.default] 29 | # Your Virual API Keys here 30 | api_keys = [ 31 | "KEY1", 32 | "KEY2" 33 | ] 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Vitalii Stepanenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
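To show how a client would call a proxy configured like examples/vertex-ai.toml above, a sketch using the standard OpenAI SDK (host, port and API key are illustrative; tests/test_integration.py follows the same pattern):

    from openai import OpenAI

    client = OpenAI(api_key="KEY1", base_url="http://127.0.0.1:8000/v1")
    reply = client.chat.completions.create(
        model="gemini-2.5-pro",
        messages=[{"role": "user", "content": "Hello from LM-Proxy"}],
    )
    print(reply.choices[0].message.content)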
22 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pytest 3 | import subprocess 4 | import time 5 | import signal 6 | from pathlib import Path 7 | from dataclasses import dataclass, field 8 | from typing import Any 9 | import microcore as mc 10 | 11 | 12 | @dataclass 13 | class ServerFixture: 14 | port: int 15 | process: Any 16 | api_key: str 17 | model: str = field(default=None) 18 | 19 | 20 | @pytest.fixture(scope="session") 21 | def server_config_fn(): 22 | """Fixture that starts the LM-Proxy server for testing and stops it after tests complete.""" 23 | test_config_path = Path("tests/configs/config_fn.py") 24 | server_process = subprocess.Popen( 25 | [sys.executable, "-m", "lm_proxy.app", "--config", str(test_config_path)], 26 | ) 27 | time.sleep(2) 28 | from tests.configs.config_fn import config 29 | 30 | yield ServerFixture( 31 | port=config.port, 32 | process=server_process, 33 | model="any-model", 34 | api_key="py-test", 35 | ) 36 | server_process.send_signal(signal.SIGTERM) 37 | server_process.wait() 38 | 39 | 40 | async def llm_ok_connection(*args, **kwargs): 41 | return mc.LLMResponse("ok") 42 | -------------------------------------------------------------------------------- /multi-build.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | import subprocess 4 | 5 | NAMES = [ 6 | ["lm-proxy", "LM-Proxy"], 7 | ["llm-proxy-server", "LLM Proxy Server"], 8 | ["ai-proxy-server", "AI Proxy Server"], 9 | ["lm-proxy-server", "LM Proxy Server"], 10 | ["openai-http-proxy", "OpenAI HTTP Proxy"], 11 | ["inference-proxy", "Inference Proxy"], 12 | ["oai-proxy", "OAI Proxy"], 13 | ] 14 | FILES = [ 15 | "pyproject.toml", 16 | "README.md", 17 | ] 18 | 19 | 20 | def replace_name(old_names: list[str], new_names: list[str], files: list[str] = None): 21 | files = files or FILES 22 | for i in range(len(old_names)): 23 | old_name = old_names[i] 24 | new_name = new_names[i] 25 | for path in files: 26 | p = Path(path) 27 | p.write_text( 28 | re.sub( 29 | fr'(? tuple[str, dict[str, Optional[str]]]: 32 | """ 33 | Validate an API key (accepts all keys without verification). 34 | 35 | Args: 36 | api_key: The API key to validate. Can be None. 37 | 38 | Returns: 39 | A tuple containing: 40 | - The default group identifier (str) 41 | - user_info dictionary with the API key if capture_api_key 42 | is True, otherwise an empty dictionary 43 | 44 | Note: 45 | This method never raises authentication errors and always returns 46 | successfully, regardless of the input. 47 | """ 48 | user_info = {"api_key": api_key} if self.capture_api_key else {} 49 | return self.group, user_info 50 | -------------------------------------------------------------------------------- /lm_proxy/models_endpoint.py: -------------------------------------------------------------------------------- 1 | """ 2 | Models list endpoint 3 | """ 4 | 5 | from starlette.requests import Request 6 | from starlette.responses import JSONResponse 7 | 8 | from .bootstrap import env 9 | from .core import check, parse_routing_rule 10 | from .config import ModelListingMode, Group 11 | 12 | 13 | async def models(request: Request) -> JSONResponse: 14 | """ 15 | Lists available models based on routing rules and group permissions. 
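    Only routing rules whose target connection is allowed for the caller's group are
    included; wildcard patterns ("*", "?") are listed as-is or skipped depending on the
    configured model_listing_mode.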
16 | """ 17 | group_name, api_key, user_info = await check(request) 18 | group: Group = env.config.groups[group_name] 19 | models_list = [] 20 | for model_pattern, route in env.config.routing.items(): 21 | connection_name, _ = parse_routing_rule(route, env.config) 22 | if group.allows_connecting_to(connection_name): 23 | is_model_name = not ("*" in model_pattern or "?" in model_pattern) 24 | if not is_model_name: 25 | if env.config.model_listing_mode != ModelListingMode.AS_IS: 26 | if ( 27 | env.config.model_listing_mode 28 | == ModelListingMode.IGNORE_WILDCARDS 29 | ): 30 | continue 31 | raise NotImplementedError( 32 | f"'{env.config.model_listing_mode}' model listing mode " 33 | f"is not implemented yet" 34 | ) 35 | model_data = { 36 | "id": model_pattern, 37 | "object": "model", 38 | "created": 0, 39 | "owned_by": connection_name, 40 | } 41 | 42 | if aux_info := env.config.model_info.get(model_pattern): 43 | model_data.update(aux_info) 44 | models_list.append(model_data) 45 | 46 | return JSONResponse( 47 | { 48 | "object": "list", 49 | "data": models_list, 50 | } 51 | ) 52 | -------------------------------------------------------------------------------- /.github/workflows/gito-code-review.yml: -------------------------------------------------------------------------------- 1 | name: "Gito: AI Code Reviewer" 2 | on: 3 | pull_request: 4 | types: [opened, synchronize, reopened] 5 | workflow_dispatch: 6 | inputs: 7 | pr_number: 8 | description: "Pull Request number" 9 | required: true 10 | jobs: 11 | review: 12 | runs-on: ubuntu-latest 13 | permissions: { contents: read, pull-requests: write } # 'write' for leaving the summary comment 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: { fetch-depth: 0 } 17 | 18 | - name: Set up Python 19 | uses: actions/setup-python@v5 20 | with: { python-version: "3.13" } 21 | 22 | - name: Fetch Latest Gito Version 23 | id: gito-version 24 | run: pip index versions gito.bot 2>/dev/null | head -1 | sed -n 's/.* (\([^)]*\)).*/version=\1/p' >> $GITHUB_OUTPUT 25 | 26 | - uses: actions/cache@v4 27 | id: cache 28 | with: 29 | path: | 30 | ${{ env.pythonLocation }}/lib/python3.13/site-packages 31 | ${{ env.pythonLocation }}/bin 32 | key: gito_v${{ steps.gito-version.outputs.version }} 33 | 34 | - name: Install Gito 35 | if: steps.cache.outputs.cache-hit != 'true' 36 | run: pip install gito.bot~=3.2 37 | 38 | - name: Run AI code review 39 | env: 40 | LLM_API_TYPE: open_ai 41 | LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} 42 | MODEL: gpt-5 43 | JIRA_TOKEN: ${{ secrets.JIRA_TOKEN }} 44 | JIRA_URL: ${{ secrets.JIRA_URL }} 45 | JIRA_USER: ${{ secrets.JIRA_USER }} 46 | LINEAR_API_KEY: ${{ secrets.LINEAR_API_KEY }} 47 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 48 | PR_NUMBER_FROM_WORKFLOW_DISPATCH: ${{ github.event.inputs.pr_number }} 49 | run: | 50 | gito --verbose review 51 | gito github-comment 52 | 53 | - uses: actions/upload-artifact@v4 54 | with: 55 | name: gito-code-review-results 56 | path: | 57 | code-review-report.md 58 | code-review-report.json -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: write 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.11", "3.12", "3.13"] 20 | 21 | steps: 22 | 
- uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | pip install poetry 30 | poetry install 31 | echo "$(poetry env info --path)/bin" >> $GITHUB_PATH 32 | - name: Test with pytest 33 | if: matrix.python-version != '3.13' 34 | env: 35 | LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} 36 | run: | 37 | pytest 38 | - name: Test with pytest +coverage 39 | if: matrix.python-version == '3.13' 40 | env: 41 | LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} 42 | run: | 43 | pytest --cov=lm_proxy --cov-report=xml 44 | - name: Generate coverage badge 45 | if: matrix.python-version == '3.13' && (github.event_name == 'push' || github.event_name == 'pull_request') 46 | uses: tj-actions/coverage-badge-py@v2 47 | with: 48 | output: 'coverage.svg' 49 | - name: Commit coverage badge 50 | if: matrix.python-version == '3.13' && (github.event_name == 'push' || github.event_name == 'pull_request') 51 | run: | 52 | git config --local user.email "action@github.com" 53 | git config --local user.name "GitHub Action" 54 | git fetch origin 55 | git checkout ${{ github.head_ref || github.ref_name }} -- 56 | git add coverage.svg 57 | git commit -m "Update coverage badge [skip ci]" || echo "No changes to commit" 58 | git push 59 | env: 60 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 61 | 62 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "lm-proxy" 3 | version = "2.1.1" 4 | description = "\"LM-Proxy\" is OpenAI-compatible http proxy server for inferencing various LLMs capable of working with Google, Anthropic, OpenAI APIs, local PyTorch inference, etc." 
5 | readme = "README.md" 6 | keywords = ["llm", "large language models", "ai", "gpt", "openai", "proxy", "http", "proxy-server"] 7 | classifiers = [ 8 | "Intended Audience :: Developers", 9 | "Operating System :: OS Independent", 10 | "Programming Language :: Python :: 3", 11 | "Programming Language :: Python :: 3.11", 12 | "Programming Language :: Python :: 3.12", 13 | "Programming Language :: Python :: 3.13", 14 | "License :: OSI Approved :: MIT License", 15 | ] 16 | dependencies = [ 17 | "ai-microcore~=4.4.4", 18 | "fastapi>=0.121.3,<1", 19 | "uvicorn>=0.22.0", 20 | "typer>=0.16.1", 21 | "requests~=2.32.3", 22 | "pydantic~=2.12.3", 23 | ] 24 | 25 | requires-python = ">=3.11,<4" 26 | 27 | authors = [ 28 | { name = "Vitalii Stepanenko", email = "mail@vitaliy.in" }, 29 | ] 30 | maintainers = [ 31 | { name = "Vitalii Stepanenko", email = "mail@vitaliy.in" }, 32 | ] 33 | license = { file = "LICENSE" } 34 | 35 | [project.urls] 36 | "Source Code" = "https://github.com/Nayjest/lm-proxy" 37 | 38 | [project.entry-points."config.loaders"] 39 | toml = "lm_proxy.config_loaders:load_toml_config" 40 | py = "lm_proxy.config_loaders:load_python_config" 41 | yml = "lm_proxy.config_loaders:load_yaml_config" 42 | yaml = "lm_proxy.config_loaders:load_yaml_config" 43 | json = "lm_proxy.config_loaders:load_json_config" 44 | 45 | [build-system] 46 | requires = ["poetry-core"] 47 | build-backend = "poetry.core.masonry.api" 48 | 49 | [tool.poetry] 50 | package-mode = true 51 | packages = [{ include = "lm_proxy"}] 52 | 53 | [tool.poetry.group.test.dependencies] 54 | pytest = "~=8.4.2" 55 | pytest-asyncio = "~=1.2.0" 56 | pytest-cov = "~7.0.0" 57 | 58 | [tool.poetry.scripts] 59 | lm-proxy = "lm_proxy.app:cli_app" 60 | 61 | [tool.pytest.ini_options] 62 | asyncio_mode = "auto" 63 | testpaths = [ 64 | "tests", 65 | ] 66 | -------------------------------------------------------------------------------- /lm_proxy/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | LM-Proxy Application Entrypoint 3 | """ 4 | import logging 5 | from typing import Optional 6 | from fastapi import FastAPI 7 | import typer 8 | import uvicorn 9 | 10 | from .bootstrap import env, bootstrap 11 | from .core import chat_completions 12 | from .models_endpoint import models 13 | 14 | cli_app = typer.Typer() 15 | 16 | 17 | @cli_app.callback(invoke_without_command=True) 18 | def run_server( 19 | config: Optional[str] = typer.Option(None, help="Path to the configuration file"), 20 | debug: Optional[bool] = typer.Option( 21 | None, help="Enable debug mode (more verbose logging)" 22 | ), 23 | env_file: Optional[str] = typer.Option( 24 | ".env", 25 | "--env", 26 | "--env-file", 27 | "--env_file", 28 | help="Set the .env file to load ENV vars from", 29 | ), 30 | ): 31 | """ 32 | Default command for CLI application: Run LM-Proxy web server 33 | """ 34 | try: 35 | bootstrap(config=config or "config.toml", env_file=env_file, debug=debug) 36 | uvicorn.run( 37 | "lm_proxy.app:web_app", 38 | host=env.config.host, 39 | port=env.config.port, 40 | ssl_keyfile=env.config.ssl_keyfile or None, 41 | ssl_certfile=env.config.ssl_certfile or None, 42 | reload=env.config.dev_autoreload, 43 | factory=True, 44 | ) 45 | except Exception as e: 46 | if env.debug: 47 | raise 48 | logging.error(e) 49 | raise typer.Exit(code=1) 50 | 51 | 52 | def web_app(): 53 | """ 54 | Entrypoint for ASGI server 55 | """ 56 | app = FastAPI( 57 | title="LM-Proxy", description="OpenAI-compatible proxy server for LLM inference" 58 | ) 59 | 
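    # Expose the OpenAI-compatible endpoints under the configured api_prefix.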
app.add_api_route( 60 | path=f"{env.config.api_prefix}/chat/completions", 61 | endpoint=chat_completions, 62 | methods=["POST"], 63 | ) 64 | app.add_api_route( 65 | path=f"{env.config.api_prefix}/models", 66 | endpoint=models, 67 | methods=["GET"], 68 | ) 69 | return app 70 | 71 | 72 | if __name__ == "__main__": 73 | cli_app() 74 | -------------------------------------------------------------------------------- /lm_proxy/base_types.py: -------------------------------------------------------------------------------- 1 | """Base types used in LM-Proxy.""" 2 | import uuid 3 | from dataclasses import dataclass, field 4 | from datetime import datetime 5 | from typing import List, Optional, TYPE_CHECKING 6 | 7 | import microcore as mc 8 | from pydantic import BaseModel 9 | 10 | if TYPE_CHECKING: 11 | from .config import Group 12 | 13 | 14 | class ChatCompletionRequest(BaseModel): 15 | """ 16 | Request model for chat/completions endpoint. 17 | """ 18 | model: str 19 | messages: List[mc.Msg | dict] 20 | # | dict --> support of messages with lists of dicts 21 | # defining distinct content-parts inside 'content' field 22 | stream: Optional[bool] = None 23 | max_tokens: Optional[int] = None 24 | temperature: Optional[float] = None 25 | top_p: Optional[float] = None 26 | n: Optional[int] = None 27 | stop: Optional[List[str]] = None 28 | presence_penalty: Optional[float] = None 29 | frequency_penalty: Optional[float] = None 30 | user: Optional[str] = None 31 | 32 | 33 | @dataclass 34 | class RequestContext: # pylint: disable=too-many-instance-attributes 35 | """ 36 | Stores information about a single LLM request/response cycle for usage in middleware. 37 | """ 38 | id: Optional[str] = field(default_factory=lambda: str(uuid.uuid4())) 39 | request: Optional[ChatCompletionRequest] = field(default=None) 40 | response: Optional[mc.LLMResponse] = field(default=None) 41 | error: Optional[Exception] = field(default=None) 42 | group: Optional["Group"] = field(default=None) 43 | connection: Optional[str] = field(default=None) 44 | model: Optional[str] = field(default=None) 45 | api_key_id: Optional[str] = field(default=None) 46 | remote_addr: Optional[str] = field(default=None) 47 | created_at: Optional[datetime] = field(default_factory=datetime.now) 48 | duration: Optional[float] = field(default=None) 49 | user_info: Optional[dict] = field(default=None) 50 | extra: dict = field(default_factory=dict) 51 | 52 | def to_dict(self) -> dict: 53 | """Export as dictionary.""" 54 | data = self.__dict__.copy() 55 | if self.request: 56 | data["request"] = self.request.model_dump(mode="json") 57 | return data 58 | -------------------------------------------------------------------------------- /tests/test_loggers.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import microcore as mc 4 | 5 | from lm_proxy.core import log_non_blocking 6 | from lm_proxy.base_types import ChatCompletionRequest, RequestContext 7 | from lm_proxy.config import Config 8 | from lm_proxy.bootstrap import bootstrap 9 | from lm_proxy.utils import CustomJsonEncoder 10 | 11 | 12 | async def test_custom_config(): 13 | 14 | logs = [] 15 | bootstrap( 16 | Config( 17 | connections={}, 18 | loggers=[ 19 | { 20 | "class": "lm_proxy.loggers.BaseLogger", 21 | "log_writer": lambda data: logs.append( 22 | json.dumps(data, cls=CustomJsonEncoder) 23 | ), 24 | } 25 | ], 26 | ) 27 | ) 28 | request = ChatCompletionRequest( 29 | model="gpt-3.5-turbo", 30 | messages=[{"role": "user", "content": "Test request 
message"}], 31 | ) 32 | response = mc.LLMResponse("Test response message", dict(prompt=request.messages)) 33 | task = await log_non_blocking(RequestContext(request=request, response=response)) 34 | if task: 35 | await task 36 | assert len(logs) == 1 37 | log_data = json.loads(logs[0]) 38 | assert log_data["request"]["model"] == "gpt-3.5-turbo" 39 | assert log_data["response"] == "Test response message" 40 | 41 | 42 | async def test_json(tmp_path): 43 | bootstrap( 44 | Config( 45 | connections={}, 46 | loggers=[ 47 | { 48 | "class": "lm_proxy.loggers.BaseLogger", 49 | "log_writer": { 50 | "class": "lm_proxy.loggers.JsonLogWriter", 51 | "file_name": tmp_path / "json_log.log", 52 | }, 53 | } 54 | ], 55 | ) 56 | ) 57 | request = ChatCompletionRequest( 58 | model="gpt-3.5-turbo", 59 | messages=[{"role": "user", "content": "Test request message"}], 60 | ) 61 | response = mc.LLMResponse("Test response message", dict(prompt=request.messages)) 62 | task = await log_non_blocking(RequestContext(request=request, response=response)) 63 | if task: 64 | await task 65 | task = await log_non_blocking(RequestContext(request=request, response=response)) 66 | if task: 67 | await task 68 | with open(tmp_path / "json_log.log", "r") as f: 69 | lines = f.readlines() 70 | assert len(lines) == 2 71 | log_data = json.loads(lines[0]) 72 | assert log_data["request"]["model"] == "gpt-3.5-turbo" 73 | assert log_data["response"] == "Test response message" 74 | -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import microcore as mc 2 | import requests 3 | from tests.conftest import ServerFixture 4 | 5 | 6 | def configure_mc_to_use_local_proxy(cfg: ServerFixture): 7 | mc.configure( 8 | LLM_API_TYPE="openai", 9 | LLM_API_BASE=f"http://127.0.0.1:{cfg.port}/v1", # Test server port 10 | LLM_API_KEY=cfg.api_key, # Not used but required 11 | MODEL=cfg.model, 12 | ) 13 | 14 | 15 | def test_france_capital_query(server_config_fn: ServerFixture): 16 | configure_mc_to_use_local_proxy(server_config_fn) 17 | response = mc.llm("What is the capital of France?\n (!) 
Respond with 1 word.") 18 | assert ( 19 | "paris" in response.lower().strip() 20 | ), f"Expected 'Paris' in response, got: {response}" 21 | 22 | 23 | def test_direct_api_call(server_config_fn: ServerFixture): 24 | """Test directly calling the API without microcore.""" 25 | cfg = server_config_fn 26 | response = requests.post( 27 | f"http://127.0.0.1:{cfg.port}/v1/chat/completions", 28 | json={ 29 | "model": cfg.model, 30 | "messages": [{"role": "user", "content": "What is the capital of France?"}], 31 | }, 32 | headers={ 33 | "Content-Type": "application/json", 34 | "authorization": f"bearer {cfg.api_key}", 35 | }, 36 | timeout=120, 37 | ) 38 | 39 | assert ( 40 | response.status_code == 200 41 | ), f"Expected status code 200, got {response.status_code}" 42 | 43 | data = response.json() 44 | assert "choices" in data, f"Missing 'choices' in response: {data}" 45 | assert len(data["choices"]) > 0, "No choices returned" 46 | assert ( 47 | "message" in data["choices"][0] 48 | ), f"Missing 'message' in first choice: {data['choices'][0]}" 49 | assert ( 50 | "Paris" in data["choices"][0]["message"]["content"] 51 | ), f"Expected 'Paris' in response, got: {data['choices'][0]['message']['content']}" 52 | 53 | 54 | def test_streaming_response(server_config_fn: ServerFixture): 55 | configure_mc_to_use_local_proxy(server_config_fn) 56 | collected_text = [] 57 | mc.llm( 58 | "Count from 1 to 5, each number as english word (one, two, ...) on a new line", 59 | callback=lambda chunk: collected_text.append(str(chunk).lower()), 60 | ) 61 | full_response = "".join(collected_text) 62 | for i in ["one", "two", "three", "four", "five"]: 63 | assert i in full_response, f"Expected '{i}' in response, got: {full_response}" 64 | assert len(collected_text) >= 1 65 | 66 | 67 | def test_models(server_config_fn: ServerFixture): 68 | """Test directly calling the API without microcore.""" 69 | cfg = server_config_fn 70 | from openai import OpenAI 71 | 72 | client = OpenAI(api_key=cfg.api_key, base_url=f"http://127.0.0.1:{cfg.port}/v1") 73 | models = client.models.list() 74 | assert len(models.data) == 2, "Wrong models returned" 75 | model_ids = {model.id for model in models.data} 76 | assert model_ids == {"my-gpt", "*"} 77 | -------------------------------------------------------------------------------- /lm_proxy/loggers.py: -------------------------------------------------------------------------------- 1 | """LLM Request logging.""" 2 | import abc 3 | import json 4 | import os 5 | from dataclasses import dataclass, field 6 | from typing import Union, Callable 7 | 8 | from .base_types import RequestContext 9 | from .utils import CustomJsonEncoder, resolve_instance_or_callable, resolve_obj_path 10 | 11 | 12 | class AbstractLogEntryTransformer(abc.ABC): # pylint: disable=too-few-public-methods 13 | """Transform RequestContext into a dictionary of logged attributes.""" 14 | @abc.abstractmethod 15 | def __call__(self, request_context: RequestContext) -> dict: 16 | raise NotImplementedError() 17 | 18 | 19 | class AbstractLogWriter(abc.ABC): # pylint: disable=too-few-public-methods 20 | """Writes the logged data to a destination.""" 21 | @abc.abstractmethod 22 | def __call__(self, logged_data: dict): 23 | raise NotImplementedError() 24 | 25 | 26 | class LogEntryTransformer(AbstractLogEntryTransformer): # pylint: disable=too-few-public-methods 27 | """ 28 | Transforms RequestContext into a dictionary of logged attributes. 
29 | The mapping is provided as keyword arguments, where keys are the names of the 30 | logged attributes, and values are the paths to the attributes in RequestContext. 31 | """ 32 | def __init__(self, **kwargs): 33 | self.mapping = kwargs 34 | 35 | def __call__(self, request_context: RequestContext) -> dict: 36 | result = {} 37 | for key, path in self.mapping.items(): 38 | result[key] = resolve_obj_path(request_context, path) 39 | return result 40 | 41 | 42 | @dataclass 43 | class BaseLogger: 44 | """Base LLM request logger.""" 45 | log_writer: AbstractLogWriter | str | dict 46 | entry_transformer: AbstractLogEntryTransformer | str | dict = field(default=None) 47 | 48 | def __post_init__(self): 49 | self.entry_transformer = resolve_instance_or_callable( 50 | self.entry_transformer, 51 | debug_name="logging..entry_transformer", 52 | ) 53 | self.log_writer = resolve_instance_or_callable( 54 | self.log_writer, 55 | debug_name="logging..log_writer", 56 | ) 57 | 58 | def _transform(self, request_context: RequestContext) -> dict: 59 | return ( 60 | self.entry_transformer(request_context) 61 | if self.entry_transformer 62 | else request_context.to_dict() 63 | ) 64 | 65 | def __call__(self, request_context: RequestContext): 66 | self.log_writer(self._transform(request_context)) 67 | 68 | 69 | @dataclass 70 | class JsonLogWriter(AbstractLogWriter): 71 | """Writes logged data to a JSON file.""" 72 | file_name: str 73 | 74 | def __post_init__(self): 75 | dir_path = os.path.dirname(self.file_name) 76 | if dir_path: 77 | os.makedirs(dir_path, exist_ok=True) 78 | # Create the file if it doesn't exist 79 | with open(self.file_name, "a", encoding="utf-8"): 80 | pass 81 | 82 | def __call__(self, logged_data: dict): 83 | with open(self.file_name, "a", encoding="utf-8") as f: 84 | f.write(json.dumps(logged_data, cls=CustomJsonEncoder) + "\n") 85 | 86 | 87 | TLogger = Union[BaseLogger, Callable[[RequestContext], None]] 88 | -------------------------------------------------------------------------------- /.github/workflows/gito-react-to-comments.yml: -------------------------------------------------------------------------------- 1 | name: "Gito: React to GitHub comment" 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | permissions: 8 | contents: write # to make PR 9 | issues: write 10 | pull-requests: write 11 | # read: to download the code review artifact 12 | # write: to trigger other actions 13 | actions: write 14 | 15 | jobs: 16 | process-comment: 17 | if: | 18 | github.event.issue.pull_request && 19 | ( 20 | github.event.comment.author_association == 'OWNER' || 21 | github.event.comment.author_association == 'MEMBER' || 22 | github.event.comment.author_association == 'COLLABORATOR' 23 | ) && 24 | ( 25 | startsWith(github.event.comment.body, '/') || 26 | startsWith(github.event.comment.body, 'gito') || 27 | startsWith(github.event.comment.body, 'ai') || 28 | startsWith(github.event.comment.body, 'bot') || 29 | contains(github.event.comment.body, '@gito') || 30 | contains(github.event.comment.body, '@ai') || 31 | contains(github.event.comment.body, '@bot') 32 | ) 33 | runs-on: ubuntu-latest 34 | 35 | steps: 36 | - name: Get PR details 37 | id: pr 38 | uses: actions/github-script@v7 39 | with: 40 | script: | 41 | const pr = await github.rest.pulls.get({ 42 | owner: context.repo.owner, 43 | repo: context.repo.repo, 44 | pull_number: context.issue.number 45 | }); 46 | return { 47 | head_ref: pr.data.head.ref, 48 | head_sha: pr.data.head.sha, 49 | base_ref: pr.data.base.ref 50 | }; 51 | 52 | - name: 
Checkout repository 53 | uses: actions/checkout@v4 54 | with: 55 | repository: ${{ github.repository }} 56 | token: ${{ secrets.GITHUB_TOKEN }} 57 | ref: ${{ fromJson(steps.pr.outputs.result).head_ref }} 58 | fetch-depth: 0 59 | 60 | - name: Set up Python 61 | uses: actions/setup-python@v5 62 | with: { python-version: "3.13" } 63 | 64 | - name: Fetch Latest Gito Version 65 | id: gito-version 66 | run: pip index versions gito.bot 2>/dev/null | head -1 | sed -n 's/.* (\([^)]*\)).*/version=\1/p' >> $GITHUB_OUTPUT 67 | 68 | - uses: actions/cache@v4 69 | id: cache 70 | with: 71 | path: | 72 | ${{ env.pythonLocation }}/lib/python3.13/site-packages 73 | ${{ env.pythonLocation }}/bin 74 | key: gito_v${{ steps.gito-version.outputs.version }} 75 | 76 | - name: Install Gito 77 | if: steps.cache.outputs.cache-hit != 'true' 78 | run: pip install gito.bot~=3.2 79 | 80 | - name: Run Gito react 81 | env: 82 | # LLM config is needed only if answer_github_comments = true in .gito/config.toml 83 | # Otherwise, use LLM_API_TYPE: none 84 | LLM_API_TYPE: open_ai 85 | LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} 86 | MODEL: gpt-4.1 87 | JIRA_TOKEN: ${{ secrets.JIRA_TOKEN }} 88 | JIRA_URL: ${{ secrets.JIRA_URL }} 89 | JIRA_USER: ${{ secrets.JIRA_USER }} 90 | LINEAR_API_KEY: ${{ secrets.LINEAR_API_KEY }} 91 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 92 | run: | 93 | gito react-to-comment ${{ github.event.comment.id }} -------------------------------------------------------------------------------- /lm_proxy/api_key_check/with_request.py: -------------------------------------------------------------------------------- 1 | """ 2 | API key check implementation using HTTP requests. 3 | """ 4 | from typing import Optional 5 | from dataclasses import dataclass, field 6 | import requests 7 | 8 | from ..config import TApiKeyCheckFunc 9 | 10 | 11 | @dataclass(slots=True) 12 | class CheckAPIKeyWithRequest: # pylint: disable=too-many-instance-attributes 13 | """ 14 | Validates a Client API key by making an HTTP request to a specified URL. 15 | """ 16 | url: str = field() 17 | method: str = field(default="get") 18 | headers: dict = field(default_factory=dict) 19 | response_as_user_info: bool = field(default=False) 20 | group_field: Optional[str] = field(default=None) 21 | """ 22 | Field in the JSON response to extract the user group. 23 | """ 24 | default_group: str = field(default="default") 25 | """ 26 | User group to assign if group_field is not used. 27 | """ 28 | key_placeholder: str = field(default="{api_key}") 29 | use_cache: bool = field(default=False) 30 | """ 31 | Whether to cache the results of API key checks. 32 | Requires 'cachetools' package if set to True. 
33 | """ 34 | cache_size: int = field(default=1024 * 16) 35 | cache_ttl: int = field(default=60 * 5) # 5 minutes 36 | timeout: int = field(default=5) # seconds 37 | _func: TApiKeyCheckFunc = field(init=False, repr=False) 38 | 39 | def __post_init__(self): 40 | def check_func(api_key: str) -> Optional[tuple[str, dict]]: 41 | try: 42 | url = self.url.replace(self.key_placeholder, api_key) 43 | headers = { 44 | k: str(v).replace(self.key_placeholder, api_key) 45 | for k, v in self.headers.items() 46 | } 47 | response = requests.request( 48 | method=self.method, 49 | url=url, 50 | headers=headers, 51 | timeout=self.timeout 52 | ) 53 | response.raise_for_status() 54 | group = self.default_group 55 | user_info = None 56 | if self.response_as_user_info: 57 | user_info = response.json() 58 | if self.group_field: 59 | group = user_info.get(self.group_field, self.default_group) 60 | return group, user_info 61 | except requests.exceptions.RequestException: 62 | return None 63 | 64 | if self.use_cache: 65 | try: 66 | import cachetools # pylint: disable=import-outside-toplevel 67 | except ImportError as e: 68 | raise ImportError( 69 | "Missing optional dependency 'cachetools'. " 70 | "Using 'lm_proxy.api_key_check.CheckAPIKeyWithRequest' with 'use_cache = true' " 71 | "requires installing 'cachetools' package. " 72 | "\nPlease install it with following command: 'pip install cachetools'" 73 | ) from e 74 | cache = cachetools.TTLCache(maxsize=self.cache_size, ttl=self.cache_ttl) 75 | self._func = cachetools.cached(cache)(check_func) 76 | else: 77 | self._func = check_func 78 | 79 | def __call__(self, api_key: str) -> Optional[tuple[str, dict]]: 80 | return self._func(api_key) 81 | -------------------------------------------------------------------------------- /lm_proxy/utils.py: -------------------------------------------------------------------------------- 1 | """Common usage utility functions.""" 2 | import os 3 | import json 4 | import inspect 5 | import logging 6 | from typing import Any, Callable, Union 7 | from datetime import datetime, date, time 8 | 9 | from microcore.utils import resolve_callable 10 | from starlette.requests import Request 11 | 12 | 13 | def resolve_obj_path(obj, path: str, default=None): 14 | """ 15 | Resolves dotted path supporting 16 | attributes, dict keys and list indices. 17 | """ 18 | for part in path.split("."): 19 | try: 20 | if isinstance(obj, dict): 21 | obj = obj[part] 22 | elif isinstance(obj, list): 23 | part = int(part) # Convert to int for list indexing 24 | obj = obj[part] 25 | else: 26 | obj = getattr(obj, part) 27 | except (AttributeError, KeyError, TypeError, ValueError, IndexError): 28 | return default 29 | return obj 30 | 31 | 32 | def resolve_instance_or_callable( 33 | item: Union[str, Callable, dict, object], 34 | class_key: str = "class", 35 | debug_name: str = None, 36 | allow_types: list[type] = None, 37 | ) -> Callable | object | None: 38 | """ 39 | Resolves a class instance or callable from various configuration formats. 
40 | """ 41 | if item is None or item == "": 42 | return None 43 | if isinstance(item, dict): 44 | if class_key not in item: 45 | raise ValueError( 46 | f"'{class_key}' key is missing in {debug_name or 'item'} config: {item}" 47 | ) 48 | args = dict(item) 49 | class_name = args.pop(class_key) 50 | constructor = resolve_callable(class_name) 51 | return constructor(**args) 52 | if isinstance(item, str): 53 | fn = resolve_callable(item) 54 | return fn() if inspect.isclass(fn) else fn 55 | if callable(item): 56 | return item() if inspect.isclass(item) else item 57 | if allow_types and any(isinstance(item, t) for t in allow_types): 58 | return item 59 | raise ValueError(f"Invalid {debug_name or 'item'} config: {item}") 60 | 61 | 62 | class CustomJsonEncoder(json.JSONEncoder): 63 | """ 64 | Custom JSON encoder that handles datetime / date / time, pydantic models, etc. 65 | """ 66 | def default(self, o): 67 | if isinstance(o, (datetime, date, time)): 68 | return o.isoformat() 69 | if hasattr(o, "model_dump"): 70 | return o.model_dump() 71 | if hasattr(o, "dict"): 72 | return o.dict() 73 | if hasattr(o, "__dict__"): 74 | return o.__dict__ 75 | return super().default(o) 76 | 77 | 78 | def get_client_ip(request: Request) -> str: 79 | """ 80 | Extract the client's IP address from the request. 81 | """ 82 | # Try different headers in order of preference 83 | if forwarded_for := request.headers.get("X-Forwarded-For"): 84 | return forwarded_for.split(",")[0].strip() 85 | if real_ip := request.headers.get("X-Real-IP"): 86 | return real_ip 87 | if forwarded := request.headers.get("Forwarded"): 88 | # Parse Forwarded header (RFC 7239) 89 | return forwarded.split("for=")[1].split(";")[0].strip() 90 | 91 | # Fallback to direct client 92 | return request.client.host if request.client else "unknown" 93 | 94 | 95 | def replace_env_strings_recursive(data: Any) -> Any: 96 | """ 97 | Recursively traverses dicts and lists, replacing all string values 98 | that start with 'env:' with the corresponding environment variable. 99 | For example, a string "env:VAR_NAME" will be replaced by the value of the 100 | environment variable "VAR_NAME". 
101 | """ 102 | if isinstance(data, dict): 103 | return {k: replace_env_strings_recursive(v) for k, v in data.items()} 104 | if isinstance(data, list): 105 | return [replace_env_strings_recursive(i) for i in data] 106 | if isinstance(data, str) and data.startswith("env:"): 107 | env_var_name = data[4:] 108 | if env_var_name not in os.environ: 109 | logging.warning("Environment variable '%s' not found", env_var_name) 110 | return os.environ.get(env_var_name, "") 111 | return data 112 | -------------------------------------------------------------------------------- /lm_proxy/bootstrap.py: -------------------------------------------------------------------------------- 1 | """Initialization and bootstrapping.""" 2 | import sys 3 | import logging 4 | import inspect 5 | from os import PathLike 6 | from datetime import datetime 7 | from typing import TYPE_CHECKING 8 | 9 | import microcore as mc 10 | from microcore import ui 11 | from microcore.configuration import get_bool_from_env 12 | from dotenv import load_dotenv 13 | 14 | from .config import Config 15 | from .utils import resolve_instance_or_callable 16 | 17 | if TYPE_CHECKING: 18 | from .loggers import TLogger 19 | 20 | 21 | def setup_logging(log_level: int = logging.INFO): 22 | """Setup logging format and level.""" 23 | class CustomFormatter(logging.Formatter): 24 | """Custom log formatter with colouring.""" 25 | def format(self, record): 26 | dt = datetime.fromtimestamp(record.created).strftime("%H:%M:%S") 27 | message, level_name = record.getMessage(), record.levelname 28 | if record.levelno == logging.WARNING: 29 | message = mc.ui.yellow(message) 30 | level_name = mc.ui.yellow(level_name) 31 | if record.levelno >= logging.ERROR: 32 | message = mc.ui.red(message) 33 | level_name = mc.ui.red(level_name) 34 | return f"{dt} {level_name}: {message}" 35 | 36 | handler = logging.StreamHandler() 37 | handler.setFormatter(CustomFormatter()) 38 | logging.basicConfig(level=log_level, handlers=[handler]) 39 | 40 | 41 | class Env: 42 | """Runtime environment singleton.""" 43 | config: Config 44 | connections: dict[str, mc.types.LLMAsyncFunctionType] 45 | debug: bool 46 | components: dict 47 | loggers: list["TLogger"] 48 | 49 | def _init_components(self): 50 | self.components = {} 51 | for name, component_data in self.config.components.items(): 52 | self.components[name] = resolve_instance_or_callable(component_data) 53 | logging.info("Component initialized: '%s'.", name) 54 | 55 | @staticmethod 56 | def init(config: Config | str | PathLike, debug: bool = False): 57 | """Initializes the LM-Proxy runtime environment singleton.""" 58 | env.debug = debug 59 | 60 | if not isinstance(config, Config): 61 | if isinstance(config, (str, PathLike)): 62 | config = Config.load(config) 63 | else: 64 | raise ValueError("config must be a path (str or PathLike) or Config instance") 65 | env.config = config 66 | 67 | env._init_components() 68 | 69 | env.loggers = [resolve_instance_or_callable(logger) for logger in env.config.loggers] 70 | 71 | # initialize connections 72 | env.connections = {} 73 | for conn_name, conn_config in env.config.connections.items(): 74 | logging.info("Initializing '%s' LLM proxy connection...", conn_name) 75 | try: 76 | if inspect.iscoroutinefunction(conn_config): 77 | env.connections[conn_name] = conn_config 78 | elif isinstance(conn_config, str): 79 | env.connections[conn_name] = resolve_instance_or_callable(conn_config) 80 | else: 81 | mc.configure( 82 | **conn_config, EMBEDDING_DB_TYPE=mc.EmbeddingDbType.NONE 83 | ) 84 | 
env.connections[conn_name] = mc.env().llm_async_function 85 | except mc.LLMConfigError as e: 86 | raise ValueError( 87 | f"Error in configuration for connection '{conn_name}': {e}" 88 | ) from e 89 | 90 | logging.info("Done initializing %d connections.", len(env.connections)) 91 | 92 | 93 | env = Env() 94 | 95 | 96 | def bootstrap(config: str | Config = "config.toml", env_file: str = ".env", debug=None): 97 | """Bootstraps the LM-Proxy environment.""" 98 | def log_bootstrap(): 99 | cfg_val = 'dynamic' if isinstance(config, Config) else ui.blue(config) 100 | cfg_line = f"\n - Config{ui.gray('......')}[ {cfg_val} ]" 101 | env_line = f"\n - Env. File{ui.gray('...')}[ {ui.blue(env_file)} ]" if env_file else "" 102 | dbg_line = f"\n - Debug{ui.gray('.......')}[ {ui.yellow('On')} ]" if debug else "" 103 | message = f"Bootstrapping {ui.magenta('LM-Proxy')}...{cfg_line}{env_line}{dbg_line}" 104 | logging.info(message) 105 | 106 | if env_file: 107 | load_dotenv(env_file, override=True) 108 | if debug is None: 109 | debug = "--debug" in sys.argv or get_bool_from_env("LM_PROXY_DEBUG", False) 110 | setup_logging(logging.DEBUG if debug else logging.INFO) 111 | mc.logging.LoggingConfig.OUTPUT_METHOD = logging.info 112 | log_bootstrap() 113 | Env.init(config, debug=debug) 114 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from types import SimpleNamespace 4 | 5 | import pytest 6 | from starlette.requests import Request 7 | 8 | from lm_proxy.utils import ( 9 | resolve_instance_or_callable, 10 | replace_env_strings_recursive, 11 | resolve_obj_path, 12 | get_client_ip, 13 | ) 14 | 15 | 16 | def test_resolve_instance_or_callable(): 17 | assert resolve_instance_or_callable(None) is None 18 | 19 | obj1, obj2 = object(), object() 20 | ins = resolve_instance_or_callable(obj1, allow_types=[object]) 21 | assert ins is obj1 and ins is not obj2 22 | 23 | with pytest.raises(ValueError): 24 | resolve_instance_or_callable(123) 25 | 26 | with pytest.raises(ValueError): 27 | resolve_instance_or_callable([]) 28 | 29 | with pytest.raises(ValueError): 30 | resolve_instance_or_callable({}) 31 | 32 | assert resolve_instance_or_callable(lambda: 42)() == 42 33 | 34 | class MyClass: 35 | def __init__(self, value=0): 36 | self.value = value 37 | 38 | res = resolve_instance_or_callable(lambda: MyClass(10), allow_types=[MyClass]) 39 | assert not isinstance(res, MyClass) and res().value == 10 40 | 41 | ins = resolve_instance_or_callable(MyClass(20), allow_types=[MyClass]) 42 | assert isinstance(ins, MyClass) and ins.value == 20 43 | assert resolve_instance_or_callable( 44 | "lm_proxy.utils.resolve_instance_or_callable" 45 | ) is resolve_instance_or_callable 46 | 47 | ins = resolve_instance_or_callable({ 48 | 'class': 'lm_proxy.loggers.JsonLogWriter', 49 | 'file_name': 'test.log' 50 | }) 51 | assert ins.__class__.__name__ == 'JsonLogWriter' and ins.file_name == 'test.log' 52 | 53 | 54 | def test_replace_env_strings_recursive(caplog): 55 | os.environ['TEST_VAR1'] = 'env_value1' 56 | os.environ['TEST_VAR2'] = 'env_value2' 57 | assert replace_env_strings_recursive("env:TEST_VAR1") == 'env_value1' 58 | 59 | caplog.set_level(logging.WARNING) 60 | assert replace_env_strings_recursive("env:NON_EXIST") == '' 61 | assert len(caplog.records) == 1 62 | 63 | assert replace_env_strings_recursive([["env:TEST_VAR1"]]) == [['env_value1']] 64 | assert 
replace_env_strings_recursive( 65 | {"data": {"field": "env:TEST_VAR1"}} 66 | ) == {"data": {"field": "env_value1"}} 67 | 68 | 69 | def test_resolve_obj_path(): 70 | o = SimpleNamespace( 71 | a=SimpleNamespace( 72 | b=dict( 73 | c=[None, lambda x: x * 2] 74 | ) 75 | ) 76 | ) 77 | assert resolve_obj_path(o, "a.b.c.1")(10) == 20 78 | assert resolve_obj_path(o, "a.b.cc.1", "no") == "no" 79 | 80 | 81 | def test_get_client_ip(): 82 | request = Request(scope={ 83 | "type": "http", 84 | "headers": [], 85 | }) 86 | assert get_client_ip(request) == "unknown" 87 | 88 | request = Request(scope={ 89 | "type": "http", 90 | "headers": [(b"x-forwarded-for", b"192.168.1.1")], 91 | }) 92 | assert get_client_ip(request) == "192.168.1.1" 93 | 94 | request = Request(scope={ 95 | "type": "http", 96 | "headers": [(b"x-forwarded-for", b"192.168.1.1, 10.0.0.2")], 97 | }) 98 | assert get_client_ip(request) == "192.168.1.1" # should take the first IP 99 | 100 | request = Request(scope={ 101 | "type": "http", 102 | "headers": [(b"x-real-ip", b"203.0.113.5")], 103 | }) 104 | assert get_client_ip(request) == "203.0.113.5" 105 | 106 | request = Request(scope={ 107 | "type": "http", 108 | "headers": [], 109 | "client": ("127.0.0.1", 12345), 110 | }) 111 | assert get_client_ip(request) == "127.0.0.1" 112 | 113 | request = Request(scope={ 114 | "type": "http", 115 | "headers": [ 116 | (b"x-real-ip", b"203.0.113.5"), 117 | (b"x-forwarded-for", b"192.168.1.1, 10.0.0.2"), 118 | ], 119 | }) 120 | assert get_client_ip(request) == "192.168.1.1" # x-forwarded-for has priority 121 | 122 | # RFC 7239 Forwarded header 123 | result = get_client_ip(Request(scope={ 124 | "type": "http", 125 | "headers": [(b"forwarded", b"for=192.0.2.60;proto=http;by=203.0.113.43")], 126 | })) 127 | assert result == "192.0.2.60" 128 | 129 | # IPv6 address 130 | assert get_client_ip(Request(scope={ 131 | "type": "http", 132 | "headers": [ 133 | (b"user-agent", b"Mozilla/5.0"), 134 | (b"x-forwarded-for", b"2001:0db8:85a3:0000:0000:8a2e:0370:7334"), 135 | (b"content-type", b"application/json"), 136 | ], 137 | })) == "2001:0db8:85a3:0000:0000:8a2e:0370:7334" 138 | 139 | """Test when client IP is in scope""" 140 | assert get_client_ip(Request(scope={ 141 | "type": "http", 142 | "headers": [], 143 | "client": ("192.168.1.100", 8080), 144 | })) == "192.168.1.100" 145 | -------------------------------------------------------------------------------- /tests/configs/test_disabled.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from types import SimpleNamespace 4 | 5 | import pytest 6 | from starlette.requests import Request 7 | 8 | from lm_proxy.utils import ( 9 | resolve_instance_or_callable, 10 | replace_env_strings_recursive, 11 | resolve_obj_path, 12 | get_client_ip, 13 | ) 14 | 15 | 16 | def test_resolve_instance_or_callable(): 17 | assert resolve_instance_or_callable(None) is None 18 | 19 | obj1, obj2 = object(), object() 20 | ins = resolve_instance_or_callable(obj1, allow_types=[object]) 21 | assert ins is obj1 and ins is not obj2 22 | 23 | with pytest.raises(ValueError): 24 | resolve_instance_or_callable(123) 25 | 26 | with pytest.raises(ValueError): 27 | resolve_instance_or_callable([]) 28 | 29 | with pytest.raises(ValueError): 30 | resolve_instance_or_callable({}) 31 | 32 | assert resolve_instance_or_callable(lambda: 42)() == 42 33 | 34 | class MyClass: 35 | def __init__(self, value=0): 36 | self.value = value 37 | 38 | res = resolve_instance_or_callable(lambda: MyClass(10), 
allow_types=[MyClass]) 39 | assert not isinstance(res, MyClass) and res().value == 10 40 | 41 | ins = resolve_instance_or_callable(MyClass(20), allow_types=[MyClass]) 42 | assert isinstance(ins, MyClass) and ins.value == 20 43 | assert resolve_instance_or_callable( 44 | "lm_proxy.utils.resolve_instance_or_callable" 45 | ) is resolve_instance_or_callable 46 | 47 | ins = resolve_instance_or_callable({ 48 | 'class': 'lm_proxy.loggers.JsonLogWriter', 49 | 'file_name': 'test.log' 50 | }) 51 | assert ins.__class__.__name__ == 'JsonLogWriter' and ins.file_name == 'test.log' 52 | 53 | 54 | def test_replace_env_strings_recursive(caplog): 55 | os.environ['TEST_VAR1'] = 'env_value1' 56 | os.environ['TEST_VAR2'] = 'env_value2' 57 | assert replace_env_strings_recursive("env:TEST_VAR1") == 'env_value1' 58 | 59 | caplog.set_level(logging.WARNING) 60 | assert replace_env_strings_recursive("env:NON_EXIST") == '' 61 | assert len(caplog.records) == 1 62 | 63 | assert replace_env_strings_recursive([["env:TEST_VAR1"]]) == [['env_value1']] 64 | assert replace_env_strings_recursive( 65 | {"data": {"field": "env:TEST_VAR1"}} 66 | ) == {"data": {"field": "env_value1"}} 67 | 68 | 69 | def test_resolve_obj_path(): 70 | o = SimpleNamespace( 71 | a=SimpleNamespace( 72 | b=dict( 73 | c=[None, lambda x: x * 2] 74 | ) 75 | ) 76 | ) 77 | assert resolve_obj_path(o, "a.b.c.1")(10) == 20 78 | assert resolve_obj_path(o, "a.b.cc.1", "no") == "no" 79 | 80 | 81 | def test_get_client_ip(): 82 | request = Request(scope={ 83 | "type": "http", 84 | "headers": [], 85 | }) 86 | assert get_client_ip(request) == "unknown" 87 | 88 | request = Request(scope={ 89 | "type": "http", 90 | "headers": [(b"x-forwarded-for", b"192.168.1.1")], 91 | }) 92 | assert get_client_ip(request) == "192.168.1.1" 93 | 94 | request = Request(scope={ 95 | "type": "http", 96 | "headers": [(b"x-forwarded-for", b"192.168.1.1, 10.0.0.2")], 97 | }) 98 | assert get_client_ip(request) == "192.168.1.1" # should take the first IP 99 | 100 | request = Request(scope={ 101 | "type": "http", 102 | "headers": [(b"x-real-ip", b"203.0.113.5")], 103 | }) 104 | assert get_client_ip(request) == "203.0.113.5" 105 | 106 | request = Request(scope={ 107 | "type": "http", 108 | "headers": [], 109 | "client": ("127.0.0.1", 12345), 110 | }) 111 | assert get_client_ip(request) == "127.0.0.1" 112 | 113 | request = Request(scope={ 114 | "type": "http", 115 | "headers": [ 116 | (b"x-real-ip", b"203.0.113.5"), 117 | (b"x-forwarded-for", b"192.168.1.1, 10.0.0.2"), 118 | ], 119 | }) 120 | assert get_client_ip(request) == "192.168.1.1" # x-forwarded-for has priority 121 | 122 | # RFC 7239 Forwarded header 123 | result = get_client_ip(Request(scope={ 124 | "type": "http", 125 | "headers": [(b"forwarded", b"for=192.0.2.60;proto=http;by=203.0.113.43")], 126 | })) 127 | assert result == "192.0.2.60" 128 | 129 | # IPv6 address 130 | assert get_client_ip(Request(scope={ 131 | "type": "http", 132 | "headers": [ 133 | (b"user-agent", b"Mozilla/5.0"), 134 | (b"x-forwarded-for", b"2001:0db8:85a3:0000:0000:8a2e:0370:7334"), 135 | (b"content-type", b"application/json"), 136 | ], 137 | })) == "2001:0db8:85a3:0000:0000:8a2e:0370:7334" 138 | 139 | """Test when client IP is in scope""" 140 | assert get_client_ip(Request(scope={ 141 | "type": "http", 142 | "headers": [], 143 | "client": ("192.168.1.100", 8080), 144 | })) == "192.168.1.100" 145 | -------------------------------------------------------------------------------- /lm_proxy/config.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Configuration models for LM-Proxy settings. 3 | This module defines Pydantic models that match the structure of config.toml. 4 | """ 5 | 6 | import os 7 | from enum import StrEnum 8 | from typing import Union, Callable, Dict, Optional 9 | from importlib.metadata import entry_points 10 | 11 | from pydantic import BaseModel, Field, ConfigDict 12 | 13 | from .utils import resolve_instance_or_callable, replace_env_strings_recursive 14 | from .loggers import TLogger 15 | 16 | 17 | class ModelListingMode(StrEnum): 18 | """ 19 | Enum for model listing modes in the /models endpoint. 20 | """ 21 | 22 | # Show all models from API provider matching the patterns (not implemented yet) 23 | EXPAND_WILDCARDS = "expand_wildcards" 24 | # Ignore wildcard models, show only exact model names 25 | # (keys of the config.routing dict not containing * or ?) 26 | IGNORE_WILDCARDS = "ignore_wildcards" 27 | # Show everything as is, including wildcard patterns 28 | AS_IS = "as_is" 29 | 30 | 31 | class Group(BaseModel): 32 | """User group configuration.""" 33 | api_keys: list[str] = Field(default_factory=list) 34 | allowed_connections: str = Field(default="*") # Comma-separated list or "*" 35 | 36 | def allows_connecting_to(self, connection_name: str) -> bool: 37 | """Check if the group allows access to the specified connection.""" 38 | if self.allowed_connections == "*": 39 | return True 40 | allowed = [c.strip() for c in self.allowed_connections.split(",") if c.strip()] 41 | return connection_name in allowed 42 | 43 | 44 | TApiKeyCheckResult = Optional[Union[str, tuple[str, dict]]] 45 | TApiKeyCheckFunc = Callable[[str | None], TApiKeyCheckResult] 46 | 47 | 48 | class Config(BaseModel): 49 | """Main configuration model matching config.toml structure.""" 50 | 51 | model_config = ConfigDict( 52 | extra="forbid", 53 | arbitrary_types_allowed=True, 54 | ) 55 | enabled: bool = True 56 | host: str = "0.0.0.0" 57 | port: int = 8000 58 | ssl_keyfile: str | None = None 59 | """ Path to SSL key file for HTTPS support, if None, HTTP is used. """ 60 | ssl_certfile: str | None = None 61 | """ Path to SSL certificate file for HTTPS support, if None, HTTP is used. 
""" 62 | api_prefix: str = "/v1" 63 | """ Prefix for API endpoints, default is /v1 """ 64 | dev_autoreload: bool = False 65 | connections: dict[str, Union[dict, Callable, str]] = Field( 66 | ..., # Required field (no default) 67 | description="Dictionary of connection configurations", 68 | examples=[{"openai": {"api_key": "sk-..."}}], 69 | ) 70 | routing: dict[str, str] = Field(default_factory=dict) 71 | """ model_name_pattern* => connection_name.< model | * >, example: {"gpt-*": "oai.*"} """ 72 | groups: dict[str, Group] = Field(default_factory=lambda: {"default": Group()}) 73 | api_key_check: Union[str, TApiKeyCheckFunc, dict] = Field( 74 | default="lm_proxy.api_key_check.check_api_key_in_config", 75 | description="Function to check Virtual API keys", 76 | ) 77 | loggers: list[Union[str, dict, TLogger]] = Field(default_factory=list) 78 | encryption_key: str = Field( 79 | default="Eclipse", 80 | description="Key for encrypting sensitive data (must be explicitly set)", 81 | ) 82 | model_listing_mode: ModelListingMode = Field( 83 | default=ModelListingMode.AS_IS, 84 | description="How to handle wildcard models in /models endpoint", 85 | ) 86 | model_info: dict[str, dict] = Field( 87 | default_factory=dict, 88 | description="Additional metadata for /models endpoint", 89 | ) 90 | components: dict[str, Union[str, Callable, dict]] = Field(default_factory=dict) 91 | 92 | def __init__(self, **data): 93 | super().__init__(**data) 94 | self.api_key_check = resolve_instance_or_callable( 95 | self.api_key_check, 96 | debug_name="check_api_key", 97 | ) 98 | 99 | @staticmethod 100 | def _load_raw(config_path: str | os.PathLike = "config.toml") -> Union["Config", Dict]: 101 | config_ext = os.path.splitext(config_path)[1].lower().lstrip(".") 102 | for entry_point in entry_points(group="config.loaders"): 103 | if config_ext == entry_point.name: 104 | loader = entry_point.load() 105 | config_data = loader(config_path) 106 | return config_data 107 | 108 | raise ValueError(f"No loader found for configuration file extension: {config_ext}") 109 | 110 | @staticmethod 111 | def load(config_path: str | os.PathLike = "config.toml") -> "Config": 112 | """ 113 | Load configuration from a TOML or Python file. 
114 | 115 | Args: 116 | config_path: Path to the config.toml file 117 | 118 | Returns: 119 | Config object with parsed configuration 120 | """ 121 | config = Config._load_raw(config_path) 122 | if isinstance(config, dict): 123 | config = replace_env_strings_recursive(config) 124 | config = Config(**config) 125 | elif not isinstance(config, Config): 126 | raise TypeError("Loaded configuration must be a dict or Config instance") 127 | return config 128 | -------------------------------------------------------------------------------- /lm_proxy/core.py: -------------------------------------------------------------------------------- 1 | """Core LM-Proxy logic""" 2 | import asyncio 3 | import fnmatch 4 | import json 5 | import logging 6 | import secrets 7 | import time 8 | import hashlib 9 | from datetime import datetime 10 | from typing import Optional 11 | 12 | from fastapi import HTTPException 13 | from starlette.requests import Request 14 | from starlette.responses import JSONResponse, Response, StreamingResponse 15 | 16 | from .base_types import ChatCompletionRequest, RequestContext 17 | from .bootstrap import env 18 | from .config import Config 19 | from .utils import get_client_ip 20 | 21 | 22 | def parse_routing_rule(rule: str, config: Config) -> tuple[str, str]: 23 | """ 24 | Parses a routing rule in the format 'connection.model' or 'connection.*'. 25 | Returns a tuple of (connection_name, model_part). 26 | Args: 27 | rule (str): The routing rule string. 28 | config (Config): The configuration object containing defined connections. 29 | Raises: 30 | ValueError: If the rule format is invalid or the connection is unknown. 31 | """ 32 | if "." not in rule: 33 | raise ValueError( 34 | f"Invalid routing rule '{rule}'. Expected format: 'connection.model' or 'connection.*'" 35 | ) 36 | connection_name, model_part = rule.split(".", 1) 37 | if connection_name not in config.connections: 38 | raise ValueError( 39 | f"Routing selected unknown connection '{connection_name}'. " 40 | f"Defined connections: {', '.join(config.connections.keys()) or '(none)'}" 41 | ) 42 | return connection_name, model_part 43 | 44 | 45 | def resolve_connection_and_model( 46 | config: Config, external_model: str 47 | ) -> tuple[str, str]: 48 | """ 49 | Resolves the connection name and model name based on routing rules. 50 | Args: 51 | config (Config): The configuration object containing routing rules. 52 | external_model (str): The external model name from the request. 53 | """ 54 | for model_match, rule in config.routing.items(): 55 | if fnmatch.fnmatchcase(external_model, model_match): 56 | connection_name, model_part = parse_routing_rule(rule, config) 57 | resolved_model = external_model if model_part == "*" else model_part 58 | return connection_name, resolved_model 59 | 60 | raise ValueError( 61 | f"No routing rule matched model '{external_model}'. " 62 | 'Add a catch-all rule like "*" = "openai.gpt-3.5-turbo" if desired.' 63 | ) 64 | 65 | 66 | async def process_stream( 67 | async_llm_func, request: ChatCompletionRequest, llm_params, log_entry: RequestContext 68 | ): 69 | """ 70 | Streams the response from the LLM function. 
71 | """ 72 | prompt = request.messages 73 | queue = asyncio.Queue() 74 | stream_id = f"chatcmpl-{secrets.token_hex(12)}" 75 | created = int(time.time()) 76 | 77 | async def callback(chunk): 78 | await queue.put(chunk) 79 | 80 | def make_chunk(delta=None, content=None, finish_reason=None, error=None) -> str: 81 | if delta is None: 82 | delta = {"content": str(content)} if content is not None else {} 83 | obj = { 84 | "id": stream_id, 85 | "object": "chat.completion.chunk", 86 | "created": created, 87 | "choices": [{"index": 0, "delta": delta}], 88 | } 89 | if error is not None: 90 | obj["error"] = {"message": str(error), "type": type(error).__name__} 91 | if finish_reason is None: 92 | finish_reason = "error" 93 | if finish_reason is not None: 94 | obj["choices"][0]["finish_reason"] = finish_reason 95 | return "data: " + json.dumps(obj) + "\n\n" 96 | 97 | task = asyncio.create_task(async_llm_func(prompt, **llm_params, callback=callback)) 98 | 99 | try: 100 | # Initial chunk: role 101 | yield make_chunk(delta={"role": "assistant"}) 102 | 103 | while not task.done(): 104 | try: 105 | block = await asyncio.wait_for(queue.get(), timeout=0.1) 106 | yield make_chunk(content=block) 107 | except asyncio.TimeoutError: 108 | continue 109 | 110 | # Drain any remaining 111 | while not queue.empty(): 112 | block = await queue.get() 113 | yield make_chunk(content=block) 114 | 115 | finally: 116 | try: 117 | result = await task 118 | log_entry.response = result 119 | except Exception as e: 120 | log_entry.error = e 121 | yield make_chunk(error={"message": str(e), "type": type(e).__name__}) 122 | 123 | if log_entry.error: 124 | yield make_chunk(finish_reason="error") 125 | else: 126 | yield make_chunk(finish_reason="stop") 127 | yield "data: [DONE]\n\n" 128 | await log_non_blocking(log_entry) 129 | if log_entry.error: 130 | if env.debug: 131 | raise log_entry.error 132 | logging.error(log_entry.error) 133 | 134 | 135 | def read_api_key(request: Request) -> str: 136 | """ 137 | Extracts the Bearer token from the Authorization header. 138 | returns '' if not present. 139 | """ 140 | auth = request.headers.get("authorization") 141 | if auth and auth.lower().startswith("bearer "): 142 | return auth[7:].strip() 143 | return "" 144 | 145 | 146 | def api_key_id(api_key: Optional[str]) -> str | None: 147 | """ 148 | Generates a consistent hashed identifier for the given API key. 149 | """ 150 | if not api_key: 151 | return None 152 | return hashlib.md5( 153 | (api_key + env.config.encryption_key).encode("utf-8") 154 | ).hexdigest() 155 | 156 | 157 | async def check(request: Request) -> tuple[str, str, dict]: 158 | """ 159 | API key and service availability check for endpoints. 160 | Args: 161 | request (Request): The incoming HTTP request object. 162 | Returns: 163 | tuple[str, str, dict]: A tuple containing the group name, the API key and user_info object. 164 | Raises: 165 | HTTPException: If the service is disabled or the API key is invalid. 
166 | """ 167 | if not env.config.enabled: 168 | raise HTTPException( 169 | status_code=503, 170 | detail={ 171 | "error": { 172 | "message": "The service is disabled.", 173 | "type": "service_unavailable", 174 | "param": None, 175 | "code": "service_disabled", 176 | } 177 | }, 178 | ) 179 | api_key = read_api_key(request) 180 | result = (env.config.api_key_check)(api_key) 181 | if isinstance(result, tuple): 182 | group, user_info = result 183 | else: 184 | group: str | bool | None = result 185 | user_info = {} 186 | 187 | if not group: 188 | raise HTTPException( 189 | status_code=403, 190 | detail={ 191 | "error": { 192 | "message": "Incorrect API key provided: " 193 | "your API key is invalid, expired, or revoked.", 194 | "type": "invalid_request_error", 195 | "param": None, 196 | "code": "invalid_api_key", 197 | } 198 | }, 199 | ) 200 | return group, api_key, user_info 201 | 202 | 203 | async def chat_completions( 204 | request: ChatCompletionRequest, raw_request: Request 205 | ) -> Response: 206 | """ 207 | Endpoint for chat completions that mimics OpenAI's API structure. 208 | Streams the response from the LLM using microcore. 209 | """ 210 | group, api_key, user_info = await check(raw_request) 211 | llm_params = request.model_dump(exclude={"messages"}, exclude_none=True) 212 | connection, llm_params["model"] = resolve_connection_and_model( 213 | env.config, llm_params.get("model", "default_model") 214 | ) 215 | log_entry = RequestContext( 216 | request=request, 217 | api_key_id=api_key_id(api_key), 218 | group=group if isinstance(group, str) else None, 219 | remote_addr=get_client_ip(raw_request), 220 | connection=connection, 221 | model=llm_params["model"], 222 | user_info=user_info, 223 | ) 224 | logging.debug( 225 | "Resolved routing for [%s] --> connection: %s, model: %s", 226 | request.model, 227 | connection, 228 | llm_params["model"], 229 | ) 230 | 231 | if not env.config.groups[group].allows_connecting_to(connection): 232 | raise HTTPException( 233 | status_code=403, 234 | detail={ 235 | "error": { 236 | "message": f"Your API key does not allow using the '{connection}' connection.", 237 | "type": "invalid_request_error", 238 | "param": None, 239 | "code": "connection_not_allowed", 240 | } 241 | }, 242 | ) 243 | 244 | async_llm_func = env.connections[connection] 245 | 246 | logging.info("Querying LLM... params: %s", llm_params) 247 | if request.stream: 248 | return StreamingResponse( 249 | process_stream(async_llm_func, request, llm_params, log_entry), 250 | media_type="text/event-stream", 251 | ) 252 | 253 | try: 254 | out = await async_llm_func(request.messages, **llm_params) 255 | log_entry.response = out 256 | logging.info("LLM response: %s", out) 257 | except Exception as e: 258 | log_entry.error = e 259 | await log_non_blocking(log_entry) 260 | raise 261 | await log_non_blocking(log_entry) 262 | 263 | return JSONResponse( 264 | { 265 | "choices": [ 266 | { 267 | "index": 0, 268 | "message": {"role": "assistant", "content": str(out)}, 269 | "finish_reason": "stop", 270 | } 271 | ] 272 | } 273 | ) 274 | 275 | 276 | async def log(request_ctx: RequestContext): 277 | """ 278 | Creates log records for current request using all configured log handlers. 
279 | """ 280 | if request_ctx.duration is None and request_ctx.created_at: 281 | request_ctx.duration = (datetime.now() - request_ctx.created_at).total_seconds() 282 | for handler in env.loggers: 283 | # check if it is async, then run both sync and async loggers in non-blocking way (sync too) 284 | if asyncio.iscoroutinefunction(handler): 285 | asyncio.create_task(handler(request_ctx)) 286 | else: 287 | try: 288 | handler(request_ctx) 289 | except Exception as e: 290 | logging.error("Error in logger handler: %s", e) 291 | raise e 292 | 293 | 294 | async def log_non_blocking( 295 | request_ctx: RequestContext, 296 | ) -> Optional[asyncio.Task]: 297 | """ 298 | Non-blocking log function that schedules logging as an asynchronous task. 299 | """ 300 | if env.loggers: 301 | task = asyncio.create_task(log(request_ctx)) 302 | return task 303 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

LM-Proxy

2 |

3 | Lightweight, OpenAI-compatible HTTP proxy server / gateway
unifying access to multiple Large Language Model providers and local inference
through a single, standardized API endpoint. 4 |

5 |

6 | PyPI · Tests · Code Style · Code Coverage · License *(badge images)* 7 | 8 | 9 | 10 | 11 | 12 |

13 | 14 | Built with Python, FastAPI and [MicroCore](https://github.com/Nayjest/ai-microcore), **LM-Proxy** seamlessly integrates cloud providers like Google, Anthropic, and OpenAI, as well as local PyTorch-based inference, while maintaining full compatibility with OpenAI's API format. 15 | 16 | It works as a drop-in replacement for OpenAI's API, allowing you to switch between cloud providers and local models without modifying your existing client code. 17 | 18 | **LM-Proxy** supports **real-time token streaming**, **secure Virtual API key management**, and can be used both as an importable Python library and as a standalone HTTP service. Whether you're building production applications or experimenting with different models, LM-Proxy eliminates integration complexity and keeps your codebase **provider-agnostic**. 19 | 20 | 21 | ## Table of Contents 22 | - [Overview](#lm-proxy) 23 | - [Features](#-features) 24 | - [Getting Started](#-getting-started) 25 | - [Installation](#installation) 26 | - [Quick Start](#quick-start) 27 | - [Configuration](#-configuration) 28 | - [Basic Structure](#basic-structure) 29 | - [Environment Variables](#environment-variables) 30 | - [Proxy API Keys vs. Provider API Keys](#-proxy-api-keys-vs-provider-api-keys) 31 | - [API Usage](#-api-usage) 32 | - [Chat Completions Endpoint](#chat-completions-endpoint) 33 | - [Models List Endpoint](#models-list-endpoint) 34 | - [User Groups Configuration](#-user-groups-configuration) 35 | - [Basic Group Definition](#basic-group-definition) 36 | - [Group-based Access Control](#group-based-access-control) 37 | - [Connection Restrictions](#connection-restrictions) 38 | - [Virtual API Key Validation](#virtual-api-key-validation) 39 | - [Advanced Usage](#%EF%B8%8F-advanced-usage) 40 | - [Dynamic Model Routing](#dynamic-model-routing) 41 | - [Load Balancing Example](#load-balancing-example) 42 | - [Google Vertex AI Example](#google-vertex-ai-configuration-example) 43 | - [Using Tokens from OIDC Provider as Virtual/Client API Keys](#using-tokens-from-oidc-provider-as-virtualclient-api-keys) 44 | - [Add-on Components](#add-on-components) 45 | - [Database Connector](#database-connector) 46 | - [Debugging](#-debugging) 47 | - [Contributing](#-contributing) 48 | - [License](#-license) 49 | 50 | ## ✨ Features 51 | 52 | - **Provider Agnostic**: Connect to OpenAI, Anthropic, Google AI, local models, and more using a single API 53 | - **Unified Interface**: Access all models through the standard OpenAI API format 54 | - **Dynamic Routing**: Route requests to different LLM providers based on model name patterns 55 | - **Stream Support**: Full streaming support for real-time responses 56 | - **API Key Management**: Configurable API key validation and access control 57 | - **Easy Configuration**: Simple TOML/YAML/JSON/Python configuration files for setup 58 | - **Extensible by Design**: Minimal core with clearly defined extension points, enabling seamless customization and expansion without modifying the core system. 59 | 60 | ## 🚀 Getting Started 61 | 62 | ### Requirements 63 | Python 3.11 | 3.12 | 3.13 64 | 65 | ### Installation 66 | 67 | ```bash 68 | pip install lm-proxy 69 | ``` 70 | 71 | ### Quick Start 72 | 73 | #### 1. 
Create a `config.toml` file: 74 | 75 | ```toml 76 | host = "0.0.0.0" 77 | port = 8000 78 | 79 | [connections] 80 | [connections.openai] 81 | api_type = "open_ai" 82 | api_base = "https://api.openai.com/v1/" 83 | api_key = "env:OPENAI_API_KEY" 84 | 85 | [connections.anthropic] 86 | api_type = "anthropic" 87 | api_key = "env:ANTHROPIC_API_KEY" 88 | 89 | [routing] 90 | "gpt*" = "openai.*" 91 | "claude*" = "anthropic.*" 92 | "*" = "openai.gpt-3.5-turbo" 93 | 94 | [groups.default] 95 | api_keys = ["YOUR_API_KEY_HERE"] 96 | ``` 97 | > **Note** ℹ️ 98 | > To enhance security, consider storing upstream API keys in operating system environment variables rather than embedding them directly in the configuration file. You can reference these variables in the configuration using the env: syntax. 99 | 100 | #### 2. Start the server: 101 | 102 | ```bash 103 | lm-proxy 104 | ``` 105 | Alternatively, run it as a Python module: 106 | ```bash 107 | python -m lm_proxy 108 | ``` 109 | 110 | #### 3. Use it with any OpenAI-compatible client: 111 | 112 | ```python 113 | from openai import OpenAI 114 | 115 | client = OpenAI( 116 | api_key="YOUR_API_KEY_HERE", 117 | base_url="http://localhost:8000/v1" 118 | ) 119 | 120 | completion = client.chat.completions.create( 121 | model="gpt-5", # This will be routed to OpenAI based on config 122 | messages=[{"role": "user", "content": "Hello, world!"}] 123 | ) 124 | print(completion.choices[0].message.content) 125 | ``` 126 | 127 | Or use the same endpoint with Claude models: 128 | 129 | ```python 130 | completion = client.chat.completions.create( 131 | model="claude-opus-4-1-20250805", # This will be routed to Anthropic based on config 132 | messages=[{"role": "user", "content": "Hello, world!"}] 133 | ) 134 | ``` 135 | 136 | ## 📝 Configuration 137 | 138 | LM-Proxy is configured through a TOML/YAML/JSON/Python file that specifies connections, routing rules, and access control. 
139 | 140 | ### Basic Structure 141 | 142 | ```toml 143 | host = "0.0.0.0" # Interface to bind to 144 | port = 8000 # Port to listen on 145 | dev_autoreload = false # Enable for development 146 | 147 | # API key validation function (optional) 148 | api_key_check = "lm_proxy.api_key_check.check_api_key_in_config" 149 | 150 | # LLM Provider Connections 151 | [connections] 152 | 153 | [connections.openai] 154 | api_type = "open_ai" 155 | api_base = "https://api.openai.com/v1/" 156 | api_key = "env:OPENAI_API_KEY" 157 | 158 | [connections.google] 159 | api_type = "google_ai_studio" 160 | api_key = "env:GOOGLE_API_KEY" 161 | 162 | [connections.anthropic] 163 | api_type = "anthropic" 164 | api_key = "env:ANTHROPIC_API_KEY" 165 | 166 | # Routing rules (model_pattern = "connection.model") 167 | [routing] 168 | "gpt*" = "openai.*" # Route all GPT models to OpenAI 169 | "claude*" = "anthropic.*" # Route all Claude models to Anthropic 170 | "gemini*" = "google.*" # Route all Gemini models to Google 171 | "*" = "openai.gpt-3.5-turbo" # Default fallback 172 | 173 | # Access control groups 174 | [groups.default] 175 | api_keys = [ 176 | "KEY1", 177 | "KEY2" 178 | ] 179 | 180 | # optional 181 | [[loggers]] 182 | class = 'lm_proxy.loggers.BaseLogger' 183 | [loggers.log_writer] 184 | class = 'lm_proxy.loggers.log_writers.JsonLogWriter' 185 | file_name = 'storage/json.log' 186 | [loggers.entry_transformer] 187 | class = 'lm_proxy.loggers.LogEntryTransformer' 188 | completion_tokens = "response.usage.completion_tokens" 189 | prompt_tokens = "response.usage.prompt_tokens" 190 | prompt = "request.messages" 191 | response = "response" 192 | group = "group" 193 | connection = "connection" 194 | api_key_id = "api_key_id" 195 | remote_addr = "remote_addr" 196 | created_at = "created_at" 197 | duration = "duration" 198 | ``` 199 | 200 | ### Environment Variables 201 | 202 | You can reference environment variables in your configuration file by prefixing values with `env:`. 203 | 204 | For example: 205 | 206 | ```toml 207 | [connections.openai] 208 | api_key = "env:OPENAI_API_KEY" 209 | ``` 210 | 211 | At runtime, LM-Proxy automatically retrieves the value of the target variable 212 | (OPENAI_API_KEY) from your operating system’s environment or from a .env file, if present. 213 | 214 | ### .env Files 215 | 216 | By default, LM-Proxy looks for a `.env` file in the current working directory 217 | and loads environment variables from it. 218 | 219 | You can refer to the [.env.template](https://github.com/Nayjest/lm-proxy/blob/main/.env.template) 220 | file for an example: 221 | ```dotenv 222 | OPENAI_API_KEY=sk-u........ 223 | GOOGLE_API_KEY=AI........ 224 | ANTHROPIC_API_KEY=sk-ant-api03--vE........ 225 | 226 | # "1", "TRUE", "YES", "ON", "ENABLED", "Y", "+" are true, case-insensitive. 227 | # See https://github.com/Nayjest/ai-microcore/blob/v4.4.3/microcore/configuration.py#L36 228 | LM_PROXY_DEBUG=no 229 | ``` 230 | 231 | You can also control `.env` file usage with the `--env` command-line option: 232 | 233 | ```bash 234 | # Use a custom .env file path 235 | lm-proxy --env="path/to/your/.env" 236 | # Disable .env loading 237 | lm-proxy --env="" 238 | ``` 239 | 240 | ## 🔑 Proxy API Keys vs. Provider API Keys 241 | 242 | LM-Proxy utilizes two distinct types of API keys to facilitate secure and efficient request handling. 243 | 244 | - **Proxy API Key (Virtual API Key, Client API Key):** 245 | A unique key generated and managed within the LM-Proxy. 
246 | Clients use these keys to authenticate their requests to the proxy's API endpoints. 247 | Each Client API Key is associated with a specific group, which defines the scope of access and permissions for the client's requests. 248 | These keys allow users to securely interact with the proxy without direct access to external service credentials. 249 | 250 | 251 | 252 | - **Provider API Key (Upstream API Key):** 253 | A key provided by external LLM inference providers (e.g., OpenAI, Anthropic, Mistral, etc.) and configured within the LM-Proxy. 254 | The proxy uses these keys to authenticate and forward validated client requests to the respective external services. 255 | Provider API Keys remain hidden from end users, ensuring secure and transparent communication with provider APIs. 256 | 257 | This distinction ensures a clear separation of concerns: 258 | Virtual API Keys manage user authentication and access within the proxy, 259 | while Upstream API Keys handle secure communication with external providers. 260 | 261 | ## 🔌 API Usage 262 | 263 | LM-Proxy implements the OpenAI chat completions API endpoint. You can use any OpenAI-compatible client to interact with it. 264 | 265 | ### Chat Completions Endpoint 266 | 267 | ```http 268 | POST /v1/chat/completions 269 | ``` 270 | 271 | #### Request Format 272 | 273 | ```json 274 | { 275 | "model": "gpt-3.5-turbo", 276 | "messages": [ 277 | {"role": "system", "content": "You are a helpful assistant."}, 278 | {"role": "user", "content": "What is the capital of France?"} 279 | ], 280 | "temperature": 0.7, 281 | "stream": false 282 | } 283 | ``` 284 | 285 | #### Response Format 286 | 287 | ```json 288 | { 289 | "choices": [ 290 | { 291 | "index": 0, 292 | "message": { 293 | "role": "assistant", 294 | "content": "The capital of France is Paris." 295 | }, 296 | "finish_reason": "stop" 297 | } 298 | ] 299 | } 300 | ``` 301 | 302 | 303 | ### Models List Endpoint 304 | 305 | 306 | List and describe all models available through the API. 307 | 308 | 309 | ```http 310 | GET /v1/models 311 | ``` 312 | 313 | The **LM-Proxy** dynamically builds the models list based on routing rules defined in `config.routing`. 314 | Routing keys can reference both **exact model names** and **model name patterns** (e.g., `"gpt*"`, `"claude*"`, etc.). 315 | 316 | By default, wildcard patterns are displayed as-is in the models list (e.g., `"gpt*"`, `"claude*"`). 317 | This behavior can be customized via the `model_listing_mode` configuration option: 318 | 319 | ``` 320 | model_listing_mode = "as_is" | "ignore_wildcards" | "expand_wildcards" 321 | ``` 322 | 323 | Available modes: 324 | 325 | - **`as_is`** *(default)* — Lists all entries exactly as defined in the routing configuration, including wildcard patterns. 326 | - **`ignore_wildcards`** — Excludes wildcard patterns, showing only explicitly defined model names. 327 | - **`expand_wildcards`** — Expands wildcard patterns by querying each connected backend for available models *(feature not yet implemented)*. 
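For example, to hide wildcard routing patterns from the `/v1/models` response, set the option at the top level of the configuration file:

```toml
# Only explicitly named models (routing keys without "*" or "?") will be listed
model_listing_mode = "ignore_wildcards"
```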
328 | 329 | To obtain a complete and accurate model list in the current implementation, 330 | all supported models must be explicitly defined in the routing configuration, for example: 331 | ```toml 332 | [routing] 333 | "gpt-4" = "my_openai_connection.*" 334 | "gpt-5" = "my_openai_connection.*" 335 | "gpt-8"= "my_openai_connection.gpt-3.5-turbo" 336 | "claude-4.5-sonnet" = "my_anthropic_connection.claude-sonnet-4-5-20250929" 337 | "claude-4.1-opus" = "my_anthropic_connection.claude-opus-4-1-20250805" 338 | [connections] 339 | [connections.my_openai_connection] 340 | api_type = "open_ai" 341 | api_base = "https://api.openai.com/v1/" 342 | api_key = "env:OPENAI_API_KEY" 343 | [connections.my_anthropic_connection] 344 | api_type = "anthropic" 345 | api_key = "env:ANTHROPIC_API_KEY" 346 | ``` 347 | 348 | 349 | 350 | #### Response Format 351 | 352 | ```json 353 | { 354 | "object": "list", 355 | "data": [ 356 | { 357 | "id": "gpt-6", 358 | "object": "model", 359 | "created": 1686935002, 360 | "owned_by": "organization-owner" 361 | }, 362 | { 363 | "id": "claude-5-sonnet", 364 | "object": "model", 365 | "created": 1686935002, 366 | "owned_by": "organization-owner" 367 | } 368 | ] 369 | } 370 | ``` 371 | 372 | ## 🔒 User Groups Configuration 373 | 374 | The `[groups]` section in the configuration defines access control rules for different user groups. 375 | Each group can have its own set of virtual API keys and permitted connections. 376 | 377 | ### Basic Group Definition 378 | 379 | ```toml 380 | [groups.default] 381 | api_keys = ["KEY1", "KEY2"] 382 | allowed_connections = "*" # Allow access to all connections 383 | ``` 384 | 385 | ### Group-based Access Control 386 | 387 | You can create multiple groups to segment your users and control their access: 388 | 389 | ```toml 390 | # Admin group with full access 391 | [groups.admin] 392 | api_keys = ["ADMIN_KEY_1", "ADMIN_KEY_2"] 393 | allowed_connections = "*" # Access to all connections 394 | 395 | # Regular users with limited access 396 | [groups.users] 397 | api_keys = ["USER_KEY_1", "USER_KEY_2"] 398 | allowed_connections = "openai,anthropic" # Only allowed to use specific connections 399 | 400 | # Free tier with minimal access 401 | [groups.free] 402 | api_keys = ["FREE_KEY_1", "FREE_KEY_2"] 403 | allowed_connections = "openai" # Only allowed to use OpenAI connection 404 | ``` 405 | 406 | ### Connection Restrictions 407 | 408 | The `allowed_connections` parameter controls which upstream providers a group can access: 409 | 410 | - `"*"` - Group can use all configured connections 411 | - `"openai,anthropic"` - Comma-separated list of specific connections the group can use 412 | 413 | This allows fine-grained control over which users can access which AI providers, enabling features like: 414 | 415 | - Restricting expensive models to premium users 416 | - Creating specialized access tiers for different user groups 417 | - Implementing usage quotas per group 418 | - Billing and cost allocation by user group 419 | 420 | ### Virtual API Key Validation 421 | 422 | #### Overview 423 | 424 | LM-proxy includes 2 built-in methods for validating Virtual API keys: 425 | - `lm_proxy.api_key_check.check_api_key_in_config` - verifies API keys against those defined in the config file; used by default 426 | - `lm_proxy.api_key_check.CheckAPIKeyWithRequest` - validates API keys via an external HTTP service 427 | 428 | The API key check method can be configured using the `api_key_check` configuration key. 
429 | Its value can be either a reference to a Python function in the format `my_module.sub_module1.sub_module2.fn_name`, 430 | or an object containing parameters for a class-based validator. 431 | 432 | In the .py config representation, the validator function can be passed directly as a callable. 433 | 434 | #### Example configuration for external API key validation using HTTP request to Keycloak / OpenID Connect 435 | 436 | This example shows how to validate API keys against an external service (e.g., Keycloak): 437 | 438 | ```toml 439 | [api_key_check] 440 | class = "lm_proxy.api_key_check.CheckAPIKeyWithRequest" 441 | method = "POST" 442 | url = "http://keycloak:8080/realms/master/protocol/openid-connect/userinfo" 443 | response_as_user_info = true # interpret response JSON as user info object for further processing / logging 444 | use_cache = true # requires installing cachetools if True: pip install cachetools 445 | cache_ttl = 60 # Cache duration in seconds 446 | 447 | [api_key_check.headers] 448 | Authorization = "Bearer {api_key}" 449 | ``` 450 | #### Custom API Key Validation / Extending functionality 451 | 452 | For more advanced authentication needs, 453 | you can implement a custom validator function: 454 | 455 | ```python 456 | # my_validators.py 457 | def validate_api_key(api_key: str) -> str | None: 458 | """ 459 | Validate an API key and return the group name if valid. 460 | 461 | Args: 462 | api_key: The API key to validate 463 | 464 | Returns: 465 | The name of the group if valid, None otherwise 466 | """ 467 | if api_key == "secret-key": 468 | return "admin" 469 | elif api_key.startswith("user-"): 470 | return "users" 471 | return None 472 | ``` 473 | 474 | Then reference it in your config: 475 | 476 | ```toml 477 | api_key_check = "my_validators.validate_api_key" 478 | ``` 479 | > **NOTE** 480 | > In this case, the `api_keys` lists in groups are ignored, and the custom function is responsible for all validation logic. 481 | 482 | 483 | ## 🛠️ Advanced Usage 484 | ### Dynamic Model Routing 485 | 486 | The routing section allows flexible pattern matching with wildcards: 487 | 488 | ```toml 489 | [routing] 490 | "gpt-4*" = "openai.gpt-4" # Route gpt-4 requests to OpenAI GPT-4 491 | "gpt-3.5*" = "openai.gpt-3.5-turbo" # Route gpt-3.5 requests to OpenAI 492 | "claude*" = "anthropic.*" # Pass model name as-is to Anthropic 493 | "gemini*" = "google.*" # Pass model name as-is to Google 494 | "custom*" = "local.llama-7b" # Map any "custom*" to a specific local model 495 | "*" = "openai.gpt-3.5-turbo" # Default fallback for unmatched models 496 | ``` 497 | Keys are model name patterns (with `*` wildcard support), and values are connection/model mappings. 498 | Connection names reference those defined in the `[connections]` section. 499 | 500 | ### Load Balancing Example 501 | 502 | - [Simple load-balancer configuration](https://github.com/Nayjest/lm-proxy/blob/main/examples/load_balancer_config.py) 503 | This example demonstrates how to set up a load balancer that randomly 504 | distributes requests across multiple language model servers using the lm_proxy. 
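The idea behind that example, in a heavily simplified form: a Python configuration can register a plain `async` callable as a connection, and that callable decides per request which upstream server receives the traffic. The sketch below is illustrative only: the upstream URLs, the `balanced` connection name, and the module-level `config` variable are assumptions made for this snippet (see the linked example for the supported conventions), and streaming is intentionally not handled.

```python
# Hypothetical Python config sketch (not the shipped example file)
import random

from openai import AsyncOpenAI
from lm_proxy.config import Config, Group

# Assumed OpenAI-compatible upstream servers to balance across
UPSTREAMS = ["http://10.0.0.1:8000/v1", "http://10.0.0.2:8000/v1"]


async def balanced(messages, **llm_params):
    """Forward the chat request to a randomly chosen upstream server."""
    llm_params.pop("callback", None)  # streaming callback from LM-Proxy is ignored here
    llm_params.pop("stream", None)    # always query the upstream in non-streaming mode
    client = AsyncOpenAI(base_url=random.choice(UPSTREAMS), api_key="EMPTY")
    response = await client.chat.completions.create(messages=messages, **llm_params)
    return response.choices[0].message.content


config = Config(
    connections={"balanced": balanced},  # async callables are accepted as connections
    routing={"*": "balanced.*"},         # route every model name to the balancer
    groups={"default": Group(api_keys=["YOUR_API_KEY_HERE"])},
)
```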
505 | 506 | ### Google Vertex AI Configuration Example 507 | 508 | - [vertex-ai.toml](https://github.com/Nayjest/lm-proxy/blob/main/examples/vertex-ai.toml) 509 | This example demonstrates how to connect LM-Proxy to Google Gemini model via Vertex AI API 510 | 511 | ### Using Tokens from OIDC Provider as Virtual/Client API Keys 512 | 513 | You can configure LM-Proxy to validate tokens from OpenID Connect (OIDC) providers like Keycloak, Auth0, or Okta as API keys. 514 | 515 | The following configuration validates Keycloak access tokens by calling the userinfo endpoint: 516 | ```toml 517 | [api_key_check] 518 | class = "lm_proxy.api_key_check.CheckAPIKeyWithRequest" 519 | method = "POST" 520 | url = "http://keycloak:8080/realms/master/protocol/openid-connect/userinfo" 521 | response_as_user_info = true 522 | use_cache = true 523 | cache_ttl = 60 524 | 525 | [api_key_check.headers] 526 | Authorization = "Bearer {api_key}" 527 | ``` 528 | 529 | **Configuration Parameters:** 530 | 531 | - `class` - The API key validation handler class ([lm_proxy.api_key_check.CheckAPIKeyWithRequest](https://github.com/Nayjest/lm-proxy/blob/main/lm_proxy/api_key_check/with_request.py)) 532 | - `method` - HTTP method for the validation request (typically `POST` or `GET`) 533 | - `url` - The OIDC provider's userinfo endpoint URL 534 | - `response_as_user_info` - Parse the response as user information for further usage in LM-Proxy (extend logged info, determine user group, etc.) 535 | - `use_cache` - Enable caching of validation results (requires installing the `cachetools` package if enabled: `pip install cachetools`) 536 | - `cache_ttl` - Cache time-to-live in seconds (reduces load on identity provider) 537 | - `headers` - Dictionary of headers to send with the validation request 538 | 539 | > **Note**: The `{api_key}` placeholder can be used in headers or in the URL. LM-Proxy substitutes it with the API key from the client to perform the check. 540 | 541 | 542 | **Usage:** 543 | 544 | Clients pass their OIDC access token as the API key when making requests to LM-Proxy. 545 | 546 | ## 🧩 Add-on Components 547 | 548 | ### Database Connector 549 | 550 | [lm-proxy-db-connector](https://github.com/nayjest/lm-proxy-db-connector) is a lightweight SQLAlchemy-based connector that enables LM-Proxy to work with relational databases including PostgreSQL, MySQL/MariaDB, SQLite, Oracle, Microsoft SQL Server, and many others. 551 | 552 | **Key Features:** 553 | - Configure database connections directly through LM-Proxy configuration 554 | - Share database connections across components, extensions, and custom functions 555 | - Built-in database logger for structured logging of AI request data 556 | 557 | ## 🔍 Debugging 558 | 559 | ### Overview 560 | When **debugging mode** is enabled, 561 | LM-Proxy provides detailed logging information to help diagnose issues: 562 | - Stack traces for exceptions are shown in the console 563 | - Logging level is set to DEBUG instead of INFO 564 | 565 | > **Warning** ⚠️ 566 | > Never enable debugging mode in production environments, as it may expose sensitive information to the application logs. 567 | 568 | ### Enabling Debugging Mode 569 | To enable debugging, set the `LM_PROXY_DEBUG` environment variable to a truthy value (e.g., "1", "true", "yes"). 570 | > **Tip** 💡 571 | > Environment variables can also be defined in a `.env` file. 
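For example, to enable debugging for a single run from the shell:

```bash
LM_PROXY_DEBUG=1 lm-proxy
```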
572 | 573 | Alternatively, you can enable or disable debugging via the command-line arguments: 574 | - `--debug` to enable debugging 575 | - `--no-debug` to disable debugging 576 | 577 | > **Note** ℹ️ 578 | > CLI arguments override environment variable settings. 579 | 580 | 581 | ## 🤝 Contributing 582 | 583 | Contributions are welcome! Please feel free to submit a Pull Request. 584 | 585 | 1. Fork the repository 586 | 2. Create your feature branch (`git checkout -b feature/amazing-feature`) 587 | 3. Commit your changes (`git commit -m 'Add some amazing feature'`) 588 | 4. Push to the branch (`git push origin feature/amazing-feature`) 589 | 5. Open a Pull Request 590 | 591 | 592 | ## 📄 License 593 | 594 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 595 | © 2025 Vitalii Stepanenko 596 | --------------------------------------------------------------------------------