├── lm_proxy ├── __init__.py ├── __main__.py ├── config_loaders │ ├── toml.py │ ├── json.py │ ├── __init__.py │ ├── python.py │ └── yaml.py ├── api_key_check │ ├── __init__.py │ ├── in_config.py │ ├── allow_all.py │ └── with_request.py ├── models_endpoint.py ├── app.py ├── base_types.py ├── loggers.py ├── utils.py ├── bootstrap.py ├── config.py └── core.py ├── tests ├── configs │ ├── __init__.py │ ├── no_api_key_check.yml │ ├── test_config.toml │ ├── test_config.yml │ ├── test_config.json │ ├── config_fn.py │ └── test_disabled.py ├── __init__.py ├── test_resolve_connection_and_model.py ├── test_api_key_check_allow_all.py ├── test_config_loaders.py ├── test_api_key_check_errors.py ├── conftest.py ├── test_models_endpoint.py ├── test_loggers.py ├── test_integration.py └── test_utils.py ├── .flake8 ├── .gitignore ├── .env.template ├── .github └── workflows │ ├── code-style.yml │ ├── gito-code-review.yml │ ├── tests.yml │ └── gito-react-to-comments.yml ├── Makefile ├── coverage.svg ├── examples ├── vertex-ai.toml └── load_balancer_config.py ├── LICENSE ├── multi-build.py ├── config.toml ├── pyproject.toml └── README.md /lm_proxy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/configs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # This file enables pytest to discover tests in this directory 2 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | exclude = 4 | .git, 5 | __pycache__, 6 | .venv, 7 | .aico/* 8 | -------------------------------------------------------------------------------- /lm_proxy/__main__.py: -------------------------------------------------------------------------------- 1 | """Provides the CLI entry point when the package is executed as a Python module.""" 2 | from .app import cli_app 3 | 4 | 5 | if __name__ == "__main__": 6 | cli_app() 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .idea/ 3 | venv 4 | .env* 5 | !.env.template 6 | .pytest_cache 7 | storage 8 | dist 9 | docs 10 | .aico/* 11 | !.aico/project.json 12 | .coverage 13 | coverage.xml 14 | -------------------------------------------------------------------------------- /tests/configs/no_api_key_check.yml: -------------------------------------------------------------------------------- 1 | host: "127.0.0.1" 2 | port: 8787 3 | api_key_check: "lm_proxy.api_key_check.AllowAll" 4 | connections: 5 | test: "tests.conftest.llm_ok_connection" 6 | routing: 7 | "*": "test" 8 | -------------------------------------------------------------------------------- /lm_proxy/config_loaders/toml.py: -------------------------------------------------------------------------------- 1 | """TOML configuration loader.""" 2 | import tomllib 3 | 4 | 5 | def load_toml_config(config_path: str) -> dict: 6 | """Loads configuration from a TOML file.""" 7 | with open(config_path, "rb") as f: 8 | return tomllib.load(f) 9 | 
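A minimal usage sketch for the TOML loader above (config path and keys are illustrative):

    from lm_proxy.config_loaders.toml import load_toml_config

    raw = load_toml_config("config.toml")   # plain dict, not yet validated as a Config
    print(raw["host"], raw["port"])

In normal operation the loader is not called directly: Config.load() dispatches by file extension (the config.loaders entry points in pyproject.toml map extensions to these functions).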
-------------------------------------------------------------------------------- /lm_proxy/config_loaders/json.py: -------------------------------------------------------------------------------- 1 | """JSON configuration loader.""" 2 | import json 3 | 4 | 5 | def load_json_config(config_path: str) -> dict: 6 | """Loads configuration from a JSON file.""" 7 | with open(config_path, "r", encoding="utf-8") as f: 8 | return json.load(f) 9 | -------------------------------------------------------------------------------- /lm_proxy/api_key_check/__init__.py: -------------------------------------------------------------------------------- 1 | """Collection of built-in API-key checkers for usage in the configuration.""" 2 | from .in_config import check_api_key_in_config 3 | from .with_request import CheckAPIKeyWithRequest 4 | from .allow_all import AllowAll 5 | 6 | __all__ = ["check_api_key_in_config", "CheckAPIKeyWithRequest", "AllowAll"] 7 | -------------------------------------------------------------------------------- /lm_proxy/config_loaders/__init__.py: -------------------------------------------------------------------------------- 1 | """Built-in configuration loaders for different file formats.""" 2 | from .python import load_python_config 3 | from .toml import load_toml_config 4 | from .yaml import load_yaml_config 5 | from .json import load_json_config 6 | 7 | __all__ = [ 8 | "load_python_config", 9 | "load_toml_config", 10 | "load_yaml_config", 11 | "load_json_config", 12 | ] 13 | -------------------------------------------------------------------------------- /.env.template: -------------------------------------------------------------------------------- 1 | # Can be referenced in the lm-proxy configuration files following way: 2 | # api_key = "env:" 3 | OPENAI_API_KEY=sk-u........ 4 | GOOGLE_API_KEY=AI........ 5 | ANTHROPIC_API_KEY=sk-ant-api03--vE........ 6 | 7 | # "1", "TRUE", "YES", "ON", "ENABLED", "Y", "+" are true, case-insensitive. 8 | # See https://github.com/Nayjest/ai-microcore/blob/v4.4.3/microcore/configuration.py#L36 9 | LM_PROXY_DEBUG=no 10 | -------------------------------------------------------------------------------- /.github/workflows/code-style.yml: -------------------------------------------------------------------------------- 1 | name: Code Style 2 | 3 | on: [push] 4 | 5 | jobs: 6 | cs: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3 10 | - uses: actions/setup-python@v3 11 | with: 12 | python-version: 3.12 13 | - name: Install dependencies 14 | run: pip install --upgrade pip flake8 pylint 15 | - name: Run flake8 16 | run: flake8 . 
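Following the "env:" convention documented in .env.template above, configuration values of the form "env:VAR_NAME" are expanded from environment variables. A small sketch (variable name and value are illustrative):

    import os
    from lm_proxy.utils import replace_env_strings_recursive

    os.environ["OPENAI_API_KEY"] = "sk-example"
    cfg = {"connections": {"openai": {"api_key": "env:OPENAI_API_KEY"}}}
    print(replace_env_strings_recursive(cfg))   # api_key becomes "sk-example"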
17 | -------------------------------------------------------------------------------- /lm_proxy/config_loaders/python.py: -------------------------------------------------------------------------------- 1 | """Loader for Python configuration files.""" 2 | import importlib.util 3 | from ..config import Config 4 | 5 | 6 | def load_python_config(config_path: str) -> Config: 7 | """Load configuration from a Python file.""" 8 | spec = importlib.util.spec_from_file_location("config_module", config_path) 9 | config_module = importlib.util.module_from_spec(spec) 10 | spec.loader.exec_module(config_module) 11 | return config_module.config 12 | -------------------------------------------------------------------------------- /tests/configs/test_config.toml: -------------------------------------------------------------------------------- 1 | host = "127.0.0.1" 2 | port = 8787 3 | [connections] 4 | 5 | [connections.test_openai] 6 | api_type = "open_ai" 7 | api_base = "https://api.openai.com/v1/" 8 | api_key = "env:OPENAI_API_KEY" 9 | 10 | [connections.test_google] 11 | api_type = "google_ai_studio" 12 | api_key = "env:GOOGLE_API_KEY" 13 | 14 | [connections.test_anthropic] 15 | api_type = "anthropic" 16 | api_key = "env:ANTHROPIC_API_KEY" 17 | [routing] 18 | "gpt*" = "test_openai.*" 19 | "claude*" = "test_anthropic.*" 20 | "gemini*" = "test_google.*" 21 | "*" = "test_openai.gpt-5" 22 | [groups.default] 23 | api_keys = [] -------------------------------------------------------------------------------- /tests/configs/test_config.yml: -------------------------------------------------------------------------------- 1 | host: "127.0.0.1" 2 | port: 8787 3 | 4 | connections: 5 | test_openai: 6 | api_type: "open_ai" 7 | api_base: "https://api.openai.com/v1/" 8 | api_key: "env:OPENAI_API_KEY" 9 | test_google: 10 | api_type: "google_ai_studio" 11 | api_key: "env:GOOGLE_API_KEY" 12 | test_anthropic: 13 | api_type: "anthropic" 14 | api_key: "env:ANTHROPIC_API_KEY" 15 | 16 | routing: 17 | "gpt*": "test_openai.*" 18 | "claude*": "test_anthropic.*" 19 | "gemini*": "test_google.*" 20 | "*": "test_openai.gpt-5" 21 | 22 | groups: 23 | default: 24 | api_keys: [] 25 | -------------------------------------------------------------------------------- /lm_proxy/config_loaders/yaml.py: -------------------------------------------------------------------------------- 1 | """YAML configuration loader.""" 2 | 3 | 4 | def load_yaml_config(config_path: str) -> dict: 5 | """Loads a YAML configuration file and returns its contents as a dictionary.""" 6 | try: 7 | import yaml # pylint: disable=import-outside-toplevel 8 | except ImportError as e: 9 | raise ImportError( 10 | "Missing optional dependency 'PyYAML'. " 11 | "For using YAML configuration files with LM-Proxy, " 12 | "please install it with the following command: 'pip install pyyaml'." 13 | ) from e 14 | 15 | with open(config_path, "r", encoding="utf-8") as f: 16 | return yaml.safe_load(f) 17 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | run: 2 | fastapi run lm_proxy 3 | # alternative: python -m lm_proxy 4 | start: run 5 | cs: 6 | flake8 . 7 | black: 8 | black . 9 | 10 | 11 | install: 12 | pip install -e . 
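# A typical local workflow (sketch; the config path is illustrative):
#   pip install -e .                 # same as `make install`
#   lm-proxy --config config.toml    # or `make run` / `python -m lm_proxy`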
13 | 14 | pkg: 15 | python multi-build.py 16 | build: pkg 17 | 18 | clear-dist: 19 | python -c "import shutil, os; shutil.rmtree('dist', ignore_errors=True); os.makedirs('dist', exist_ok=True)" 20 | clr-dist: clear-dist 21 | 22 | publish: 23 | python -c "import os;t=os.getenv('PYPI_TOKEN');__import__('subprocess').run(f'python -m twine upload dist/* -u __token__ -p {t}',shell=True)" 24 | 25 | upload: publish 26 | test: 27 | pytest --log-cli-level=INFO 28 | tests: test 29 | integration-test: 30 | pytest tests/test_integration.py -v -------------------------------------------------------------------------------- /tests/configs/test_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "host": "127.0.0.1", 3 | "port": 8787, 4 | "connections": { 5 | "test_openai": { 6 | "api_type": "open_ai", 7 | "api_base": "https://api.openai.com/v1/", 8 | "api_key": "env:OPENAI_API_KEY" 9 | }, 10 | "test_google": { 11 | "api_type": "google_ai_studio", 12 | "api_key": "env:GOOGLE_API_KEY" 13 | }, 14 | "test_anthropic": { 15 | "api_type": "anthropic", 16 | "api_key": "env:ANTHROPIC_API_KEY" 17 | } 18 | }, 19 | "routing": { 20 | "gpt*": "test_openai.*", 21 | "claude*": "test_anthropic.*", 22 | "gemini*": "test_google.*", 23 | "*": "test_openai.gpt-5" 24 | }, 25 | "groups": { 26 | "default": { 27 | "api_keys": [] 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /tests/configs/config_fn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | import microcore as mc 4 | 5 | root = Path(__file__).resolve().parents[3] 6 | sys.path.append(str(root)) 7 | 8 | from lm_proxy.config import Config, Group # noqa 9 | 10 | 11 | def custom_api_key_check(api_key: str) -> str | None: 12 | return "default" if api_key == "py-test" else None 13 | 14 | 15 | mc.configure( 16 | DOT_ENV_FILE=".env", 17 | EMBEDDING_DB_TYPE=mc.EmbeddingDbType.NONE, 18 | ) 19 | 20 | config = Config( 21 | port=8123, 22 | host="127.0.0.1", 23 | api_key_check=custom_api_key_check, 24 | connections={"py_oai": mc.env().llm_async_function}, 25 | routing={"*": "py_oai.gpt-3.5-turbo", "my-gpt": "py_oai.gpt-3.5-turbo"}, 26 | groups={"default": Group(connections="*")}, 27 | ) 28 | -------------------------------------------------------------------------------- /tests/test_resolve_connection_and_model.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from lm_proxy.config import Config 3 | from lm_proxy.core import resolve_connection_and_model 4 | 5 | 6 | async def test_resolve_connection_and_model(): 7 | c = Config(connections={"a": {}, "b": {}, "c": {}}) 8 | with pytest.raises(ValueError, match="matched"): 9 | resolve_connection_and_model(c, "model") 10 | c.routing = { 11 | "client-model": "a.provider-model", 12 | } 13 | assert resolve_connection_and_model(c, "client-model") == ("a", "provider-model") 14 | 15 | c.routing["gpt*"] = "c.model" 16 | assert resolve_connection_and_model(c, "gpt-8") == ("c", "model") 17 | 18 | c.routing["*"] = "b.*" 19 | assert resolve_connection_and_model(c, "client-model2") == ("b", "client-model2") 20 | -------------------------------------------------------------------------------- /tests/test_api_key_check_allow_all.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from starlette.requests import Request 4 | 5 | from 
lm_proxy.bootstrap import bootstrap 6 | from lm_proxy.core import check 7 | from lm_proxy.api_key_check import AllowAll 8 | 9 | 10 | async def test_allow_all(): 11 | root = Path(__file__).resolve().parent 12 | bootstrap(root / "configs" / "no_api_key_check.yml") 13 | assert await check(Request(scope={ 14 | "type": "http", 15 | "headers": [], 16 | })) == ("default", "", {"api_key": ""}) 17 | 18 | # Test with key 19 | assert await check(Request(scope={ 20 | "type": "http", 21 | "headers": [(b"authorization", b"Bearer 11")], 22 | })) == ("default", "11", {"api_key": "11"}) 23 | 24 | assert AllowAll()("") == ("default", {"api_key": ""}) 25 | assert AllowAll(capture_api_key=False)("") == ("default", {}) 26 | -------------------------------------------------------------------------------- /tests/test_config_loaders.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | import dotenv 5 | import pytest 6 | 7 | from lm_proxy.config import Config 8 | 9 | 10 | def test_config_loaders(): 11 | root = Path(__file__).resolve().parent 12 | dotenv.load_dotenv(root.parent / ".env.template", override=True) 13 | oai_key = os.getenv("OPENAI_API_KEY") 14 | toml = Config.load(root / "configs" / "test_config.toml") 15 | json = Config.load(root / "configs" / "test_config.json") 16 | yaml = Config.load(root / "configs" / "test_config.yml") 17 | 18 | assert json.model_dump() == yaml.model_dump() == toml.model_dump() 19 | assert json.connections["test_openai"]["api_key"] == oai_key 20 | 21 | py = Config.load(root / "configs" / "config_fn.py") 22 | assert isinstance(py, Config) 23 | 24 | # Expect an error for unsupported format 25 | with pytest.raises(ValueError): 26 | Config.load(root / "configs" / "test_config.xyz") 27 | -------------------------------------------------------------------------------- /tests/test_api_key_check_errors.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from starlette.requests import Request 4 | from fastapi import HTTPException 5 | 6 | from lm_proxy.bootstrap import bootstrap 7 | from lm_proxy.config import Config 8 | from lm_proxy.core import check 9 | 10 | 11 | async def test_disabled(): 12 | bootstrap(Config(enabled=False, connections={})) 13 | with pytest.raises(HTTPException, match="disabled"): 14 | await check(Request(scope={ 15 | "type": "http", 16 | "headers": [], 17 | })) 18 | 19 | 20 | async def test_403(): 21 | bootstrap(Config(connections={})) 22 | with pytest.raises(HTTPException) as excinfo: 23 | await check(Request(scope={ 24 | "type": "http", 25 | "headers": [ 26 | (b"authorization", b"Bearer mykey"), 27 | ], 28 | })) 29 | assert excinfo.value.status_code == 403 30 | assert "Incorrect API key" in str(excinfo.value) 31 | -------------------------------------------------------------------------------- /lm_proxy/api_key_check/in_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | API Key check implementation that validates against configured groups. 3 | 4 | Checks if a provided API key exists within any of the defined groups. 5 | For using this function, 6 | set "api_key_check" configuration value to "lm_proxy.api_key_check.check_api_key_in_config". 
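For example, in a TOML configuration (group name and key below are illustrative):

    api_key_check = "lm_proxy.api_key_check.check_api_key_in_config"

    [groups.team_a]
    api_keys = ["TEAM_A_KEY_1"]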
7 | """ 8 | from typing import Optional 9 | from ..bootstrap import env 10 | 11 | 12 | def check_api_key_in_config(api_key: Optional[str]) -> Optional[str]: 13 | """ 14 | Validates a Client API key against configured groups and returns the matching group name. 15 | 16 | Args: 17 | api_key (Optional[str]): The Virtual / Client API key to validate. 18 | Returns: 19 | Optional[str]: The group name if the API key is valid and found in a group, 20 | None otherwise. 21 | """ 22 | for group_name, group in env.config.groups.items(): 23 | if api_key in group.api_keys: 24 | return group_name 25 | return None 26 | -------------------------------------------------------------------------------- /coverage.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | coverage 17 | coverage 18 | 71% 19 | 71% 20 | 21 | 22 | -------------------------------------------------------------------------------- /examples/vertex-ai.toml: -------------------------------------------------------------------------------- 1 | # Pre-requisites: Python v3.11 / v3.12 / v3.13 2 | # Steps: 3 | # 1. Install LM-Proxy: 4 | # > pip install lm-proxy 5 | # 2. Install Vertex AI API: 6 | # > pip install vertexai 7 | # 3. Install Google Cloud SDK: https://cloud.google.com/sdk/docs/install 8 | # 4. Authenticate with Google Cloud: 9 | # > gcloud auth application-default login 10 | # 5. Set your Google Cloud project: 11 | # > gcloud config set project 12 | # 6. Save this config as `vertex-ai.toml` 13 | # 7. Fill in connections.vertex_ai.google_vertex_project_id and Virtual API Keys for usage in LLM client applications. 14 | # 8. Run LM-Proxy with this config: 15 | # > lm-proxy --config vertex-ai.toml 16 | 17 | [connections] 18 | [connections.vertex_ai] 19 | api_type = "google_vertex_ai" 20 | google_vertex_project_id = "" 21 | google_vertex_gcloud_auth = true 22 | google_vertex_response_validation = true 23 | model = "gemini-2.5-pro" 24 | 25 | [routing] 26 | "gemini-2.5-pro" = "vertex_ai.gemini-2.5-pro" 27 | 28 | [groups.default] 29 | # Your Virual API Keys here 30 | api_keys = [ 31 | "KEY1", 32 | "KEY2" 33 | ] 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Vitalii Stepanenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
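To show how a client would call a proxy configured like examples/vertex-ai.toml above, a sketch using the standard OpenAI SDK (host, port and API key are illustrative; tests/test_integration.py follows the same pattern):

    from openai import OpenAI

    client = OpenAI(api_key="KEY1", base_url="http://127.0.0.1:8000/v1")
    reply = client.chat.completions.create(
        model="gemini-2.5-pro",
        messages=[{"role": "user", "content": "Hello from LM-Proxy"}],
    )
    print(reply.choices[0].message.content)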
22 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pytest 3 | import subprocess 4 | import time 5 | import signal 6 | from pathlib import Path 7 | from dataclasses import dataclass, field 8 | from typing import Any 9 | import microcore as mc 10 | 11 | 12 | @dataclass 13 | class ServerFixture: 14 | port: int 15 | process: Any 16 | api_key: str 17 | model: str = field(default=None) 18 | 19 | 20 | @pytest.fixture(scope="session") 21 | def server_config_fn(): 22 | """Fixture that starts the LM-Proxy server for testing and stops it after tests complete.""" 23 | test_config_path = Path("tests/configs/config_fn.py") 24 | server_process = subprocess.Popen( 25 | [sys.executable, "-m", "lm_proxy.app", "--config", str(test_config_path)], 26 | ) 27 | time.sleep(2) 28 | from tests.configs.config_fn import config 29 | 30 | yield ServerFixture( 31 | port=config.port, 32 | process=server_process, 33 | model="any-model", 34 | api_key="py-test", 35 | ) 36 | server_process.send_signal(signal.SIGTERM) 37 | server_process.wait() 38 | 39 | 40 | async def llm_ok_connection(*args, **kwargs): 41 | return mc.LLMResponse("ok") 42 | -------------------------------------------------------------------------------- /multi-build.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | import subprocess 4 | 5 | NAMES = [ 6 | ["lm-proxy", "LM-Proxy"], 7 | ["llm-proxy-server", "LLM Proxy Server"], 8 | ["ai-proxy-server", "AI Proxy Server"], 9 | ["lm-proxy-server", "LM Proxy Server"], 10 | ["openai-http-proxy", "OpenAI HTTP Proxy"], 11 | ["inference-proxy", "Inference Proxy"], 12 | ["oai-proxy", "OAI Proxy"], 13 | ] 14 | FILES = [ 15 | "pyproject.toml", 16 | "README.md", 17 | ] 18 | 19 | 20 | def replace_name(old_names: list[str], new_names: list[str], files: list[str] = None): 21 | files = files or FILES 22 | for i in range(len(old_names)): 23 | old_name = old_names[i] 24 | new_name = new_names[i] 25 | for path in files: 26 | p = Path(path) 27 | p.write_text( 28 | re.sub( 29 | fr'(? tuple[str, dict[str, Optional[str]]]: 32 | """ 33 | Validate an API key (accepts all keys without verification). 34 | 35 | Args: 36 | api_key: The API key to validate. Can be None. 37 | 38 | Returns: 39 | A tuple containing: 40 | - The default group identifier (str) 41 | - user_info dictionary with the API key if capture_api_key 42 | is True, otherwise an empty dictionary 43 | 44 | Note: 45 | This method never raises authentication errors and always returns 46 | successfully, regardless of the input. 47 | """ 48 | user_info = {"api_key": api_key} if self.capture_api_key else {} 49 | return self.group, user_info 50 | -------------------------------------------------------------------------------- /lm_proxy/models_endpoint.py: -------------------------------------------------------------------------------- 1 | """ 2 | Models list endpoint 3 | """ 4 | 5 | from starlette.requests import Request 6 | from starlette.responses import JSONResponse 7 | 8 | from .bootstrap import env 9 | from .core import check, parse_routing_rule 10 | from .config import ModelListingMode, Group 11 | 12 | 13 | async def models(request: Request) -> JSONResponse: 14 | """ 15 | Lists available models based on routing rules and group permissions. 
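    Only routing rules whose target connection is allowed for the caller's group are
    included; wildcard patterns ("*", "?") are listed as-is or skipped depending on the
    configured model_listing_mode.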
16 | """ 17 | group_name, api_key, user_info = await check(request) 18 | group: Group = env.config.groups[group_name] 19 | models_list = [] 20 | for model_pattern, route in env.config.routing.items(): 21 | connection_name, _ = parse_routing_rule(route, env.config) 22 | if group.allows_connecting_to(connection_name): 23 | is_model_name = not ("*" in model_pattern or "?" in model_pattern) 24 | if not is_model_name: 25 | if env.config.model_listing_mode != ModelListingMode.AS_IS: 26 | if ( 27 | env.config.model_listing_mode 28 | == ModelListingMode.IGNORE_WILDCARDS 29 | ): 30 | continue 31 | raise NotImplementedError( 32 | f"'{env.config.model_listing_mode}' model listing mode " 33 | f"is not implemented yet" 34 | ) 35 | model_data = { 36 | "id": model_pattern, 37 | "object": "model", 38 | "created": 0, 39 | "owned_by": connection_name, 40 | } 41 | 42 | if aux_info := env.config.model_info.get(model_pattern): 43 | model_data.update(aux_info) 44 | models_list.append(model_data) 45 | 46 | return JSONResponse( 47 | { 48 | "object": "list", 49 | "data": models_list, 50 | } 51 | ) 52 | -------------------------------------------------------------------------------- /.github/workflows/gito-code-review.yml: -------------------------------------------------------------------------------- 1 | name: "Gito: AI Code Reviewer" 2 | on: 3 | pull_request: 4 | types: [opened, synchronize, reopened] 5 | workflow_dispatch: 6 | inputs: 7 | pr_number: 8 | description: "Pull Request number" 9 | required: true 10 | jobs: 11 | review: 12 | runs-on: ubuntu-latest 13 | permissions: { contents: read, pull-requests: write } # 'write' for leaving the summary comment 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: { fetch-depth: 0 } 17 | 18 | - name: Set up Python 19 | uses: actions/setup-python@v5 20 | with: { python-version: "3.13" } 21 | 22 | - name: Fetch Latest Gito Version 23 | id: gito-version 24 | run: pip index versions gito.bot 2>/dev/null | head -1 | sed -n 's/.* (\([^)]*\)).*/version=\1/p' >> $GITHUB_OUTPUT 25 | 26 | - uses: actions/cache@v4 27 | id: cache 28 | with: 29 | path: | 30 | ${{ env.pythonLocation }}/lib/python3.13/site-packages 31 | ${{ env.pythonLocation }}/bin 32 | key: gito_v${{ steps.gito-version.outputs.version }} 33 | 34 | - name: Install Gito 35 | if: steps.cache.outputs.cache-hit != 'true' 36 | run: pip install gito.bot~=3.2 37 | 38 | - name: Run AI code review 39 | env: 40 | LLM_API_TYPE: open_ai 41 | LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} 42 | MODEL: gpt-5 43 | JIRA_TOKEN: ${{ secrets.JIRA_TOKEN }} 44 | JIRA_URL: ${{ secrets.JIRA_URL }} 45 | JIRA_USER: ${{ secrets.JIRA_USER }} 46 | LINEAR_API_KEY: ${{ secrets.LINEAR_API_KEY }} 47 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 48 | PR_NUMBER_FROM_WORKFLOW_DISPATCH: ${{ github.event.inputs.pr_number }} 49 | run: | 50 | gito --verbose review 51 | gito github-comment 52 | 53 | - uses: actions/upload-artifact@v4 54 | with: 55 | name: gito-code-review-results 56 | path: | 57 | code-review-report.md 58 | code-review-report.json -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: write 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.11", "3.12", "3.13"] 20 | 21 | steps: 22 | 
- uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | pip install poetry 30 | poetry install 31 | echo "$(poetry env info --path)/bin" >> $GITHUB_PATH 32 | - name: Test with pytest 33 | if: matrix.python-version != '3.13' 34 | env: 35 | LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} 36 | run: | 37 | pytest 38 | - name: Test with pytest +coverage 39 | if: matrix.python-version == '3.13' 40 | env: 41 | LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} 42 | run: | 43 | pytest --cov=lm_proxy --cov-report=xml 44 | - name: Generate coverage badge 45 | if: matrix.python-version == '3.13' && (github.event_name == 'push' || github.event_name == 'pull_request') 46 | uses: tj-actions/coverage-badge-py@v2 47 | with: 48 | output: 'coverage.svg' 49 | - name: Commit coverage badge 50 | if: matrix.python-version == '3.13' && (github.event_name == 'push' || github.event_name == 'pull_request') 51 | run: | 52 | git config --local user.email "action@github.com" 53 | git config --local user.name "GitHub Action" 54 | git fetch origin 55 | git checkout ${{ github.head_ref || github.ref_name }} -- 56 | git add coverage.svg 57 | git commit -m "Update coverage badge [skip ci]" || echo "No changes to commit" 58 | git push 59 | env: 60 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 61 | 62 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "lm-proxy" 3 | version = "2.1.1" 4 | description = "\"LM-Proxy\" is OpenAI-compatible http proxy server for inferencing various LLMs capable of working with Google, Anthropic, OpenAI APIs, local PyTorch inference, etc." 
5 | readme = "README.md" 6 | keywords = ["llm", "large language models", "ai", "gpt", "openai", "proxy", "http", "proxy-server"] 7 | classifiers = [ 8 | "Intended Audience :: Developers", 9 | "Operating System :: OS Independent", 10 | "Programming Language :: Python :: 3", 11 | "Programming Language :: Python :: 3.11", 12 | "Programming Language :: Python :: 3.12", 13 | "Programming Language :: Python :: 3.13", 14 | "License :: OSI Approved :: MIT License", 15 | ] 16 | dependencies = [ 17 | "ai-microcore~=4.4.4", 18 | "fastapi>=0.121.3,<1", 19 | "uvicorn>=0.22.0", 20 | "typer>=0.16.1", 21 | "requests~=2.32.3", 22 | "pydantic~=2.12.3", 23 | ] 24 | 25 | requires-python = ">=3.11,<4" 26 | 27 | authors = [ 28 | { name = "Vitalii Stepanenko", email = "mail@vitaliy.in" }, 29 | ] 30 | maintainers = [ 31 | { name = "Vitalii Stepanenko", email = "mail@vitaliy.in" }, 32 | ] 33 | license = { file = "LICENSE" } 34 | 35 | [project.urls] 36 | "Source Code" = "https://github.com/Nayjest/lm-proxy" 37 | 38 | [project.entry-points."config.loaders"] 39 | toml = "lm_proxy.config_loaders:load_toml_config" 40 | py = "lm_proxy.config_loaders:load_python_config" 41 | yml = "lm_proxy.config_loaders:load_yaml_config" 42 | yaml = "lm_proxy.config_loaders:load_yaml_config" 43 | json = "lm_proxy.config_loaders:load_json_config" 44 | 45 | [build-system] 46 | requires = ["poetry-core"] 47 | build-backend = "poetry.core.masonry.api" 48 | 49 | [tool.poetry] 50 | package-mode = true 51 | packages = [{ include = "lm_proxy"}] 52 | 53 | [tool.poetry.group.test.dependencies] 54 | pytest = "~=8.4.2" 55 | pytest-asyncio = "~=1.2.0" 56 | pytest-cov = "~7.0.0" 57 | 58 | [tool.poetry.scripts] 59 | lm-proxy = "lm_proxy.app:cli_app" 60 | 61 | [tool.pytest.ini_options] 62 | asyncio_mode = "auto" 63 | testpaths = [ 64 | "tests", 65 | ] 66 | -------------------------------------------------------------------------------- /lm_proxy/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | LM-Proxy Application Entrypoint 3 | """ 4 | import logging 5 | from typing import Optional 6 | from fastapi import FastAPI 7 | import typer 8 | import uvicorn 9 | 10 | from .bootstrap import env, bootstrap 11 | from .core import chat_completions 12 | from .models_endpoint import models 13 | 14 | cli_app = typer.Typer() 15 | 16 | 17 | @cli_app.callback(invoke_without_command=True) 18 | def run_server( 19 | config: Optional[str] = typer.Option(None, help="Path to the configuration file"), 20 | debug: Optional[bool] = typer.Option( 21 | None, help="Enable debug mode (more verbose logging)" 22 | ), 23 | env_file: Optional[str] = typer.Option( 24 | ".env", 25 | "--env", 26 | "--env-file", 27 | "--env_file", 28 | help="Set the .env file to load ENV vars from", 29 | ), 30 | ): 31 | """ 32 | Default command for CLI application: Run LM-Proxy web server 33 | """ 34 | try: 35 | bootstrap(config=config or "config.toml", env_file=env_file, debug=debug) 36 | uvicorn.run( 37 | "lm_proxy.app:web_app", 38 | host=env.config.host, 39 | port=env.config.port, 40 | ssl_keyfile=env.config.ssl_keyfile or None, 41 | ssl_certfile=env.config.ssl_certfile or None, 42 | reload=env.config.dev_autoreload, 43 | factory=True, 44 | ) 45 | except Exception as e: 46 | if env.debug: 47 | raise 48 | logging.error(e) 49 | raise typer.Exit(code=1) 50 | 51 | 52 | def web_app(): 53 | """ 54 | Entrypoint for ASGI server 55 | """ 56 | app = FastAPI( 57 | title="LM-Proxy", description="OpenAI-compatible proxy server for LLM inference" 58 | ) 59 | 
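    # Expose the OpenAI-compatible endpoints under the configured api_prefix.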
app.add_api_route( 60 | path=f"{env.config.api_prefix}/chat/completions", 61 | endpoint=chat_completions, 62 | methods=["POST"], 63 | ) 64 | app.add_api_route( 65 | path=f"{env.config.api_prefix}/models", 66 | endpoint=models, 67 | methods=["GET"], 68 | ) 69 | return app 70 | 71 | 72 | if __name__ == "__main__": 73 | cli_app() 74 | -------------------------------------------------------------------------------- /lm_proxy/base_types.py: -------------------------------------------------------------------------------- 1 | """Base types used in LM-Proxy.""" 2 | import uuid 3 | from dataclasses import dataclass, field 4 | from datetime import datetime 5 | from typing import List, Optional, TYPE_CHECKING 6 | 7 | import microcore as mc 8 | from pydantic import BaseModel 9 | 10 | if TYPE_CHECKING: 11 | from .config import Group 12 | 13 | 14 | class ChatCompletionRequest(BaseModel): 15 | """ 16 | Request model for chat/completions endpoint. 17 | """ 18 | model: str 19 | messages: List[mc.Msg | dict] 20 | # | dict --> support of messages with lists of dicts 21 | # defining distinct content-parts inside 'content' field 22 | stream: Optional[bool] = None 23 | max_tokens: Optional[int] = None 24 | temperature: Optional[float] = None 25 | top_p: Optional[float] = None 26 | n: Optional[int] = None 27 | stop: Optional[List[str]] = None 28 | presence_penalty: Optional[float] = None 29 | frequency_penalty: Optional[float] = None 30 | user: Optional[str] = None 31 | 32 | 33 | @dataclass 34 | class RequestContext: # pylint: disable=too-many-instance-attributes 35 | """ 36 | Stores information about a single LLM request/response cycle for usage in middleware. 37 | """ 38 | id: Optional[str] = field(default_factory=lambda: str(uuid.uuid4())) 39 | request: Optional[ChatCompletionRequest] = field(default=None) 40 | response: Optional[mc.LLMResponse] = field(default=None) 41 | error: Optional[Exception] = field(default=None) 42 | group: Optional["Group"] = field(default=None) 43 | connection: Optional[str] = field(default=None) 44 | model: Optional[str] = field(default=None) 45 | api_key_id: Optional[str] = field(default=None) 46 | remote_addr: Optional[str] = field(default=None) 47 | created_at: Optional[datetime] = field(default_factory=datetime.now) 48 | duration: Optional[float] = field(default=None) 49 | user_info: Optional[dict] = field(default=None) 50 | extra: dict = field(default_factory=dict) 51 | 52 | def to_dict(self) -> dict: 53 | """Export as dictionary.""" 54 | data = self.__dict__.copy() 55 | if self.request: 56 | data["request"] = self.request.model_dump(mode="json") 57 | return data 58 | -------------------------------------------------------------------------------- /tests/test_loggers.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import microcore as mc 4 | 5 | from lm_proxy.core import log_non_blocking 6 | from lm_proxy.base_types import ChatCompletionRequest, RequestContext 7 | from lm_proxy.config import Config 8 | from lm_proxy.bootstrap import bootstrap 9 | from lm_proxy.utils import CustomJsonEncoder 10 | 11 | 12 | async def test_custom_config(): 13 | 14 | logs = [] 15 | bootstrap( 16 | Config( 17 | connections={}, 18 | loggers=[ 19 | { 20 | "class": "lm_proxy.loggers.BaseLogger", 21 | "log_writer": lambda data: logs.append( 22 | json.dumps(data, cls=CustomJsonEncoder) 23 | ), 24 | } 25 | ], 26 | ) 27 | ) 28 | request = ChatCompletionRequest( 29 | model="gpt-3.5-turbo", 30 | messages=[{"role": "user", "content": "Test request 
message"}], 31 | ) 32 | response = mc.LLMResponse("Test response message", dict(prompt=request.messages)) 33 | task = await log_non_blocking(RequestContext(request=request, response=response)) 34 | if task: 35 | await task 36 | assert len(logs) == 1 37 | log_data = json.loads(logs[0]) 38 | assert log_data["request"]["model"] == "gpt-3.5-turbo" 39 | assert log_data["response"] == "Test response message" 40 | 41 | 42 | async def test_json(tmp_path): 43 | bootstrap( 44 | Config( 45 | connections={}, 46 | loggers=[ 47 | { 48 | "class": "lm_proxy.loggers.BaseLogger", 49 | "log_writer": { 50 | "class": "lm_proxy.loggers.JsonLogWriter", 51 | "file_name": tmp_path / "json_log.log", 52 | }, 53 | } 54 | ], 55 | ) 56 | ) 57 | request = ChatCompletionRequest( 58 | model="gpt-3.5-turbo", 59 | messages=[{"role": "user", "content": "Test request message"}], 60 | ) 61 | response = mc.LLMResponse("Test response message", dict(prompt=request.messages)) 62 | task = await log_non_blocking(RequestContext(request=request, response=response)) 63 | if task: 64 | await task 65 | task = await log_non_blocking(RequestContext(request=request, response=response)) 66 | if task: 67 | await task 68 | with open(tmp_path / "json_log.log", "r") as f: 69 | lines = f.readlines() 70 | assert len(lines) == 2 71 | log_data = json.loads(lines[0]) 72 | assert log_data["request"]["model"] == "gpt-3.5-turbo" 73 | assert log_data["response"] == "Test response message" 74 | -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import microcore as mc 2 | import requests 3 | from tests.conftest import ServerFixture 4 | 5 | 6 | def configure_mc_to_use_local_proxy(cfg: ServerFixture): 7 | mc.configure( 8 | LLM_API_TYPE="openai", 9 | LLM_API_BASE=f"http://127.0.0.1:{cfg.port}/v1", # Test server port 10 | LLM_API_KEY=cfg.api_key, # Not used but required 11 | MODEL=cfg.model, 12 | ) 13 | 14 | 15 | def test_france_capital_query(server_config_fn: ServerFixture): 16 | configure_mc_to_use_local_proxy(server_config_fn) 17 | response = mc.llm("What is the capital of France?\n (!) 
Respond with 1 word.") 18 | assert ( 19 | "paris" in response.lower().strip() 20 | ), f"Expected 'Paris' in response, got: {response}" 21 | 22 | 23 | def test_direct_api_call(server_config_fn: ServerFixture): 24 | """Test directly calling the API without microcore.""" 25 | cfg = server_config_fn 26 | response = requests.post( 27 | f"http://127.0.0.1:{cfg.port}/v1/chat/completions", 28 | json={ 29 | "model": cfg.model, 30 | "messages": [{"role": "user", "content": "What is the capital of France?"}], 31 | }, 32 | headers={ 33 | "Content-Type": "application/json", 34 | "authorization": f"bearer {cfg.api_key}", 35 | }, 36 | timeout=120, 37 | ) 38 | 39 | assert ( 40 | response.status_code == 200 41 | ), f"Expected status code 200, got {response.status_code}" 42 | 43 | data = response.json() 44 | assert "choices" in data, f"Missing 'choices' in response: {data}" 45 | assert len(data["choices"]) > 0, "No choices returned" 46 | assert ( 47 | "message" in data["choices"][0] 48 | ), f"Missing 'message' in first choice: {data['choices'][0]}" 49 | assert ( 50 | "Paris" in data["choices"][0]["message"]["content"] 51 | ), f"Expected 'Paris' in response, got: {data['choices'][0]['message']['content']}" 52 | 53 | 54 | def test_streaming_response(server_config_fn: ServerFixture): 55 | configure_mc_to_use_local_proxy(server_config_fn) 56 | collected_text = [] 57 | mc.llm( 58 | "Count from 1 to 5, each number as english word (one, two, ...) on a new line", 59 | callback=lambda chunk: collected_text.append(str(chunk).lower()), 60 | ) 61 | full_response = "".join(collected_text) 62 | for i in ["one", "two", "three", "four", "five"]: 63 | assert i in full_response, f"Expected '{i}' in response, got: {full_response}" 64 | assert len(collected_text) >= 1 65 | 66 | 67 | def test_models(server_config_fn: ServerFixture): 68 | """Test directly calling the API without microcore.""" 69 | cfg = server_config_fn 70 | from openai import OpenAI 71 | 72 | client = OpenAI(api_key=cfg.api_key, base_url=f"http://127.0.0.1:{cfg.port}/v1") 73 | models = client.models.list() 74 | assert len(models.data) == 2, "Wrong models returned" 75 | model_ids = {model.id for model in models.data} 76 | assert model_ids == {"my-gpt", "*"} 77 | -------------------------------------------------------------------------------- /lm_proxy/loggers.py: -------------------------------------------------------------------------------- 1 | """LLM Request logging.""" 2 | import abc 3 | import json 4 | import os 5 | from dataclasses import dataclass, field 6 | from typing import Union, Callable 7 | 8 | from .base_types import RequestContext 9 | from .utils import CustomJsonEncoder, resolve_instance_or_callable, resolve_obj_path 10 | 11 | 12 | class AbstractLogEntryTransformer(abc.ABC): # pylint: disable=too-few-public-methods 13 | """Transform RequestContext into a dictionary of logged attributes.""" 14 | @abc.abstractmethod 15 | def __call__(self, request_context: RequestContext) -> dict: 16 | raise NotImplementedError() 17 | 18 | 19 | class AbstractLogWriter(abc.ABC): # pylint: disable=too-few-public-methods 20 | """Writes the logged data to a destination.""" 21 | @abc.abstractmethod 22 | def __call__(self, logged_data: dict): 23 | raise NotImplementedError() 24 | 25 | 26 | class LogEntryTransformer(AbstractLogEntryTransformer): # pylint: disable=too-few-public-methods 27 | """ 28 | Transforms RequestContext into a dictionary of logged attributes. 
29 | The mapping is provided as keyword arguments, where keys are the names of the 30 | logged attributes, and values are the paths to the attributes in RequestContext. 31 | """ 32 | def __init__(self, **kwargs): 33 | self.mapping = kwargs 34 | 35 | def __call__(self, request_context: RequestContext) -> dict: 36 | result = {} 37 | for key, path in self.mapping.items(): 38 | result[key] = resolve_obj_path(request_context, path) 39 | return result 40 | 41 | 42 | @dataclass 43 | class BaseLogger: 44 | """Base LLM request logger.""" 45 | log_writer: AbstractLogWriter | str | dict 46 | entry_transformer: AbstractLogEntryTransformer | str | dict = field(default=None) 47 | 48 | def __post_init__(self): 49 | self.entry_transformer = resolve_instance_or_callable( 50 | self.entry_transformer, 51 | debug_name="logging..entry_transformer", 52 | ) 53 | self.log_writer = resolve_instance_or_callable( 54 | self.log_writer, 55 | debug_name="logging..log_writer", 56 | ) 57 | 58 | def _transform(self, request_context: RequestContext) -> dict: 59 | return ( 60 | self.entry_transformer(request_context) 61 | if self.entry_transformer 62 | else request_context.to_dict() 63 | ) 64 | 65 | def __call__(self, request_context: RequestContext): 66 | self.log_writer(self._transform(request_context)) 67 | 68 | 69 | @dataclass 70 | class JsonLogWriter(AbstractLogWriter): 71 | """Writes logged data to a JSON file.""" 72 | file_name: str 73 | 74 | def __post_init__(self): 75 | dir_path = os.path.dirname(self.file_name) 76 | if dir_path: 77 | os.makedirs(dir_path, exist_ok=True) 78 | # Create the file if it doesn't exist 79 | with open(self.file_name, "a", encoding="utf-8"): 80 | pass 81 | 82 | def __call__(self, logged_data: dict): 83 | with open(self.file_name, "a", encoding="utf-8") as f: 84 | f.write(json.dumps(logged_data, cls=CustomJsonEncoder) + "\n") 85 | 86 | 87 | TLogger = Union[BaseLogger, Callable[[RequestContext], None]] 88 | -------------------------------------------------------------------------------- /.github/workflows/gito-react-to-comments.yml: -------------------------------------------------------------------------------- 1 | name: "Gito: React to GitHub comment" 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | permissions: 8 | contents: write # to make PR 9 | issues: write 10 | pull-requests: write 11 | # read: to download the code review artifact 12 | # write: to trigger other actions 13 | actions: write 14 | 15 | jobs: 16 | process-comment: 17 | if: | 18 | github.event.issue.pull_request && 19 | ( 20 | github.event.comment.author_association == 'OWNER' || 21 | github.event.comment.author_association == 'MEMBER' || 22 | github.event.comment.author_association == 'COLLABORATOR' 23 | ) && 24 | ( 25 | startsWith(github.event.comment.body, '/') || 26 | startsWith(github.event.comment.body, 'gito') || 27 | startsWith(github.event.comment.body, 'ai') || 28 | startsWith(github.event.comment.body, 'bot') || 29 | contains(github.event.comment.body, '@gito') || 30 | contains(github.event.comment.body, '@ai') || 31 | contains(github.event.comment.body, '@bot') 32 | ) 33 | runs-on: ubuntu-latest 34 | 35 | steps: 36 | - name: Get PR details 37 | id: pr 38 | uses: actions/github-script@v7 39 | with: 40 | script: | 41 | const pr = await github.rest.pulls.get({ 42 | owner: context.repo.owner, 43 | repo: context.repo.repo, 44 | pull_number: context.issue.number 45 | }); 46 | return { 47 | head_ref: pr.data.head.ref, 48 | head_sha: pr.data.head.sha, 49 | base_ref: pr.data.base.ref 50 | }; 51 | 52 | - name: 
Checkout repository 53 | uses: actions/checkout@v4 54 | with: 55 | repository: ${{ github.repository }} 56 | token: ${{ secrets.GITHUB_TOKEN }} 57 | ref: ${{ fromJson(steps.pr.outputs.result).head_ref }} 58 | fetch-depth: 0 59 | 60 | - name: Set up Python 61 | uses: actions/setup-python@v5 62 | with: { python-version: "3.13" } 63 | 64 | - name: Fetch Latest Gito Version 65 | id: gito-version 66 | run: pip index versions gito.bot 2>/dev/null | head -1 | sed -n 's/.* (\([^)]*\)).*/version=\1/p' >> $GITHUB_OUTPUT 67 | 68 | - uses: actions/cache@v4 69 | id: cache 70 | with: 71 | path: | 72 | ${{ env.pythonLocation }}/lib/python3.13/site-packages 73 | ${{ env.pythonLocation }}/bin 74 | key: gito_v${{ steps.gito-version.outputs.version }} 75 | 76 | - name: Install Gito 77 | if: steps.cache.outputs.cache-hit != 'true' 78 | run: pip install gito.bot~=3.2 79 | 80 | - name: Run Gito react 81 | env: 82 | # LLM config is needed only if answer_github_comments = true in .gito/config.toml 83 | # Otherwise, use LLM_API_TYPE: none 84 | LLM_API_TYPE: open_ai 85 | LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} 86 | MODEL: gpt-4.1 87 | JIRA_TOKEN: ${{ secrets.JIRA_TOKEN }} 88 | JIRA_URL: ${{ secrets.JIRA_URL }} 89 | JIRA_USER: ${{ secrets.JIRA_USER }} 90 | LINEAR_API_KEY: ${{ secrets.LINEAR_API_KEY }} 91 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 92 | run: | 93 | gito react-to-comment ${{ github.event.comment.id }} -------------------------------------------------------------------------------- /lm_proxy/api_key_check/with_request.py: -------------------------------------------------------------------------------- 1 | """ 2 | API key check implementation using HTTP requests. 3 | """ 4 | from typing import Optional 5 | from dataclasses import dataclass, field 6 | import requests 7 | 8 | from ..config import TApiKeyCheckFunc 9 | 10 | 11 | @dataclass(slots=True) 12 | class CheckAPIKeyWithRequest: # pylint: disable=too-many-instance-attributes 13 | """ 14 | Validates a Client API key by making an HTTP request to a specified URL. 15 | """ 16 | url: str = field() 17 | method: str = field(default="get") 18 | headers: dict = field(default_factory=dict) 19 | response_as_user_info: bool = field(default=False) 20 | group_field: Optional[str] = field(default=None) 21 | """ 22 | Field in the JSON response to extract the user group. 23 | """ 24 | default_group: str = field(default="default") 25 | """ 26 | User group to assign if group_field is not used. 27 | """ 28 | key_placeholder: str = field(default="{api_key}") 29 | use_cache: bool = field(default=False) 30 | """ 31 | Whether to cache the results of API key checks. 32 | Requires 'cachetools' package if set to True. 
33 | """ 34 | cache_size: int = field(default=1024 * 16) 35 | cache_ttl: int = field(default=60 * 5) # 5 minutes 36 | timeout: int = field(default=5) # seconds 37 | _func: TApiKeyCheckFunc = field(init=False, repr=False) 38 | 39 | def __post_init__(self): 40 | def check_func(api_key: str) -> Optional[tuple[str, dict]]: 41 | try: 42 | url = self.url.replace(self.key_placeholder, api_key) 43 | headers = { 44 | k: str(v).replace(self.key_placeholder, api_key) 45 | for k, v in self.headers.items() 46 | } 47 | response = requests.request( 48 | method=self.method, 49 | url=url, 50 | headers=headers, 51 | timeout=self.timeout 52 | ) 53 | response.raise_for_status() 54 | group = self.default_group 55 | user_info = None 56 | if self.response_as_user_info: 57 | user_info = response.json() 58 | if self.group_field: 59 | group = user_info.get(self.group_field, self.default_group) 60 | return group, user_info 61 | except requests.exceptions.RequestException: 62 | return None 63 | 64 | if self.use_cache: 65 | try: 66 | import cachetools # pylint: disable=import-outside-toplevel 67 | except ImportError as e: 68 | raise ImportError( 69 | "Missing optional dependency 'cachetools'. " 70 | "Using 'lm_proxy.api_key_check.CheckAPIKeyWithRequest' with 'use_cache = true' " 71 | "requires installing 'cachetools' package. " 72 | "\nPlease install it with following command: 'pip install cachetools'" 73 | ) from e 74 | cache = cachetools.TTLCache(maxsize=self.cache_size, ttl=self.cache_ttl) 75 | self._func = cachetools.cached(cache)(check_func) 76 | else: 77 | self._func = check_func 78 | 79 | def __call__(self, api_key: str) -> Optional[tuple[str, dict]]: 80 | return self._func(api_key) 81 | -------------------------------------------------------------------------------- /lm_proxy/utils.py: -------------------------------------------------------------------------------- 1 | """Common usage utility functions.""" 2 | import os 3 | import json 4 | import inspect 5 | import logging 6 | from typing import Any, Callable, Union 7 | from datetime import datetime, date, time 8 | 9 | from microcore.utils import resolve_callable 10 | from starlette.requests import Request 11 | 12 | 13 | def resolve_obj_path(obj, path: str, default=None): 14 | """ 15 | Resolves dotted path supporting 16 | attributes, dict keys and list indices. 17 | """ 18 | for part in path.split("."): 19 | try: 20 | if isinstance(obj, dict): 21 | obj = obj[part] 22 | elif isinstance(obj, list): 23 | part = int(part) # Convert to int for list indexing 24 | obj = obj[part] 25 | else: 26 | obj = getattr(obj, part) 27 | except (AttributeError, KeyError, TypeError, ValueError, IndexError): 28 | return default 29 | return obj 30 | 31 | 32 | def resolve_instance_or_callable( 33 | item: Union[str, Callable, dict, object], 34 | class_key: str = "class", 35 | debug_name: str = None, 36 | allow_types: list[type] = None, 37 | ) -> Callable | object | None: 38 | """ 39 | Resolves a class instance or callable from various configuration formats. 
40 | """ 41 | if item is None or item == "": 42 | return None 43 | if isinstance(item, dict): 44 | if class_key not in item: 45 | raise ValueError( 46 | f"'{class_key}' key is missing in {debug_name or 'item'} config: {item}" 47 | ) 48 | args = dict(item) 49 | class_name = args.pop(class_key) 50 | constructor = resolve_callable(class_name) 51 | return constructor(**args) 52 | if isinstance(item, str): 53 | fn = resolve_callable(item) 54 | return fn() if inspect.isclass(fn) else fn 55 | if callable(item): 56 | return item() if inspect.isclass(item) else item 57 | if allow_types and any(isinstance(item, t) for t in allow_types): 58 | return item 59 | raise ValueError(f"Invalid {debug_name or 'item'} config: {item}") 60 | 61 | 62 | class CustomJsonEncoder(json.JSONEncoder): 63 | """ 64 | Custom JSON encoder that handles datetime / date / time, pydantic models, etc. 65 | """ 66 | def default(self, o): 67 | if isinstance(o, (datetime, date, time)): 68 | return o.isoformat() 69 | if hasattr(o, "model_dump"): 70 | return o.model_dump() 71 | if hasattr(o, "dict"): 72 | return o.dict() 73 | if hasattr(o, "__dict__"): 74 | return o.__dict__ 75 | return super().default(o) 76 | 77 | 78 | def get_client_ip(request: Request) -> str: 79 | """ 80 | Extract the client's IP address from the request. 81 | """ 82 | # Try different headers in order of preference 83 | if forwarded_for := request.headers.get("X-Forwarded-For"): 84 | return forwarded_for.split(",")[0].strip() 85 | if real_ip := request.headers.get("X-Real-IP"): 86 | return real_ip 87 | if forwarded := request.headers.get("Forwarded"): 88 | # Parse Forwarded header (RFC 7239) 89 | return forwarded.split("for=")[1].split(";")[0].strip() 90 | 91 | # Fallback to direct client 92 | return request.client.host if request.client else "unknown" 93 | 94 | 95 | def replace_env_strings_recursive(data: Any) -> Any: 96 | """ 97 | Recursively traverses dicts and lists, replacing all string values 98 | that start with 'env:' with the corresponding environment variable. 99 | For example, a string "env:VAR_NAME" will be replaced by the value of the 100 | environment variable "VAR_NAME". 
101 | """ 102 | if isinstance(data, dict): 103 | return {k: replace_env_strings_recursive(v) for k, v in data.items()} 104 | if isinstance(data, list): 105 | return [replace_env_strings_recursive(i) for i in data] 106 | if isinstance(data, str) and data.startswith("env:"): 107 | env_var_name = data[4:] 108 | if env_var_name not in os.environ: 109 | logging.warning("Environment variable '%s' not found", env_var_name) 110 | return os.environ.get(env_var_name, "") 111 | return data 112 | -------------------------------------------------------------------------------- /lm_proxy/bootstrap.py: -------------------------------------------------------------------------------- 1 | """Initialization and bootstrapping.""" 2 | import sys 3 | import logging 4 | import inspect 5 | from os import PathLike 6 | from datetime import datetime 7 | from typing import TYPE_CHECKING 8 | 9 | import microcore as mc 10 | from microcore import ui 11 | from microcore.configuration import get_bool_from_env 12 | from dotenv import load_dotenv 13 | 14 | from .config import Config 15 | from .utils import resolve_instance_or_callable 16 | 17 | if TYPE_CHECKING: 18 | from .loggers import TLogger 19 | 20 | 21 | def setup_logging(log_level: int = logging.INFO): 22 | """Setup logging format and level.""" 23 | class CustomFormatter(logging.Formatter): 24 | """Custom log formatter with colouring.""" 25 | def format(self, record): 26 | dt = datetime.fromtimestamp(record.created).strftime("%H:%M:%S") 27 | message, level_name = record.getMessage(), record.levelname 28 | if record.levelno == logging.WARNING: 29 | message = mc.ui.yellow(message) 30 | level_name = mc.ui.yellow(level_name) 31 | if record.levelno >= logging.ERROR: 32 | message = mc.ui.red(message) 33 | level_name = mc.ui.red(level_name) 34 | return f"{dt} {level_name}: {message}" 35 | 36 | handler = logging.StreamHandler() 37 | handler.setFormatter(CustomFormatter()) 38 | logging.basicConfig(level=log_level, handlers=[handler]) 39 | 40 | 41 | class Env: 42 | """Runtime environment singleton.""" 43 | config: Config 44 | connections: dict[str, mc.types.LLMAsyncFunctionType] 45 | debug: bool 46 | components: dict 47 | loggers: list["TLogger"] 48 | 49 | def _init_components(self): 50 | self.components = {} 51 | for name, component_data in self.config.components.items(): 52 | self.components[name] = resolve_instance_or_callable(component_data) 53 | logging.info("Component initialized: '%s'.", name) 54 | 55 | @staticmethod 56 | def init(config: Config | str | PathLike, debug: bool = False): 57 | """Initializes the LM-Proxy runtime environment singleton.""" 58 | env.debug = debug 59 | 60 | if not isinstance(config, Config): 61 | if isinstance(config, (str, PathLike)): 62 | config = Config.load(config) 63 | else: 64 | raise ValueError("config must be a path (str or PathLike) or Config instance") 65 | env.config = config 66 | 67 | env._init_components() 68 | 69 | env.loggers = [resolve_instance_or_callable(logger) for logger in env.config.loggers] 70 | 71 | # initialize connections 72 | env.connections = {} 73 | for conn_name, conn_config in env.config.connections.items(): 74 | logging.info("Initializing '%s' LLM proxy connection...", conn_name) 75 | try: 76 | if inspect.iscoroutinefunction(conn_config): 77 | env.connections[conn_name] = conn_config 78 | elif isinstance(conn_config, str): 79 | env.connections[conn_name] = resolve_instance_or_callable(conn_config) 80 | else: 81 | mc.configure( 82 | **conn_config, EMBEDDING_DB_TYPE=mc.EmbeddingDbType.NONE 83 | ) 84 | 
env.connections[conn_name] = mc.env().llm_async_function 85 | except mc.LLMConfigError as e: 86 | raise ValueError( 87 | f"Error in configuration for connection '{conn_name}': {e}" 88 | ) from e 89 | 90 | logging.info("Done initializing %d connections.", len(env.connections)) 91 | 92 | 93 | env = Env() 94 | 95 | 96 | def bootstrap(config: str | Config = "config.toml", env_file: str = ".env", debug=None): 97 | """Bootstraps the LM-Proxy environment.""" 98 | def log_bootstrap(): 99 | cfg_val = 'dynamic' if isinstance(config, Config) else ui.blue(config) 100 | cfg_line = f"\n - Config{ui.gray('......')}[ {cfg_val} ]" 101 | env_line = f"\n - Env. File{ui.gray('...')}[ {ui.blue(env_file)} ]" if env_file else "" 102 | dbg_line = f"\n - Debug{ui.gray('.......')}[ {ui.yellow('On')} ]" if debug else "" 103 | message = f"Bootstrapping {ui.magenta('LM-Proxy')}...{cfg_line}{env_line}{dbg_line}" 104 | logging.info(message) 105 | 106 | if env_file: 107 | load_dotenv(env_file, override=True) 108 | if debug is None: 109 | debug = "--debug" in sys.argv or get_bool_from_env("LM_PROXY_DEBUG", False) 110 | setup_logging(logging.DEBUG if debug else logging.INFO) 111 | mc.logging.LoggingConfig.OUTPUT_METHOD = logging.info 112 | log_bootstrap() 113 | Env.init(config, debug=debug) 114 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from types import SimpleNamespace 4 | 5 | import pytest 6 | from starlette.requests import Request 7 | 8 | from lm_proxy.utils import ( 9 | resolve_instance_or_callable, 10 | replace_env_strings_recursive, 11 | resolve_obj_path, 12 | get_client_ip, 13 | ) 14 | 15 | 16 | def test_resolve_instance_or_callable(): 17 | assert resolve_instance_or_callable(None) is None 18 | 19 | obj1, obj2 = object(), object() 20 | ins = resolve_instance_or_callable(obj1, allow_types=[object]) 21 | assert ins is obj1 and ins is not obj2 22 | 23 | with pytest.raises(ValueError): 24 | resolve_instance_or_callable(123) 25 | 26 | with pytest.raises(ValueError): 27 | resolve_instance_or_callable([]) 28 | 29 | with pytest.raises(ValueError): 30 | resolve_instance_or_callable({}) 31 | 32 | assert resolve_instance_or_callable(lambda: 42)() == 42 33 | 34 | class MyClass: 35 | def __init__(self, value=0): 36 | self.value = value 37 | 38 | res = resolve_instance_or_callable(lambda: MyClass(10), allow_types=[MyClass]) 39 | assert not isinstance(res, MyClass) and res().value == 10 40 | 41 | ins = resolve_instance_or_callable(MyClass(20), allow_types=[MyClass]) 42 | assert isinstance(ins, MyClass) and ins.value == 20 43 | assert resolve_instance_or_callable( 44 | "lm_proxy.utils.resolve_instance_or_callable" 45 | ) is resolve_instance_or_callable 46 | 47 | ins = resolve_instance_or_callable({ 48 | 'class': 'lm_proxy.loggers.JsonLogWriter', 49 | 'file_name': 'test.log' 50 | }) 51 | assert ins.__class__.__name__ == 'JsonLogWriter' and ins.file_name == 'test.log' 52 | 53 | 54 | def test_replace_env_strings_recursive(caplog): 55 | os.environ['TEST_VAR1'] = 'env_value1' 56 | os.environ['TEST_VAR2'] = 'env_value2' 57 | assert replace_env_strings_recursive("env:TEST_VAR1") == 'env_value1' 58 | 59 | caplog.set_level(logging.WARNING) 60 | assert replace_env_strings_recursive("env:NON_EXIST") == '' 61 | assert len(caplog.records) == 1 62 | 63 | assert replace_env_strings_recursive([["env:TEST_VAR1"]]) == [['env_value1']] 64 | assert 
replace_env_strings_recursive( 65 | {"data": {"field": "env:TEST_VAR1"}} 66 | ) == {"data": {"field": "env_value1"}} 67 | 68 | 69 | def test_resolve_obj_path(): 70 | o = SimpleNamespace( 71 | a=SimpleNamespace( 72 | b=dict( 73 | c=[None, lambda x: x * 2] 74 | ) 75 | ) 76 | ) 77 | assert resolve_obj_path(o, "a.b.c.1")(10) == 20 78 | assert resolve_obj_path(o, "a.b.cc.1", "no") == "no" 79 | 80 | 81 | def test_get_client_ip(): 82 | request = Request(scope={ 83 | "type": "http", 84 | "headers": [], 85 | }) 86 | assert get_client_ip(request) == "unknown" 87 | 88 | request = Request(scope={ 89 | "type": "http", 90 | "headers": [(b"x-forwarded-for", b"192.168.1.1")], 91 | }) 92 | assert get_client_ip(request) == "192.168.1.1" 93 | 94 | request = Request(scope={ 95 | "type": "http", 96 | "headers": [(b"x-forwarded-for", b"192.168.1.1, 10.0.0.2")], 97 | }) 98 | assert get_client_ip(request) == "192.168.1.1" # should take the first IP 99 | 100 | request = Request(scope={ 101 | "type": "http", 102 | "headers": [(b"x-real-ip", b"203.0.113.5")], 103 | }) 104 | assert get_client_ip(request) == "203.0.113.5" 105 | 106 | request = Request(scope={ 107 | "type": "http", 108 | "headers": [], 109 | "client": ("127.0.0.1", 12345), 110 | }) 111 | assert get_client_ip(request) == "127.0.0.1" 112 | 113 | request = Request(scope={ 114 | "type": "http", 115 | "headers": [ 116 | (b"x-real-ip", b"203.0.113.5"), 117 | (b"x-forwarded-for", b"192.168.1.1, 10.0.0.2"), 118 | ], 119 | }) 120 | assert get_client_ip(request) == "192.168.1.1" # x-forwarded-for has priority 121 | 122 | # RFC 7239 Forwarded header 123 | result = get_client_ip(Request(scope={ 124 | "type": "http", 125 | "headers": [(b"forwarded", b"for=192.0.2.60;proto=http;by=203.0.113.43")], 126 | })) 127 | assert result == "192.0.2.60" 128 | 129 | # IPv6 address 130 | assert get_client_ip(Request(scope={ 131 | "type": "http", 132 | "headers": [ 133 | (b"user-agent", b"Mozilla/5.0"), 134 | (b"x-forwarded-for", b"2001:0db8:85a3:0000:0000:8a2e:0370:7334"), 135 | (b"content-type", b"application/json"), 136 | ], 137 | })) == "2001:0db8:85a3:0000:0000:8a2e:0370:7334" 138 | 139 | """Test when client IP is in scope""" 140 | assert get_client_ip(Request(scope={ 141 | "type": "http", 142 | "headers": [], 143 | "client": ("192.168.1.100", 8080), 144 | })) == "192.168.1.100" 145 | -------------------------------------------------------------------------------- /tests/configs/test_disabled.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from types import SimpleNamespace 4 | 5 | import pytest 6 | from starlette.requests import Request 7 | 8 | from lm_proxy.utils import ( 9 | resolve_instance_or_callable, 10 | replace_env_strings_recursive, 11 | resolve_obj_path, 12 | get_client_ip, 13 | ) 14 | 15 | 16 | def test_resolve_instance_or_callable(): 17 | assert resolve_instance_or_callable(None) is None 18 | 19 | obj1, obj2 = object(), object() 20 | ins = resolve_instance_or_callable(obj1, allow_types=[object]) 21 | assert ins is obj1 and ins is not obj2 22 | 23 | with pytest.raises(ValueError): 24 | resolve_instance_or_callable(123) 25 | 26 | with pytest.raises(ValueError): 27 | resolve_instance_or_callable([]) 28 | 29 | with pytest.raises(ValueError): 30 | resolve_instance_or_callable({}) 31 | 32 | assert resolve_instance_or_callable(lambda: 42)() == 42 33 | 34 | class MyClass: 35 | def __init__(self, value=0): 36 | self.value = value 37 | 38 | res = resolve_instance_or_callable(lambda: MyClass(10), 
allow_types=[MyClass]) 39 | assert not isinstance(res, MyClass) and res().value == 10 40 | 41 | ins = resolve_instance_or_callable(MyClass(20), allow_types=[MyClass]) 42 | assert isinstance(ins, MyClass) and ins.value == 20 43 | assert resolve_instance_or_callable( 44 | "lm_proxy.utils.resolve_instance_or_callable" 45 | ) is resolve_instance_or_callable 46 | 47 | ins = resolve_instance_or_callable({ 48 | 'class': 'lm_proxy.loggers.JsonLogWriter', 49 | 'file_name': 'test.log' 50 | }) 51 | assert ins.__class__.__name__ == 'JsonLogWriter' and ins.file_name == 'test.log' 52 | 53 | 54 | def test_replace_env_strings_recursive(caplog): 55 | os.environ['TEST_VAR1'] = 'env_value1' 56 | os.environ['TEST_VAR2'] = 'env_value2' 57 | assert replace_env_strings_recursive("env:TEST_VAR1") == 'env_value1' 58 | 59 | caplog.set_level(logging.WARNING) 60 | assert replace_env_strings_recursive("env:NON_EXIST") == '' 61 | assert len(caplog.records) == 1 62 | 63 | assert replace_env_strings_recursive([["env:TEST_VAR1"]]) == [['env_value1']] 64 | assert replace_env_strings_recursive( 65 | {"data": {"field": "env:TEST_VAR1"}} 66 | ) == {"data": {"field": "env_value1"}} 67 | 68 | 69 | def test_resolve_obj_path(): 70 | o = SimpleNamespace( 71 | a=SimpleNamespace( 72 | b=dict( 73 | c=[None, lambda x: x * 2] 74 | ) 75 | ) 76 | ) 77 | assert resolve_obj_path(o, "a.b.c.1")(10) == 20 78 | assert resolve_obj_path(o, "a.b.cc.1", "no") == "no" 79 | 80 | 81 | def test_get_client_ip(): 82 | request = Request(scope={ 83 | "type": "http", 84 | "headers": [], 85 | }) 86 | assert get_client_ip(request) == "unknown" 87 | 88 | request = Request(scope={ 89 | "type": "http", 90 | "headers": [(b"x-forwarded-for", b"192.168.1.1")], 91 | }) 92 | assert get_client_ip(request) == "192.168.1.1" 93 | 94 | request = Request(scope={ 95 | "type": "http", 96 | "headers": [(b"x-forwarded-for", b"192.168.1.1, 10.0.0.2")], 97 | }) 98 | assert get_client_ip(request) == "192.168.1.1" # should take the first IP 99 | 100 | request = Request(scope={ 101 | "type": "http", 102 | "headers": [(b"x-real-ip", b"203.0.113.5")], 103 | }) 104 | assert get_client_ip(request) == "203.0.113.5" 105 | 106 | request = Request(scope={ 107 | "type": "http", 108 | "headers": [], 109 | "client": ("127.0.0.1", 12345), 110 | }) 111 | assert get_client_ip(request) == "127.0.0.1" 112 | 113 | request = Request(scope={ 114 | "type": "http", 115 | "headers": [ 116 | (b"x-real-ip", b"203.0.113.5"), 117 | (b"x-forwarded-for", b"192.168.1.1, 10.0.0.2"), 118 | ], 119 | }) 120 | assert get_client_ip(request) == "192.168.1.1" # x-forwarded-for has priority 121 | 122 | # RFC 7239 Forwarded header 123 | result = get_client_ip(Request(scope={ 124 | "type": "http", 125 | "headers": [(b"forwarded", b"for=192.0.2.60;proto=http;by=203.0.113.43")], 126 | })) 127 | assert result == "192.0.2.60" 128 | 129 | # IPv6 address 130 | assert get_client_ip(Request(scope={ 131 | "type": "http", 132 | "headers": [ 133 | (b"user-agent", b"Mozilla/5.0"), 134 | (b"x-forwarded-for", b"2001:0db8:85a3:0000:0000:8a2e:0370:7334"), 135 | (b"content-type", b"application/json"), 136 | ], 137 | })) == "2001:0db8:85a3:0000:0000:8a2e:0370:7334" 138 | 139 | """Test when client IP is in scope""" 140 | assert get_client_ip(Request(scope={ 141 | "type": "http", 142 | "headers": [], 143 | "client": ("192.168.1.100", 8080), 144 | })) == "192.168.1.100" 145 | -------------------------------------------------------------------------------- /lm_proxy/config.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Configuration models for LM-Proxy settings. 3 | This module defines Pydantic models that match the structure of config.toml. 4 | """ 5 | 6 | import os 7 | from enum import StrEnum 8 | from typing import Union, Callable, Dict, Optional 9 | from importlib.metadata import entry_points 10 | 11 | from pydantic import BaseModel, Field, ConfigDict 12 | 13 | from .utils import resolve_instance_or_callable, replace_env_strings_recursive 14 | from .loggers import TLogger 15 | 16 | 17 | class ModelListingMode(StrEnum): 18 | """ 19 | Enum for model listing modes in the /models endpoint. 20 | """ 21 | 22 | # Show all models from API provider matching the patterns (not implemented yet) 23 | EXPAND_WILDCARDS = "expand_wildcards" 24 | # Ignore wildcard models, show only exact model names 25 | # (keys of the config.routing dict not containing * or ?) 26 | IGNORE_WILDCARDS = "ignore_wildcards" 27 | # Show everything as is, including wildcard patterns 28 | AS_IS = "as_is" 29 | 30 | 31 | class Group(BaseModel): 32 | """User group configuration.""" 33 | api_keys: list[str] = Field(default_factory=list) 34 | allowed_connections: str = Field(default="*") # Comma-separated list or "*" 35 | 36 | def allows_connecting_to(self, connection_name: str) -> bool: 37 | """Check if the group allows access to the specified connection.""" 38 | if self.allowed_connections == "*": 39 | return True 40 | allowed = [c.strip() for c in self.allowed_connections.split(",") if c.strip()] 41 | return connection_name in allowed 42 | 43 | 44 | TApiKeyCheckResult = Optional[Union[str, tuple[str, dict]]] 45 | TApiKeyCheckFunc = Callable[[str | None], TApiKeyCheckResult] 46 | 47 | 48 | class Config(BaseModel): 49 | """Main configuration model matching config.toml structure.""" 50 | 51 | model_config = ConfigDict( 52 | extra="forbid", 53 | arbitrary_types_allowed=True, 54 | ) 55 | enabled: bool = True 56 | host: str = "0.0.0.0" 57 | port: int = 8000 58 | ssl_keyfile: str | None = None 59 | """ Path to SSL key file for HTTPS support, if None, HTTP is used. """ 60 | ssl_certfile: str | None = None 61 | """ Path to SSL certificate file for HTTPS support, if None, HTTP is used. 
""" 62 | api_prefix: str = "/v1" 63 | """ Prefix for API endpoints, default is /v1 """ 64 | dev_autoreload: bool = False 65 | connections: dict[str, Union[dict, Callable, str]] = Field( 66 | ..., # Required field (no default) 67 | description="Dictionary of connection configurations", 68 | examples=[{"openai": {"api_key": "sk-..."}}], 69 | ) 70 | routing: dict[str, str] = Field(default_factory=dict) 71 | """ model_name_pattern* => connection_name.< model | * >, example: {"gpt-*": "oai.*"} """ 72 | groups: dict[str, Group] = Field(default_factory=lambda: {"default": Group()}) 73 | api_key_check: Union[str, TApiKeyCheckFunc, dict] = Field( 74 | default="lm_proxy.api_key_check.check_api_key_in_config", 75 | description="Function to check Virtual API keys", 76 | ) 77 | loggers: list[Union[str, dict, TLogger]] = Field(default_factory=list) 78 | encryption_key: str = Field( 79 | default="Eclipse", 80 | description="Key for encrypting sensitive data (must be explicitly set)", 81 | ) 82 | model_listing_mode: ModelListingMode = Field( 83 | default=ModelListingMode.AS_IS, 84 | description="How to handle wildcard models in /models endpoint", 85 | ) 86 | model_info: dict[str, dict] = Field( 87 | default_factory=dict, 88 | description="Additional metadata for /models endpoint", 89 | ) 90 | components: dict[str, Union[str, Callable, dict]] = Field(default_factory=dict) 91 | 92 | def __init__(self, **data): 93 | super().__init__(**data) 94 | self.api_key_check = resolve_instance_or_callable( 95 | self.api_key_check, 96 | debug_name="check_api_key", 97 | ) 98 | 99 | @staticmethod 100 | def _load_raw(config_path: str | os.PathLike = "config.toml") -> Union["Config", Dict]: 101 | config_ext = os.path.splitext(config_path)[1].lower().lstrip(".") 102 | for entry_point in entry_points(group="config.loaders"): 103 | if config_ext == entry_point.name: 104 | loader = entry_point.load() 105 | config_data = loader(config_path) 106 | return config_data 107 | 108 | raise ValueError(f"No loader found for configuration file extension: {config_ext}") 109 | 110 | @staticmethod 111 | def load(config_path: str | os.PathLike = "config.toml") -> "Config": 112 | """ 113 | Load configuration from a TOML or Python file. 
114 | 115 | Args: 116 | config_path: Path to the config.toml file 117 | 118 | Returns: 119 | Config object with parsed configuration 120 | """ 121 | config = Config._load_raw(config_path) 122 | if isinstance(config, dict): 123 | config = replace_env_strings_recursive(config) 124 | config = Config(**config) 125 | elif not isinstance(config, Config): 126 | raise TypeError("Loaded configuration must be a dict or Config instance") 127 | return config 128 | -------------------------------------------------------------------------------- /lm_proxy/core.py: -------------------------------------------------------------------------------- 1 | """Core LM-Proxy logic""" 2 | import asyncio 3 | import fnmatch 4 | import json 5 | import logging 6 | import secrets 7 | import time 8 | import hashlib 9 | from datetime import datetime 10 | from typing import Optional 11 | 12 | from fastapi import HTTPException 13 | from starlette.requests import Request 14 | from starlette.responses import JSONResponse, Response, StreamingResponse 15 | 16 | from .base_types import ChatCompletionRequest, RequestContext 17 | from .bootstrap import env 18 | from .config import Config 19 | from .utils import get_client_ip 20 | 21 | 22 | def parse_routing_rule(rule: str, config: Config) -> tuple[str, str]: 23 | """ 24 | Parses a routing rule in the format 'connection.model' or 'connection.*'. 25 | Returns a tuple of (connection_name, model_part). 26 | Args: 27 | rule (str): The routing rule string. 28 | config (Config): The configuration object containing defined connections. 29 | Raises: 30 | ValueError: If the rule format is invalid or the connection is unknown. 31 | """ 32 | if "." not in rule: 33 | raise ValueError( 34 | f"Invalid routing rule '{rule}'. Expected format: 'connection.model' or 'connection.*'" 35 | ) 36 | connection_name, model_part = rule.split(".", 1) 37 | if connection_name not in config.connections: 38 | raise ValueError( 39 | f"Routing selected unknown connection '{connection_name}'. " 40 | f"Defined connections: {', '.join(config.connections.keys()) or '(none)'}" 41 | ) 42 | return connection_name, model_part 43 | 44 | 45 | def resolve_connection_and_model( 46 | config: Config, external_model: str 47 | ) -> tuple[str, str]: 48 | """ 49 | Resolves the connection name and model name based on routing rules. 50 | Args: 51 | config (Config): The configuration object containing routing rules. 52 | external_model (str): The external model name from the request. 53 | """ 54 | for model_match, rule in config.routing.items(): 55 | if fnmatch.fnmatchcase(external_model, model_match): 56 | connection_name, model_part = parse_routing_rule(rule, config) 57 | resolved_model = external_model if model_part == "*" else model_part 58 | return connection_name, resolved_model 59 | 60 | raise ValueError( 61 | f"No routing rule matched model '{external_model}'. " 62 | 'Add a catch-all rule like "*" = "openai.gpt-3.5-turbo" if desired.' 63 | ) 64 | 65 | 66 | async def process_stream( 67 | async_llm_func, request: ChatCompletionRequest, llm_params, log_entry: RequestContext 68 | ): 69 | """ 70 | Streams the response from the LLM function. 
71 | """ 72 | prompt = request.messages 73 | queue = asyncio.Queue() 74 | stream_id = f"chatcmpl-{secrets.token_hex(12)}" 75 | created = int(time.time()) 76 | 77 | async def callback(chunk): 78 | await queue.put(chunk) 79 | 80 | def make_chunk(delta=None, content=None, finish_reason=None, error=None) -> str: 81 | if delta is None: 82 | delta = {"content": str(content)} if content is not None else {} 83 | obj = { 84 | "id": stream_id, 85 | "object": "chat.completion.chunk", 86 | "created": created, 87 | "choices": [{"index": 0, "delta": delta}], 88 | } 89 | if error is not None: 90 | obj["error"] = {"message": str(error), "type": type(error).__name__} 91 | if finish_reason is None: 92 | finish_reason = "error" 93 | if finish_reason is not None: 94 | obj["choices"][0]["finish_reason"] = finish_reason 95 | return "data: " + json.dumps(obj) + "\n\n" 96 | 97 | task = asyncio.create_task(async_llm_func(prompt, **llm_params, callback=callback)) 98 | 99 | try: 100 | # Initial chunk: role 101 | yield make_chunk(delta={"role": "assistant"}) 102 | 103 | while not task.done(): 104 | try: 105 | block = await asyncio.wait_for(queue.get(), timeout=0.1) 106 | yield make_chunk(content=block) 107 | except asyncio.TimeoutError: 108 | continue 109 | 110 | # Drain any remaining 111 | while not queue.empty(): 112 | block = await queue.get() 113 | yield make_chunk(content=block) 114 | 115 | finally: 116 | try: 117 | result = await task 118 | log_entry.response = result 119 | except Exception as e: 120 | log_entry.error = e 121 | yield make_chunk(error={"message": str(e), "type": type(e).__name__}) 122 | 123 | if log_entry.error: 124 | yield make_chunk(finish_reason="error") 125 | else: 126 | yield make_chunk(finish_reason="stop") 127 | yield "data: [DONE]\n\n" 128 | await log_non_blocking(log_entry) 129 | if log_entry.error: 130 | if env.debug: 131 | raise log_entry.error 132 | logging.error(log_entry.error) 133 | 134 | 135 | def read_api_key(request: Request) -> str: 136 | """ 137 | Extracts the Bearer token from the Authorization header. 138 | returns '' if not present. 139 | """ 140 | auth = request.headers.get("authorization") 141 | if auth and auth.lower().startswith("bearer "): 142 | return auth[7:].strip() 143 | return "" 144 | 145 | 146 | def api_key_id(api_key: Optional[str]) -> str | None: 147 | """ 148 | Generates a consistent hashed identifier for the given API key. 149 | """ 150 | if not api_key: 151 | return None 152 | return hashlib.md5( 153 | (api_key + env.config.encryption_key).encode("utf-8") 154 | ).hexdigest() 155 | 156 | 157 | async def check(request: Request) -> tuple[str, str, dict]: 158 | """ 159 | API key and service availability check for endpoints. 160 | Args: 161 | request (Request): The incoming HTTP request object. 162 | Returns: 163 | tuple[str, str, dict]: A tuple containing the group name, the API key and user_info object. 164 | Raises: 165 | HTTPException: If the service is disabled or the API key is invalid. 
166 | """ 167 | if not env.config.enabled: 168 | raise HTTPException( 169 | status_code=503, 170 | detail={ 171 | "error": { 172 | "message": "The service is disabled.", 173 | "type": "service_unavailable", 174 | "param": None, 175 | "code": "service_disabled", 176 | } 177 | }, 178 | ) 179 | api_key = read_api_key(request) 180 | result = (env.config.api_key_check)(api_key) 181 | if isinstance(result, tuple): 182 | group, user_info = result 183 | else: 184 | group: str | bool | None = result 185 | user_info = {} 186 | 187 | if not group: 188 | raise HTTPException( 189 | status_code=403, 190 | detail={ 191 | "error": { 192 | "message": "Incorrect API key provided: " 193 | "your API key is invalid, expired, or revoked.", 194 | "type": "invalid_request_error", 195 | "param": None, 196 | "code": "invalid_api_key", 197 | } 198 | }, 199 | ) 200 | return group, api_key, user_info 201 | 202 | 203 | async def chat_completions( 204 | request: ChatCompletionRequest, raw_request: Request 205 | ) -> Response: 206 | """ 207 | Endpoint for chat completions that mimics OpenAI's API structure. 208 | Streams the response from the LLM using microcore. 209 | """ 210 | group, api_key, user_info = await check(raw_request) 211 | llm_params = request.model_dump(exclude={"messages"}, exclude_none=True) 212 | connection, llm_params["model"] = resolve_connection_and_model( 213 | env.config, llm_params.get("model", "default_model") 214 | ) 215 | log_entry = RequestContext( 216 | request=request, 217 | api_key_id=api_key_id(api_key), 218 | group=group if isinstance(group, str) else None, 219 | remote_addr=get_client_ip(raw_request), 220 | connection=connection, 221 | model=llm_params["model"], 222 | user_info=user_info, 223 | ) 224 | logging.debug( 225 | "Resolved routing for [%s] --> connection: %s, model: %s", 226 | request.model, 227 | connection, 228 | llm_params["model"], 229 | ) 230 | 231 | if not env.config.groups[group].allows_connecting_to(connection): 232 | raise HTTPException( 233 | status_code=403, 234 | detail={ 235 | "error": { 236 | "message": f"Your API key does not allow using the '{connection}' connection.", 237 | "type": "invalid_request_error", 238 | "param": None, 239 | "code": "connection_not_allowed", 240 | } 241 | }, 242 | ) 243 | 244 | async_llm_func = env.connections[connection] 245 | 246 | logging.info("Querying LLM... params: %s", llm_params) 247 | if request.stream: 248 | return StreamingResponse( 249 | process_stream(async_llm_func, request, llm_params, log_entry), 250 | media_type="text/event-stream", 251 | ) 252 | 253 | try: 254 | out = await async_llm_func(request.messages, **llm_params) 255 | log_entry.response = out 256 | logging.info("LLM response: %s", out) 257 | except Exception as e: 258 | log_entry.error = e 259 | await log_non_blocking(log_entry) 260 | raise 261 | await log_non_blocking(log_entry) 262 | 263 | return JSONResponse( 264 | { 265 | "choices": [ 266 | { 267 | "index": 0, 268 | "message": {"role": "assistant", "content": str(out)}, 269 | "finish_reason": "stop", 270 | } 271 | ] 272 | } 273 | ) 274 | 275 | 276 | async def log(request_ctx: RequestContext): 277 | """ 278 | Creates log records for current request using all configured log handlers. 
279 | """ 280 | if request_ctx.duration is None and request_ctx.created_at: 281 | request_ctx.duration = (datetime.now() - request_ctx.created_at).total_seconds() 282 | for handler in env.loggers: 283 | # check if it is async, then run both sync and async loggers in non-blocking way (sync too) 284 | if asyncio.iscoroutinefunction(handler): 285 | asyncio.create_task(handler(request_ctx)) 286 | else: 287 | try: 288 | handler(request_ctx) 289 | except Exception as e: 290 | logging.error("Error in logger handler: %s", e) 291 | raise e 292 | 293 | 294 | async def log_non_blocking( 295 | request_ctx: RequestContext, 296 | ) -> Optional[asyncio.Task]: 297 | """ 298 | Non-blocking log function that schedules logging as an asynchronous task. 299 | """ 300 | if env.loggers: 301 | task = asyncio.create_task(log(request_ctx)) 302 | return task 303 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

LM-Proxy

2 |

3 | Lightweight, OpenAI-compatible HTTP proxy server / gateway
unifying access to multiple Large Language Model providers and local inference
through a single, standardized API endpoint. 4 |

5 |

6 | PyPI · Tests · Code Style · Code Coverage · License *(badge images)* 7 | 8 | 9 | 10 | 11 | 12 |

13 | 14 | Built with Python, FastAPI and [MicroCore](https://github.com/Nayjest/ai-microcore), **LM-Proxy** seamlessly integrates cloud providers like Google, Anthropic, and OpenAI, as well as local PyTorch-based inference, while maintaining full compatibility with OpenAI's API format. 15 | 16 | It works as a drop-in replacement for OpenAI's API, allowing you to switch between cloud providers and local models without modifying your existing client code. 17 | 18 | **LM-Proxy** supports **real-time token streaming**, **secure Virtual API key management**, and can be used both as an importable Python library and as a standalone HTTP service. Whether you're building production applications or experimenting with different models, LM-Proxy eliminates integration complexity and keeps your codebase **provider-agnostic**. 19 | 20 | 21 | ## Table of Contents 22 | - [Overview](#lm-proxy) 23 | - [Features](#-features) 24 | - [Getting Started](#-getting-started) 25 | - [Installation](#installation) 26 | - [Quick Start](#quick-start) 27 | - [Configuration](#-configuration) 28 | - [Basic Structure](#basic-structure) 29 | - [Environment Variables](#environment-variables) 30 | - [Proxy API Keys vs. Provider API Keys](#-proxy-api-keys-vs-provider-api-keys) 31 | - [API Usage](#-api-usage) 32 | - [Chat Completions Endpoint](#chat-completions-endpoint) 33 | - [Models List Endpoint](#models-list-endpoint) 34 | - [User Groups Configuration](#-user-groups-configuration) 35 | - [Basic Group Definition](#basic-group-definition) 36 | - [Group-based Access Control](#group-based-access-control) 37 | - [Connection Restrictions](#connection-restrictions) 38 | - [Virtual API Key Validation](#virtual-api-key-validation) 39 | - [Advanced Usage](#%EF%B8%8F-advanced-usage) 40 | - [Dynamic Model Routing](#dynamic-model-routing) 41 | - [Load Balancing Example](#load-balancing-example) 42 | - [Google Vertex AI Example](#google-vertex-ai-configuration-example) 43 | - [Using Tokens from OIDC Provider as Virtual/Client API Keys](#using-tokens-from-oidc-provider-as-virtualclient-api-keys) 44 | - [Add-on Components](#add-on-components) 45 | - [Database Connector](#database-connector) 46 | - [Debugging](#-debugging) 47 | - [Contributing](#-contributing) 48 | - [License](#-license) 49 | 50 | ## ✨ Features 51 | 52 | - **Provider Agnostic**: Connect to OpenAI, Anthropic, Google AI, local models, and more using a single API 53 | - **Unified Interface**: Access all models through the standard OpenAI API format 54 | - **Dynamic Routing**: Route requests to different LLM providers based on model name patterns 55 | - **Stream Support**: Full streaming support for real-time responses 56 | - **API Key Management**: Configurable API key validation and access control 57 | - **Easy Configuration**: Simple TOML/YAML/JSON/Python configuration files for setup 58 | - **Extensible by Design**: Minimal core with clearly defined extension points, enabling seamless customization and expansion without modifying the core system. 59 | 60 | ## 🚀 Getting Started 61 | 62 | ### Requirements 63 | Python 3.11 | 3.12 | 3.13 64 | 65 | ### Installation 66 | 67 | ```bash 68 | pip install lm-proxy 69 | ``` 70 | 71 | ### Quick Start 72 | 73 | #### 1. 
Create a `config.toml` file: 74 | 75 | ```toml 76 | host = "0.0.0.0" 77 | port = 8000 78 | 79 | [connections] 80 | [connections.openai] 81 | api_type = "open_ai" 82 | api_base = "https://api.openai.com/v1/" 83 | api_key = "env:OPENAI_API_KEY" 84 | 85 | [connections.anthropic] 86 | api_type = "anthropic" 87 | api_key = "env:ANTHROPIC_API_KEY" 88 | 89 | [routing] 90 | "gpt*" = "openai.*" 91 | "claude*" = "anthropic.*" 92 | "*" = "openai.gpt-3.5-turbo" 93 | 94 | [groups.default] 95 | api_keys = ["YOUR_API_KEY_HERE"] 96 | ``` 97 | > **Note** ℹ️ 98 | > To enhance security, consider storing upstream API keys in operating system environment variables rather than embedding them directly in the configuration file. You can reference these variables in the configuration using the env: syntax. 99 | 100 | #### 2. Start the server: 101 | 102 | ```bash 103 | lm-proxy 104 | ``` 105 | Alternatively, run it as a Python module: 106 | ```bash 107 | python -m lm_proxy 108 | ``` 109 | 110 | #### 3. Use it with any OpenAI-compatible client: 111 | 112 | ```python 113 | from openai import OpenAI 114 | 115 | client = OpenAI( 116 | api_key="YOUR_API_KEY_HERE", 117 | base_url="http://localhost:8000/v1" 118 | ) 119 | 120 | completion = client.chat.completions.create( 121 | model="gpt-5", # This will be routed to OpenAI based on config 122 | messages=[{"role": "user", "content": "Hello, world!"}] 123 | ) 124 | print(completion.choices[0].message.content) 125 | ``` 126 | 127 | Or use the same endpoint with Claude models: 128 | 129 | ```python 130 | completion = client.chat.completions.create( 131 | model="claude-opus-4-1-20250805", # This will be routed to Anthropic based on config 132 | messages=[{"role": "user", "content": "Hello, world!"}] 133 | ) 134 | ``` 135 | 136 | ## 📝 Configuration 137 | 138 | LM-Proxy is configured through a TOML/YAML/JSON/Python file that specifies connections, routing rules, and access control. 
139 | 140 | ### Basic Structure 141 | 142 | ```toml 143 | host = "0.0.0.0" # Interface to bind to 144 | port = 8000 # Port to listen on 145 | dev_autoreload = false # Enable for development 146 | 147 | # API key validation function (optional) 148 | api_key_check = "lm_proxy.api_key_check.check_api_key_in_config" 149 | 150 | # LLM Provider Connections 151 | [connections] 152 | 153 | [connections.openai] 154 | api_type = "open_ai" 155 | api_base = "https://api.openai.com/v1/" 156 | api_key = "env:OPENAI_API_KEY" 157 | 158 | [connections.google] 159 | api_type = "google_ai_studio" 160 | api_key = "env:GOOGLE_API_KEY" 161 | 162 | [connections.anthropic] 163 | api_type = "anthropic" 164 | api_key = "env:ANTHROPIC_API_KEY" 165 | 166 | # Routing rules (model_pattern = "connection.model") 167 | [routing] 168 | "gpt*" = "openai.*" # Route all GPT models to OpenAI 169 | "claude*" = "anthropic.*" # Route all Claude models to Anthropic 170 | "gemini*" = "google.*" # Route all Gemini models to Google 171 | "*" = "openai.gpt-3.5-turbo" # Default fallback 172 | 173 | # Access control groups 174 | [groups.default] 175 | api_keys = [ 176 | "KEY1", 177 | "KEY2" 178 | ] 179 | 180 | # optional 181 | [[loggers]] 182 | class = 'lm_proxy.loggers.BaseLogger' 183 | [loggers.log_writer] 184 | class = 'lm_proxy.loggers.log_writers.JsonLogWriter' 185 | file_name = 'storage/json.log' 186 | [loggers.entry_transformer] 187 | class = 'lm_proxy.loggers.LogEntryTransformer' 188 | completion_tokens = "response.usage.completion_tokens" 189 | prompt_tokens = "response.usage.prompt_tokens" 190 | prompt = "request.messages" 191 | response = "response" 192 | group = "group" 193 | connection = "connection" 194 | api_key_id = "api_key_id" 195 | remote_addr = "remote_addr" 196 | created_at = "created_at" 197 | duration = "duration" 198 | ``` 199 | 200 | ### Environment Variables 201 | 202 | You can reference environment variables in your configuration file by prefixing values with `env:`. 203 | 204 | For example: 205 | 206 | ```toml 207 | [connections.openai] 208 | api_key = "env:OPENAI_API_KEY" 209 | ``` 210 | 211 | At runtime, LM-Proxy automatically retrieves the value of the target variable 212 | (OPENAI_API_KEY) from your operating system’s environment or from a .env file, if present. 213 | 214 | ### .env Files 215 | 216 | By default, LM-Proxy looks for a `.env` file in the current working directory 217 | and loads environment variables from it. 218 | 219 | You can refer to the [.env.template](https://github.com/Nayjest/lm-proxy/blob/main/.env.template) 220 | file for an example: 221 | ```dotenv 222 | OPENAI_API_KEY=sk-u........ 223 | GOOGLE_API_KEY=AI........ 224 | ANTHROPIC_API_KEY=sk-ant-api03--vE........ 225 | 226 | # "1", "TRUE", "YES", "ON", "ENABLED", "Y", "+" are true, case-insensitive. 227 | # See https://github.com/Nayjest/ai-microcore/blob/v4.4.3/microcore/configuration.py#L36 228 | LM_PROXY_DEBUG=no 229 | ``` 230 | 231 | You can also control `.env` file usage with the `--env` command-line option: 232 | 233 | ```bash 234 | # Use a custom .env file path 235 | lm-proxy --env="path/to/your/.env" 236 | # Disable .env loading 237 | lm-proxy --env="" 238 | ``` 239 | 240 | ## 🔑 Proxy API Keys vs. Provider API Keys 241 | 242 | LM-Proxy utilizes two distinct types of API keys to facilitate secure and efficient request handling. 243 | 244 | - **Proxy API Key (Virtual API Key, Client API Key):** 245 | A unique key generated and managed within the LM-Proxy. 
246 | Clients use these keys to authenticate their requests to the proxy's API endpoints. 247 | Each Client API Key is associated with a specific group, which defines the scope of access and permissions for the client's requests. 248 | These keys allow users to securely interact with the proxy without direct access to external service credentials. 249 | 250 | 251 | 252 | - **Provider API Key (Upstream API Key):** 253 | A key provided by external LLM inference providers (e.g., OpenAI, Anthropic, Mistral, etc.) and configured within the LM-Proxy. 254 | The proxy uses these keys to authenticate and forward validated client requests to the respective external services. 255 | Provider API Keys remain hidden from end users, ensuring secure and transparent communication with provider APIs. 256 | 257 | This distinction ensures a clear separation of concerns: 258 | Virtual API Keys manage user authentication and access within the proxy, 259 | while Upstream API Keys handle secure communication with external providers. 260 | 261 | ## 🔌 API Usage 262 | 263 | LM-Proxy implements the OpenAI chat completions API endpoint. You can use any OpenAI-compatible client to interact with it. 264 | 265 | ### Chat Completions Endpoint 266 | 267 | ```http 268 | POST /v1/chat/completions 269 | ``` 270 | 271 | #### Request Format 272 | 273 | ```json 274 | { 275 | "model": "gpt-3.5-turbo", 276 | "messages": [ 277 | {"role": "system", "content": "You are a helpful assistant."}, 278 | {"role": "user", "content": "What is the capital of France?"} 279 | ], 280 | "temperature": 0.7, 281 | "stream": false 282 | } 283 | ``` 284 | 285 | #### Response Format 286 | 287 | ```json 288 | { 289 | "choices": [ 290 | { 291 | "index": 0, 292 | "message": { 293 | "role": "assistant", 294 | "content": "The capital of France is Paris." 295 | }, 296 | "finish_reason": "stop" 297 | } 298 | ] 299 | } 300 | ``` 301 | 302 | 303 | ### Models List Endpoint 304 | 305 | 306 | List and describe all models available through the API. 307 | 308 | 309 | ```http 310 | GET /v1/models 311 | ``` 312 | 313 | The **LM-Proxy** dynamically builds the models list based on routing rules defined in `config.routing`. 314 | Routing keys can reference both **exact model names** and **model name patterns** (e.g., `"gpt*"`, `"claude*"`, etc.). 315 | 316 | By default, wildcard patterns are displayed as-is in the models list (e.g., `"gpt*"`, `"claude*"`). 317 | This behavior can be customized via the `model_listing_mode` configuration option: 318 | 319 | ``` 320 | model_listing_mode = "as_is" | "ignore_wildcards" | "expand_wildcards" 321 | ``` 322 | 323 | Available modes: 324 | 325 | - **`as_is`** *(default)* — Lists all entries exactly as defined in the routing configuration, including wildcard patterns. 326 | - **`ignore_wildcards`** — Excludes wildcard patterns, showing only explicitly defined model names. 327 | - **`expand_wildcards`** — Expands wildcard patterns by querying each connected backend for available models *(feature not yet implemented)*. 
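For example, to hide wildcard routing patterns from the `/v1/models` response, set the option at the top level of the configuration file:

```toml
# Only explicitly named models (routing keys without "*" or "?") will be listed
model_listing_mode = "ignore_wildcards"
```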
328 | 329 | To obtain a complete and accurate model list in the current implementation, 330 | all supported models must be explicitly defined in the routing configuration, for example: 331 | ```toml 332 | [routing] 333 | "gpt-4" = "my_openai_connection.*" 334 | "gpt-5" = "my_openai_connection.*" 335 | "gpt-8"= "my_openai_connection.gpt-3.5-turbo" 336 | "claude-4.5-sonnet" = "my_anthropic_connection.claude-sonnet-4-5-20250929" 337 | "claude-4.1-opus" = "my_anthropic_connection.claude-opus-4-1-20250805" 338 | [connections] 339 | [connections.my_openai_connection] 340 | api_type = "open_ai" 341 | api_base = "https://api.openai.com/v1/" 342 | api_key = "env:OPENAI_API_KEY" 343 | [connections.my_anthropic_connection] 344 | api_type = "anthropic" 345 | api_key = "env:ANTHROPIC_API_KEY" 346 | ``` 347 | 348 | 349 | 350 | #### Response Format 351 | 352 | ```json 353 | { 354 | "object": "list", 355 | "data": [ 356 | { 357 | "id": "gpt-6", 358 | "object": "model", 359 | "created": 1686935002, 360 | "owned_by": "organization-owner" 361 | }, 362 | { 363 | "id": "claude-5-sonnet", 364 | "object": "model", 365 | "created": 1686935002, 366 | "owned_by": "organization-owner" 367 | } 368 | ] 369 | } 370 | ``` 371 | 372 | ## 🔒 User Groups Configuration 373 | 374 | The `[groups]` section in the configuration defines access control rules for different user groups. 375 | Each group can have its own set of virtual API keys and permitted connections. 376 | 377 | ### Basic Group Definition 378 | 379 | ```toml 380 | [groups.default] 381 | api_keys = ["KEY1", "KEY2"] 382 | allowed_connections = "*" # Allow access to all connections 383 | ``` 384 | 385 | ### Group-based Access Control 386 | 387 | You can create multiple groups to segment your users and control their access: 388 | 389 | ```toml 390 | # Admin group with full access 391 | [groups.admin] 392 | api_keys = ["ADMIN_KEY_1", "ADMIN_KEY_2"] 393 | allowed_connections = "*" # Access to all connections 394 | 395 | # Regular users with limited access 396 | [groups.users] 397 | api_keys = ["USER_KEY_1", "USER_KEY_2"] 398 | allowed_connections = "openai,anthropic" # Only allowed to use specific connections 399 | 400 | # Free tier with minimal access 401 | [groups.free] 402 | api_keys = ["FREE_KEY_1", "FREE_KEY_2"] 403 | allowed_connections = "openai" # Only allowed to use OpenAI connection 404 | ``` 405 | 406 | ### Connection Restrictions 407 | 408 | The `allowed_connections` parameter controls which upstream providers a group can access: 409 | 410 | - `"*"` - Group can use all configured connections 411 | - `"openai,anthropic"` - Comma-separated list of specific connections the group can use 412 | 413 | This allows fine-grained control over which users can access which AI providers, enabling features like: 414 | 415 | - Restricting expensive models to premium users 416 | - Creating specialized access tiers for different user groups 417 | - Implementing usage quotas per group 418 | - Billing and cost allocation by user group 419 | 420 | ### Virtual API Key Validation 421 | 422 | #### Overview 423 | 424 | LM-proxy includes 2 built-in methods for validating Virtual API keys: 425 | - `lm_proxy.api_key_check.check_api_key_in_config` - verifies API keys against those defined in the config file; used by default 426 | - `lm_proxy.api_key_check.CheckAPIKeyWithRequest` - validates API keys via an external HTTP service 427 | 428 | The API key check method can be configured using the `api_key_check` configuration key. 
429 | Its value can be either a reference to a Python function in the format `my_module.sub_module1.sub_module2.fn_name`, 430 | or an object containing parameters for a class-based validator. 431 | 432 | In the .py config representation, the validator function can be passed directly as a callable. 433 | 434 | #### Example configuration for external API key validation using HTTP request to Keycloak / OpenID Connect 435 | 436 | This example shows how to validate API keys against an external service (e.g., Keycloak): 437 | 438 | ```toml 439 | [api_key_check] 440 | class = "lm_proxy.api_key_check.CheckAPIKeyWithRequest" 441 | method = "POST" 442 | url = "http://keycloak:8080/realms/master/protocol/openid-connect/userinfo" 443 | response_as_user_info = true # interpret response JSON as user info object for further processing / logging 444 | use_cache = true # requires installing cachetools if True: pip install cachetools 445 | cache_ttl = 60 # Cache duration in seconds 446 | 447 | [api_key_check.headers] 448 | Authorization = "Bearer {api_key}" 449 | ``` 450 | #### Custom API Key Validation / Extending functionality 451 | 452 | For more advanced authentication needs, 453 | you can implement a custom validator function: 454 | 455 | ```python 456 | # my_validators.py 457 | def validate_api_key(api_key: str) -> str | None: 458 | """ 459 | Validate an API key and return the group name if valid. 460 | 461 | Args: 462 | api_key: The API key to validate 463 | 464 | Returns: 465 | The name of the group if valid, None otherwise 466 | """ 467 | if api_key == "secret-key": 468 | return "admin" 469 | elif api_key.startswith("user-"): 470 | return "users" 471 | return None 472 | ``` 473 | 474 | Then reference it in your config: 475 | 476 | ```toml 477 | api_key_check = "my_validators.validate_api_key" 478 | ``` 479 | > **NOTE** 480 | > In this case, the `api_keys` lists in groups are ignored, and the custom function is responsible for all validation logic. 481 | 482 | 483 | ## 🛠️ Advanced Usage 484 | ### Dynamic Model Routing 485 | 486 | The routing section allows flexible pattern matching with wildcards: 487 | 488 | ```toml 489 | [routing] 490 | "gpt-4*" = "openai.gpt-4" # Route gpt-4 requests to OpenAI GPT-4 491 | "gpt-3.5*" = "openai.gpt-3.5-turbo" # Route gpt-3.5 requests to OpenAI 492 | "claude*" = "anthropic.*" # Pass model name as-is to Anthropic 493 | "gemini*" = "google.*" # Pass model name as-is to Google 494 | "custom*" = "local.llama-7b" # Map any "custom*" to a specific local model 495 | "*" = "openai.gpt-3.5-turbo" # Default fallback for unmatched models 496 | ``` 497 | Keys are model name patterns (with `*` wildcard support), and values are connection/model mappings. 498 | Connection names reference those defined in the `[connections]` section. 499 | 500 | ### Load Balancing Example 501 | 502 | - [Simple load-balancer configuration](https://github.com/Nayjest/lm-proxy/blob/main/examples/load_balancer_config.py) 503 | This example demonstrates how to set up a load balancer that randomly 504 | distributes requests across multiple language model servers using the lm_proxy. 
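The idea behind that example, in a heavily simplified form: a Python configuration can register a plain `async` callable as a connection, and that callable decides per request which upstream server receives the traffic. The sketch below is illustrative only: the upstream URLs, the `balanced` connection name, and the module-level `config` variable are assumptions made for this snippet (see the linked example for the supported conventions), and streaming is intentionally not handled.

```python
# Hypothetical Python config sketch (not the shipped example file)
import random

from openai import AsyncOpenAI
from lm_proxy.config import Config, Group

# Assumed OpenAI-compatible upstream servers to balance across
UPSTREAMS = ["http://10.0.0.1:8000/v1", "http://10.0.0.2:8000/v1"]


async def balanced(messages, **llm_params):
    """Forward the chat request to a randomly chosen upstream server."""
    llm_params.pop("callback", None)  # streaming callback from LM-Proxy is ignored here
    llm_params.pop("stream", None)    # always query the upstream in non-streaming mode
    client = AsyncOpenAI(base_url=random.choice(UPSTREAMS), api_key="EMPTY")
    response = await client.chat.completions.create(messages=messages, **llm_params)
    return response.choices[0].message.content


config = Config(
    connections={"balanced": balanced},  # async callables are accepted as connections
    routing={"*": "balanced.*"},         # route every model name to the balancer
    groups={"default": Group(api_keys=["YOUR_API_KEY_HERE"])},
)
```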
505 | 506 | ### Google Vertex AI Configuration Example 507 | 508 | - [vertex-ai.toml](https://github.com/Nayjest/lm-proxy/blob/main/examples/vertex-ai.toml) 509 | This example demonstrates how to connect LM-Proxy to Google Gemini model via Vertex AI API 510 | 511 | ### Using Tokens from OIDC Provider as Virtual/Client API Keys 512 | 513 | You can configure LM-Proxy to validate tokens from OpenID Connect (OIDC) providers like Keycloak, Auth0, or Okta as API keys. 514 | 515 | The following configuration validates Keycloak access tokens by calling the userinfo endpoint: 516 | ```toml 517 | [api_key_check] 518 | class = "lm_proxy.api_key_check.CheckAPIKeyWithRequest" 519 | method = "POST" 520 | url = "http://keycloak:8080/realms/master/protocol/openid-connect/userinfo" 521 | response_as_user_info = true 522 | use_cache = true 523 | cache_ttl = 60 524 | 525 | [api_key_check.headers] 526 | Authorization = "Bearer {api_key}" 527 | ``` 528 | 529 | **Configuration Parameters:** 530 | 531 | - `class` - The API key validation handler class ([lm_proxy.api_key_check.CheckAPIKeyWithRequest](https://github.com/Nayjest/lm-proxy/blob/main/lm_proxy/api_key_check/with_request.py)) 532 | - `method` - HTTP method for the validation request (typically `POST` or `GET`) 533 | - `url` - The OIDC provider's userinfo endpoint URL 534 | - `response_as_user_info` - Parse the response as user information for further usage in LM-Proxy (extend logged info, determine user group, etc.) 535 | - `use_cache` - Enable caching of validation results (requires installing the `cachetools` package if enabled: `pip install cachetools`) 536 | - `cache_ttl` - Cache time-to-live in seconds (reduces load on identity provider) 537 | - `headers` - Dictionary of headers to send with the validation request 538 | 539 | > **Note**: The `{api_key}` placeholder can be used in headers or in the URL. LM-Proxy substitutes it with the API key from the client to perform the check. 540 | 541 | 542 | **Usage:** 543 | 544 | Clients pass their OIDC access token as the API key when making requests to LM-Proxy. 545 | 546 | ## 🧩 Add-on Components 547 | 548 | ### Database Connector 549 | 550 | [lm-proxy-db-connector](https://github.com/nayjest/lm-proxy-db-connector) is a lightweight SQLAlchemy-based connector that enables LM-Proxy to work with relational databases including PostgreSQL, MySQL/MariaDB, SQLite, Oracle, Microsoft SQL Server, and many others. 551 | 552 | **Key Features:** 553 | - Configure database connections directly through LM-Proxy configuration 554 | - Share database connections across components, extensions, and custom functions 555 | - Built-in database logger for structured logging of AI request data 556 | 557 | ## 🔍 Debugging 558 | 559 | ### Overview 560 | When **debugging mode** is enabled, 561 | LM-Proxy provides detailed logging information to help diagnose issues: 562 | - Stack traces for exceptions are shown in the console 563 | - Logging level is set to DEBUG instead of INFO 564 | 565 | > **Warning** ⚠️ 566 | > Never enable debugging mode in production environments, as it may expose sensitive information to the application logs. 567 | 568 | ### Enabling Debugging Mode 569 | To enable debugging, set the `LM_PROXY_DEBUG` environment variable to a truthy value (e.g., "1", "true", "yes"). 570 | > **Tip** 💡 571 | > Environment variables can also be defined in a `.env` file. 
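For example, to enable debugging for a single run from the shell:

```bash
LM_PROXY_DEBUG=1 lm-proxy
```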
572 | 573 | Alternatively, you can enable or disable debugging via the command-line arguments: 574 | - `--debug` to enable debugging 575 | - `--no-debug` to disable debugging 576 | 577 | > **Note** ℹ️ 578 | > CLI arguments override environment variable settings. 579 | 580 | 581 | ## 🤝 Contributing 582 | 583 | Contributions are welcome! Please feel free to submit a Pull Request. 584 | 585 | 1. Fork the repository 586 | 2. Create your feature branch (`git checkout -b feature/amazing-feature`) 587 | 3. Commit your changes (`git commit -m 'Add some amazing feature'`) 588 | 4. Push to the branch (`git push origin feature/amazing-feature`) 589 | 5. Open a Pull Request 590 | 591 | 592 | ## 📄 License 593 | 594 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 595 | © 2025 Vitalii Stepanenko 596 | --------------------------------------------------------------------------------