├── app
│   ├── __init__.py
│   ├── api
│   │   ├── __init__.py
│   │   ├── models.py
│   │   └── routes.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── security.py
│   │   └── config.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_api.py
│   │   └── test_services.py
│   ├── services
│   │   ├── __init__.py
│   │   ├── llm_service.py
│   │   └── cache_service.py
│   └── main.py
├── requirements.txt
├── Dockerfile
├── config.yaml
├── .gitignore
├── usage.ipynb
└── README.md

/app/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/api/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/core/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/core/security.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/tests/test_api.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/services/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/tests/test_services.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi
2 | uvicorn
3 | pydantic
4 | pydantic-settings
5 | redis
6 | pymongo
7 | httpx
8 | pytest
9 | pytest-asyncio
10 | PyYAML
11 | requests
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9
2 |
3 | WORKDIR /app
4 |
5 | COPY requirements.txt .
6 | RUN pip install --no-cache-dir -r requirements.txt
7 |
8 | COPY . .
9 | 10 | CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] -------------------------------------------------------------------------------- /app/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from app.api.routes import router as api_router 3 | from app.core.config import settings 4 | 5 | app = FastAPI(title="cache-cool", version="0.1.0") 6 | 7 | app.include_router(api_router 8 | # , prefix="/v1" 9 | ) 10 | 11 | @app.get("/health") 12 | async def health_check(): 13 | return {"status": "ok"} 14 | 15 | if __name__ == "__main__": 16 | import uvicorn 17 | uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True) -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | llm_schemas: 2 | openai: 3 | endpoint: "https://api.openai.com/v1/chat/completions" 4 | headers: 5 | - "Content-Type: application/json" 6 | - "Authorization: Bearer {api_key}" 7 | temperature_threshold: 0.8 8 | claude: 9 | endpoint: "https://api.claude.ai/v1/chat/completions" 10 | headers: 11 | - "Content-Type: application/json" 12 | - "Authorization: Bearer {api_key}" 13 | temperature_threshold: 0.85 14 | avalai: 15 | endpoint: "https://api.avalapis.ir/v1/chat/completions" 16 | headers: 17 | - "Content-Type: application/json" 18 | - "Authorization: {api_key}" 19 | temperature_threshold: 0.85 20 | groq: 21 | endpoint: "https://api.groq.com/openai/v1/chat/completions" 22 | headers: 23 | - "Content-Type: application/json" 24 | - "Authorization: {api_key}" 25 | temperature_threshold: 0.8 26 | 27 | mongodb: 28 | uri: "mongodb://localhost:27017" 29 | db_name: "llm_cache_db" 30 | collection_name: "cache" 31 | 32 | json_cache_file: "cache.json" 33 | 34 | redis: 35 | enabled: false 36 | host: "localhost" 37 | port: 6379 38 | db: 0 39 | 40 | current_llm_service: "openai" 41 | use_json_cache: true 42 | use_mongo_cache: true 43 | cache_max_size: 3 -------------------------------------------------------------------------------- /app/services/llm_service.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from fastapi.datastructures import Headers 3 | import httpx 4 | from app.core.config import settings 5 | from app.api.models import ChatCompletionRequest 6 | 7 | class LLMService: 8 | def __init__(self,service:str=None): 9 | self.current_service = service or settings.current_llm_service 10 | self.schema = settings.llm_schemas[self.current_service] 11 | 12 | async def generate_response(self, request: ChatCompletionRequest,api_key:str): 13 | request_json=request.model_dump_json() 14 | 15 | headers = {header.split(': ')[0]: header.split(': ')[1].format(api_key=api_key) 16 | for header in self.schema.headers} 17 | print(headers) 18 | async with httpx.AsyncClient() as client: 19 | response = await client.post( 20 | str(self.schema.endpoint), 21 | headers=headers, 22 | content=request_json 23 | ) 24 | print([i for i in response.iter_text()]) 25 | response.raise_for_status() 26 | return response.json() 27 | 28 | def get_temperature_threshold(self): 29 | return self.schema.temperature_threshold 30 | @classmethod 31 | def check_type(cls, schema_name): 32 | if schema_name not in settings.llm_schemas: 33 | return None 34 | else: return cls(schema_name) -------------------------------------------------------------------------------- /app/core/config.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from pydantic import BaseModel, Field 3 | from pydantic_settings import BaseSettings 4 | import yaml 5 | from typing import Dict, List, Optional 6 | 7 | 8 | from pydantic_settings import BaseSettings, SettingsConfigDict 9 | from pydantic import HttpUrl, Field 10 | from typing import List, Dict, Optional 11 | 12 | class LLMSchema(BaseModel): 13 | endpoint: HttpUrl 14 | headers: List[str] 15 | temperature_threshold: float 16 | 17 | class MongoDBConfig(BaseModel): 18 | uri: str 19 | db_name: str 20 | collection_name: str 21 | 22 | class RedisConfig(BaseModel): 23 | enabled: bool 24 | host: str 25 | port: int 26 | db: int 27 | 28 | class Configs(BaseModel): 29 | llm_schemas: Dict[str, LLMSchema] 30 | mongodb: MongoDBConfig 31 | json_cache_file: str 32 | redis: RedisConfig 33 | current_llm_service: str 34 | use_json_cache: bool 35 | use_mongo_cache: bool 36 | cache_max_size: int 37 | 38 | @classmethod 39 | def load_from_yaml(cls, yaml_file: str): 40 | with open(yaml_file, "r") as f: 41 | config_data = yaml.safe_load(f) 42 | return cls(**config_data) 43 | 44 | # class Config(SettingsConfigDict): 45 | # env_file = ".env" # Specify an environment file if needed 46 | # env_file_encoding = 'utf-8' 47 | # case_sensitive = True 48 | 49 | 50 | 51 | settings = Configs.load_from_yaml("config.yaml") 52 | -------------------------------------------------------------------------------- /app/api/models.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, HttpUrl, Field, model_validator 2 | from typing import Any, List, Optional, Dict 3 | from app.core.config import settings 4 | 5 | # Message model used when current_llm_service == "groq" (no message field) 6 | class MessageGroq(BaseModel): 7 | role: Optional[str] 8 | content: Optional[str] 9 | @model_validator(mode='before') 10 | @classmethod 11 | def validate_input(cls, values: Dict[str, Any]) -> Dict[str, Any]: 12 | # Check if the input is a simple string 13 | if isinstance(values, str): 14 | values = {"role": "user", "content": values} 15 | elif isinstance(values, dict): 16 | # Ensure required fields are present 17 | if 'content' not in values: 18 | raise ValueError("Field 'content' is required.") 19 | if 'role' not in values: 20 | values['role'] = "user" # Default role if not provided 21 | return values 22 | 23 | # Default Message model with message field 24 | class MessageDefault(MessageGroq): 25 | message: Optional[str] 26 | 27 | @model_validator(mode="before") 28 | @classmethod 29 | def validate_input(cls, values: Dict[str, Any]) -> Dict[str, Any]: 30 | values = super().validate_input(values) 31 | values['message'] = values.get('message', values['content']) # Default message 32 | return values 33 | 34 | 35 | # Dynamically select Message model based on current_llm_service setting 36 | MessageModel = MessageGroq if settings.current_llm_service == "groq" else MessageDefault 37 | 38 | class ChatCompletionRequest(BaseModel): 39 | model: str 40 | messages: List[MessageModel] 41 | temperature: float = Field(default=1.0, ge=0.0, le=2.0) 42 | max_tokens: Optional[int] = None 43 | n:Optional[int]=None 44 | stop:Optional[str]="" 45 | 46 | class LLMSchemaUpdate(BaseModel): 47 | endpoint: Optional[str] = None 48 | headers: Optional[List[str]] = None 49 | temperature_threshold: Optional[float] = None 50 | 51 | 52 | class LLMSchema(BaseModel): 53 | endpoint: HttpUrl 54 | headers: List[str] 55 | 
temperature_threshold: float 56 | 57 | class MongoDBConfig(BaseModel): 58 | uri: str 59 | db_name: str 60 | collection_name: str 61 | 62 | class RedisConfig(BaseModel): 63 | enabled: bool 64 | host: str 65 | port: int 66 | db: int 67 | 68 | class ConfigurationUpdate(BaseModel): 69 | llm_schemas: Dict[str, LLMSchema] 70 | mongodb: MongoDBConfig 71 | json_cache_file: str 72 | redis: RedisConfig 73 | current_llm_service: str 74 | use_json_cache: bool 75 | use_mongo_cache: bool 76 | cache_max_size: int 77 | -------------------------------------------------------------------------------- /app/api/routes.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict 3 | from fastapi import APIRouter, HTTPException, Request 4 | from app.core.config import Configs, settings 5 | from app.services.llm_service import LLMService 6 | from app.services.cache_service import CacheService 7 | from app.api.models import ChatCompletionRequest, ConfigurationUpdate 8 | 9 | router = APIRouter() 10 | 11 | 12 | @router.post("/{schema_name}/chat/completions") 13 | async def chat_completions(schema_name: str,request: Request): 14 | cache_service = CacheService() 15 | llm_service = LLMService.check_type(schema_name) 16 | request_body_json: Dict = await request.json() 17 | 18 | print(request_body_json) 19 | chat_request=ChatCompletionRequest(**request_body_json) 20 | # Access headers 21 | headers = request.headers 22 | api_key=headers.get("Authorization") 23 | if not api_key: 24 | raise HTTPException(status_code=401, detail="no key!?") 25 | 26 | # Check cache if temperature is below threshold 27 | if chat_request.temperature <= llm_service.get_temperature_threshold(): 28 | cached_response = await cache_service.get(chat_request.model_dump_json()) 29 | if cached_response: 30 | print("hit!") 31 | return cached_response 32 | # Make API call 33 | response = await llm_service.generate_response(chat_request,api_key) 34 | 35 | # Cache response if temperature is below threshold 36 | if chat_request.temperature <= llm_service.get_temperature_threshold(): 37 | await cache_service.set(chat_request.model_dump_json(), response) 38 | 39 | return response 40 | 41 | @router.get("/configure") 42 | async def get_configuration(): 43 | return settings 44 | 45 | 46 | @router.put("/configure") 47 | async def update_configuration(config: Configs): 48 | global settings # Declare settings as global to modify the global settings object 49 | 50 | try: 51 | # Update settings using the `copy(update=...)` method 52 | updated_settings = settings.copy(update=config.dict(exclude_unset=True)) 53 | 54 | # Apply the updated settings 55 | settings = updated_settings 56 | except Exception as e: 57 | # Return an error response if updating fails 58 | raise HTTPException(status_code=400, detail=f'Error in input validation: {e}') 59 | 60 | # Reinitialize services if certain keys are updated 61 | update_keys = config.dict(exclude_unset=True).keys() 62 | 63 | # Check if LLM service needs to be reinitialized 64 | if 'current_llm_service' in update_keys: 65 | LLMService().__init__() # Reinitialize LLM service 66 | 67 | # Check if any cache-related settings were updated 68 | cache_related_keys = {'use_json_cache', 'use_mongo_cache', 'redis'} 69 | if cache_related_keys.intersection(update_keys): 70 | CacheService().__init__() # Reinitialize Cache service 71 | 72 | return {"message": "Configuration updated successfully"} -------------------------------------------------------------------------------- 
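Note: `app/tests/test_api.py` and `app/tests/test_services.py` are empty in this repository. Below is a minimal sketch (not part of the project) of what `test_api.py` could cover for the routes above, assuming pytest is run from the repository root so `Configs.load_from_yaml("config.yaml")` resolves; it only exercises routes that touch neither MongoDB nor the upstream LLM.

```python
# Sketch of app/tests/test_api.py — an illustration, not the repository's code.
# Assumes pytest runs from the repo root so config.yaml is found at import time.
from fastapi.testclient import TestClient

from app.main import app

client = TestClient(app)


def test_health_check():
    # /health is defined in app/main.py and has no external dependencies
    response = client.get("/health")
    assert response.status_code == 200
    assert response.json() == {"status": "ok"}


def test_get_configuration():
    # /configure returns the settings object loaded by app.core.config at import time
    response = client.get("/configure")
    assert response.status_code == 200
    body = response.json()
    assert "llm_schemas" in body
    assert body["current_llm_service"] in body["llm_schemas"]
```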
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | .venv/ 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | cache.json -------------------------------------------------------------------------------- /app/services/cache_service.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pymongo import MongoClient 3 | import redis 4 | from app.core.config import settings 5 | from collections import OrderedDict 6 | import os 7 | from datetime import datetime 8 | 9 | class CacheService: 10 | def __init__(self): 11 | self.use_json_cache = settings.use_json_cache 12 | self.use_mongo_cache = settings.use_mongo_cache 13 | self.max_cache_size = settings.cache_max_size 14 | if self.use_mongo_cache: 15 | self.mongo_client = MongoClient(settings.mongodb.uri) 16 | self.mongo_db = self.mongo_client[settings.mongodb.db_name] 17 | self.mongo_collection = self.mongo_db[settings.mongodb.collection_name] 18 | if settings.redis.enabled: 19 | self.redis_client = redis.Redis( 20 | host=settings.redis.host, 21 | port=settings.redis.port, 22 | db=settings.redis.db, 23 | ) 24 | 25 | def _load_json_cache(self) -> OrderedDict: 26 | try: 27 | if os.path.exists(settings.json_cache_file): 28 | with open(settings.json_cache_file, "r") as f: 29 | data = json.load(f) 30 | return OrderedDict(data) 31 | except json.JSONDecodeError: 32 | pass 33 | return OrderedDict() 34 | 35 | def _save_json_cache(self, cache: OrderedDict): 36 | with open(settings.json_cache_file, "w") as f: 37 | json.dump(cache, f) 38 | 39 | async def get(self, key: str): 40 | if settings.redis.enabled: 41 | redis_result = self.redis_client.get(key) 42 | if redis_result: 43 | return json.loads(redis_result) 44 | 45 | if self.use_mongo_cache: 46 | mongo_result = self.mongo_collection.find_one_and_update( 47 | {"_id": key}, {"$set": {"lastAccessed": datetime.utcnow()}} # Update lastAccessed to track usage time for LRU eviction 48 | ) 49 | if mongo_result: 50 | return mongo_result["response"] 51 | 52 | if self.use_json_cache: 53 | cache = self._load_json_cache() 54 | if key in cache: 55 | # Move accessed key to end (most recently used) 56 | cache.move_to_end(key) 57 | self._save_json_cache(cache) 58 | return cache[key] 59 | 60 | return None 61 | 62 | async def set(self, key: str, value: str, expire: int = 3600): 63 | if settings.redis.enabled: 64 | self.redis_client.setex(key, expire, json.dumps(value)) 65 | 66 | if self.use_mongo_cache: 67 | count = 
self.mongo_collection.count_documents({}) 68 | if count >= self.max_cache_size: 69 | oldest = self.mongo_collection.find_one( 70 | sort=[("lastAccessed", 1)] # Ascending → oldest first 71 | ) 72 | if oldest: 73 | self.mongo_collection.delete_one({"_id": oldest["_id"]}) 74 | print(f"Evicted LRU key: {oldest['_id']}") 75 | 76 | self.mongo_collection.update_one( 77 | {"_id": key}, 78 | {"$set": {"response": value, "lastAccessed": datetime.utcnow()}}, 79 | upsert=True 80 | ) 81 | 82 | if self.use_json_cache: 83 | cache = self._load_json_cache() 84 | cache[key] = value 85 | cache.move_to_end(key) 86 | 87 | if len(cache) > self.max_cache_size: 88 | evicted_key, _ = cache.popitem(last=False) 89 | print(f"Evicted LRU key: {evicted_key}") 90 | 91 | self._save_json_cache(cache) 92 | 93 | async def delete(self, key: str): 94 | if settings.redis.enabled: 95 | self.redis_client.delete(key) 96 | 97 | if self.use_mongo_cache: 98 | self.mongo_collection.delete_one({"_id": key}) 99 | 100 | if self.use_json_cache: 101 | cache = self._load_json_cache() 102 | if key in cache: 103 | del cache[key] 104 | self._save_json_cache(cache) 105 | -------------------------------------------------------------------------------- /usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "cache_cool_uri=\"http://127.0.0.1:8000/avalai\"\n", 10 | "cache_cool_base_uri=\"http://127.0.0.1:8000/\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from dotenv import load_dotenv\n", 20 | "load_dotenv()\n", 21 | "import os\n", 22 | "OPENAI_API_KEY=os.environ['OPENAI_API_KEY']" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# OPENAI_API_KEY=''" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import requests\n", 41 | "import json\n", 42 | "\n", 43 | "# reqUrl = \"https://api.openai.com/v1/chat/completions\"\n", 44 | "reqUrl = f\"{cache_cool_uri}/chat/completions\"\n", 45 | "\n", 46 | "headersList = {\n", 47 | " \"Accept\": \"*/*\",\n", 48 | " \"User-Agent\": \"Thunder Client (https://www.thunderclient.com)\",\n", 49 | " \"Content-Type\": \"application/json\",\n", 50 | " \"Authorization\": f\"Bearer {OPENAI_API_KEY}\" \n", 51 | "}\n", 52 | "\n", 53 | "payload = json.dumps({\n", 54 | " \"model\": \"gpt-4o-mini\",\n", 55 | " \"messages\": [\n", 56 | " {\n", 57 | " \"role\": \"user\",\n", 58 | " \"content\": \"Hello, how can I use ChatGPT with Claude?\"\n", 59 | " }\n", 60 | " ],\n", 61 | " \"max_tokens\": 100\n", 62 | "})\n", 63 | "\n", 64 | "response = requests.request(\"POST\", reqUrl, data=payload, headers=headersList)\n", 65 | "\n", 66 | "print(response.json())" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "from langchain_openai import ChatOpenAI\n", 76 | "# from langchain.prompts.chat import , HumanMessage\n", 77 | "\n", 78 | "# Initialize the chat model\n", 79 | "chat = ChatOpenAI(base_url=cache_cool_uri, openai_api_key=OPENAI_API_KEY, model=\"gpt-3.5-turbo\")\n", 80 | "\n", 81 | "# Create messages\n", 82 | "messages = [\n", 83 | " (\"system\",\"You are a helpful assistant.\"),\n", 84 | 
" (\"user\",\"Can you explain how to use Claude?\")\n", 85 | "]\n", 86 | "\n", 87 | "# Get a response\n", 88 | "response = chat(messages)\n", 89 | "print(response)\n" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "import openai\n", 99 | "# Set your OpenAI API key\n", 100 | "# openai.api_key =OPENAI_API_KEY\n", 101 | "\n", 102 | "cli=openai.OpenAI(\n", 103 | " api_key=OPENAI_API_KEY,\n", 104 | " base_url=cache_cool_uri\n", 105 | ")\n", 106 | "# Define the prompt\n", 107 | "prompt = \"Hello, how can I use ChatGPT with Claude?\"\n", 108 | "\n", 109 | "\n", 110 | "\n", 111 | "# Generate a response\n", 112 | "response =cli.chat.completions.create(\n", 113 | " \n", 114 | " model=\"gpt-4o-mini\",\n", 115 | " messages=[prompt],\n", 116 | " max_tokens=100,\n", 117 | " n=1,\n", 118 | " stop=None,\n", 119 | " temperature=0.5,\n", 120 | ")\n", 121 | "\n", 122 | "# Print the generated response\n", 123 | "print(response.choices[0].message.content)\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "import requests\n", 133 | "response=requests.get(cache_cool_base_uri+'configure')\n", 134 | "configure_response=response.json()\n", 135 | "configure_response" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "import requests\n", 145 | "\n", 146 | "# Define the URL for the configure endpoint\n", 147 | "url = \"http://localhost:8000/configure\" # Replace with the actual URL of your FastAPI server\n", 148 | "\n", 149 | "# Complete payload with all required fields\n", 150 | "payload = {\n", 151 | " \"llm_schemas\": {\n", 152 | " \"gemini\": {\n", 153 | " \"endpoint\": \"https://api.google.com/v1/chat/completions\",\n", 154 | " \"headers\": [\n", 155 | " \"Content-Type: application/json\",\n", 156 | " \"Authorization: Bearer {api_key}\"\n", 157 | " ],\n", 158 | " \"temperature_threshold\": 0.8\n", 159 | " }\n", 160 | " },\n", 161 | " \"mongodb\": {\n", 162 | " \"uri\": \"mongodb://localhost:27017\",\n", 163 | " \"db_name\": \"test_db\",\n", 164 | " \"collection_name\": \"test_collection\"\n", 165 | " },\n", 166 | " \"json_cache_file\": \"cache.json\",\n", 167 | " \"redis\": {\n", 168 | " \"enabled\": False,\n", 169 | " \"host\": \"localhost\",\n", 170 | " \"port\": 6379,\n", 171 | " \"db\": 0\n", 172 | " },\n", 173 | " \"current_llm_service\": \"gemini\",\n", 174 | " \"use_json_cache\": True,\n", 175 | " \"use_mongo_cache\": True\n", 176 | "}\n", 177 | "\n", 178 | "payload['llm_schemas'].update(configure_response[\"llm_schemas\"])\n", 179 | "# Send the PUT request with the complete payload as JSON\n", 180 | "response = requests.put(url, json=payload)\n", 181 | "\n", 182 | "# Print the response from the server\n", 183 | "if response.status_code == 200:\n", 184 | " print(\"Configuration updated successfully:\", response.json())\n", 185 | "else:\n", 186 | " print(f\"Failed to update configuration. 
Status code: {response.status_code}\")\n", 187 | " print(\"Response:\", response.text)\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "kaggle_agent", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": "3.11.9" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 2 219 | } 220 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🌟 Cache Cool 2 | 3 | **Cache-Cool** is a simple LLM (Large Language Model) caching proxy for saving your LLM calls. It acts as a caching layer for LLM API calls, such as OpenAI or Claude, to improve performance and reduce costs by avoiding redundant requests to the LLM providers. The caching is implemented using both MongoDB and JSON files. 4 | 5 | ## 📌 Project Details 6 | 7 | - **GitHub Repository**: [https://github.com/msnp1381/Cache-Cool](https://github.com/msnp1381/Cache-Cool) 8 | - **Project Name**: Cache-Cool 9 | - **Project Description**: A simple LLM caching proxy for saving your LLM calls. 10 | 11 | ## 🚀 Features 12 | 13 | - **💾 Cache Responses**: Caches responses from LLM API calls to reduce redundancy. 14 | - **⚙️ Dynamic Configuration**: Allows dynamic configuration of LLM service and caching mechanisms via the `/configure` endpoint. 15 | - **🔄 Supports Multiple LLMs**: Configurable to support different LLM services (e.g., OpenAI, Claude, Groq). 16 | - **📂 Uses MongoDB and JSON for Caching**: Leverages both MongoDB and JSON files for caching API responses. 17 | - **♻️ Implements LRU eviction for JSON and Mongo caches.** 18 | - **⚡ Redis caching relies on Redis's default LRU mechanism.** 19 | 20 | 21 | 22 | ## 📡 Endpoints 23 | 24 | - **POST /{schema_name}/chat/completions**: 25 | 26 | > ***schema_name*** is defined in **confing.yaml** 27 | 28 | Forwards chat completion requests to the configured LLM service or returns cached responses. 29 | 30 | - **GET /configure**: Retrieves current configuration details. 31 | - **PUT /configure**: Updates configuration settings dynamically. 32 | 33 | ## 🛠️ Getting Started 34 | 35 | ### Prerequisites 36 | 37 | Before you start, make sure you have: 38 | 39 | - **🐳 Docker**: Installed on your system. [Download Docker here](https://www.docker.com/products/docker-desktop) 40 | - **🍃 MongoDB**: A running MongoDB instance for caching (local or remote). 41 | - **🍅 Redis**: Optional A running Redis instance for caching. 42 | 43 | ### 📥 Installation 44 | 45 | #### Option 1: Using Docker 46 | 47 | 1. **Clone the repository**: 48 | 49 | First, download the project files: 50 | 51 | ```bash 52 | git clone https://github.com/msnp1381/cache-cool.git 53 | cd cache-cool 54 | ``` 55 | 56 | 2. **Build the Docker Image**: 57 | 58 | Now, create a Docker image for the project: 59 | 60 | ```bash 61 | docker build -t cache-cool . 62 | ``` 63 | 64 | 3. **Run the Docker Container**: 65 | 66 | Make sure MongoDB is running and accessible. 
Update the `config.yaml` with your MongoDB connection details, then run: 67 | 68 | ```bash 69 | docker run -p 8000:8000 --env-file .env cache-cool 70 | ``` 71 | 72 | Replace `.env` with your environment file containing necessary environment variables (like MongoDB URI). 73 | 74 | 4. **Access the Application**: 75 | 76 | Open your browser and go to [http://localhost:8000](http://localhost:8000) to start using Cache-Cool! 77 | 78 | #### Option 2: Using `requirements.txt` and Running Locally 79 | 80 | 1. **Clone the repository**: 81 | 82 | First, download the project files: 83 | 84 | ```bash 85 | git clone https://github.com/msnp1381/cache-cool.git 86 | cd cache-cool 87 | ``` 88 | 89 | 2. **Install Python Dependencies**: 90 | 91 | If you prefer using `requirements.txt`, install the dependencies as follows: 92 | 93 | ```bash 94 | python3 -m venv venv 95 | source venv/bin/activate # On Windows use `venv\Scripts\activate` 96 | pip install -r requirements.txt 97 | ``` 98 | 99 | 3. **Run the Application with Uvicorn**: 100 | 101 | Start the FastAPI application using Uvicorn: 102 | 103 | ```bash 104 | uvicorn app.main:app --reload 105 | ``` 106 | 107 | This will start the server at [http://localhost:8000](http://localhost:8000). 108 | 109 | ### ⚙️ Configuration 110 | 111 | cache-cool uses a `config.yaml` file for initial configuration. You can also update configurations dynamically using the `/configure` endpoint. 112 | 113 | #### Example config.yaml 114 | 115 | ```yaml 116 | llm_schemas: 117 | openai: 118 | endpoint: "https://api.openai.com/v1/chat/completions" 119 | headers: 120 | - "Content-Type: application/json" 121 | - "Authorization: Bearer {api_key}" 122 | temperature_threshold: 0.8 123 | claude: 124 | endpoint: "https://api.claude.ai/v1/chat/completions" 125 | headers: 126 | - "Content-Type: application/json" 127 | - "Authorization: Bearer {api_key}" 128 | temperature_threshold: 0.85 129 | avalai: 130 | endpoint: "https://api.avalapis.ir/v1/chat/completions" 131 | headers: 132 | - "Content-Type: application/json" 133 | - "Authorization: {api_key}" 134 | temperature_threshold: 0.85 135 | groq: 136 | endpoint: "https://api.groq.com/openai/v1/chat/completions" 137 | headers: 138 | - "Content-Type: application/json" 139 | - "Authorization: {api_key}" 140 | temperature_threshold: 0.8 141 | 142 | mongodb: 143 | uri: "mongodb://localhost:27017" 144 | db_name: "llm_cache_db" 145 | collection_name: "cache" 146 | 147 | json_cache_file: "cache.json" 148 | 149 | redis: 150 | enabled: false 151 | host: "localhost" 152 | port: 6379 153 | db: 0 154 | 155 | current_llm_service: "openai" 156 | use_json_cache: true 157 | use_mongo_cache: true 158 | cache_max_size: 3 159 | ``` 160 | 161 | ### ♻️ LRU Caching (Least Recently Used) 162 | 163 | Cache-Cool supports Least Recently Used (LRU) eviction to keep the cache size manageable and efficient. 164 | 165 | - For **JSON file** and **MongoDB** caching, LRU is implemented by tracking the last access time of cache entries. 166 | - You must enable `use_json_cache` or `use_mongo_cache` and set `cache_max_size` in your `config.yaml` to activate LRU eviction. For example: 167 | 168 | ```yaml 169 | use_json_cache: true 170 | use_mongo_cache: true 171 | cache_max_size: 3 172 | ``` 173 | 174 | When the number of cached items exceeds `cache_max_size`, the least recently accessed item is automatically evicted. 175 | 176 | - For **Redis**, Cache-Cool relies on Redis’s built-in LRU eviction policies. 
To enable LRU in Redis, configure your `redis.conf` or via command line with: 177 | 178 | ```bash 179 | maxmemory 100mb # Set maximum Redis memory usage (adjust as needed) 180 | maxmemory-policy allkeys-lru # Use LRU eviction policy when maxmemory is exceeded 181 | sudo systemctl restart redis # Restart Redis to apply changes 182 | ``` 183 | 184 | Make sure Redis caching is enabled in your `config.yaml`: 185 | 186 | ```yaml 187 | redis: 188 | enabled: true 189 | host: "localhost" 190 | port: 6379 191 | db: 0 192 | ``` 193 | 194 | This way, Redis handles eviction automatically without Cache-Cool implementing it explicitly. 195 | 196 | 197 | ### 📡 API Usage 198 | 199 | Here’s how to use the API once the service is running: 200 | 201 | #### Example Request 202 | 203 | look at **usage.ipynb** 204 | 205 | ### 🤝 Contributing 206 | 207 | We welcome contributions! Here’s how you can help: 208 | 209 | 1. Fork the repository. 210 | 2. Create a new branch (`git checkout -b feature-branch`). 211 | 3. Make your changes. 212 | 4. Commit your changes (`git commit -am 'Add some feature'`). 213 | 5. Push to the branch (`git push origin feature-branch`). 214 | 6. Create a new Pull Request. 215 | 216 | This project includes contributions from: 217 | 218 | - [Lior Soffer](https://github.com/LiorSoffer) 219 | - [Ariel Hartal](https://github.com/arielhartal) 220 | - [Adi Fayer](https://github.com/adif97) 221 | 222 | 223 | ### 📜 License 224 | 225 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 226 | 227 | ### 📧 Contact 228 | 229 | If you have any questions or issues, feel free to contact us at [mohamadnematpoor@gmail.com](mailto:mohamadnematpoor@gmail.com). 230 | 231 | Happy caching! 🚀 232 | --------------------------------------------------------------------------------
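For a quick check outside the notebook, here is a minimal sketch of a raw request against the `openai` schema from `config.yaml`, assuming the service is running locally on port 8000 and `OPENAI_API_KEY` is set in the environment. As `routes.py` is written, the incoming `Authorization` header value is substituted into the schema's header template; since the `openai` template already contains `Bearer {api_key}`, the bare key is sent here.

```python
# Hypothetical client call against a locally running instance — a sketch, not repo code.
import os

import requests

resp = requests.post(
    "http://localhost:8000/openai/chat/completions",
    headers={
        # routes.py forwards this value into the schema's header template, which for the
        # "openai" schema is "Authorization: Bearer {api_key}", so send the bare key.
        "Authorization": os.environ["OPENAI_API_KEY"],
        "Content-Type": "application/json",
    },
    json={
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": "Hello from Cache-Cool!"}],
        # At or below the schema's temperature_threshold (0.8 for "openai"), repeated
        # identical requests are answered from the cache instead of the upstream API.
        "temperature": 0.2,
        "max_tokens": 50,
        # Explicit null; ChatCompletionRequest otherwise defaults stop to an empty string.
        "stop": None,
    },
)
print(resp.status_code)
print(resp.json())
```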
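The LRU eviction described in the README can also be exercised without MongoDB or Redis. A minimal sketch of what the empty `app/tests/test_services.py` could contain, assuming the optional backends are switched off by monkeypatching the loaded settings and `pytest-asyncio` (already in `requirements.txt`) is installed:

```python
# Sketch of app/tests/test_services.py — an illustration of JSON-file LRU eviction only.
import pytest

from app.core.config import settings
from app.services.cache_service import CacheService


@pytest.mark.asyncio
async def test_json_cache_evicts_least_recently_used(tmp_path, monkeypatch):
    # Disable the optional backends so only the JSON cache is exercised.
    monkeypatch.setattr(settings, "use_mongo_cache", False)
    monkeypatch.setattr(settings.redis, "enabled", False)
    monkeypatch.setattr(settings, "use_json_cache", True)
    monkeypatch.setattr(settings, "cache_max_size", 2)
    monkeypatch.setattr(settings, "json_cache_file", str(tmp_path / "cache.json"))

    cache = CacheService()
    await cache.set("a", {"answer": 1})
    await cache.set("b", {"answer": 2})
    await cache.set("c", {"answer": 3})  # exceeds cache_max_size, so "a" is evicted

    assert await cache.get("a") is None
    assert await cache.get("c") == {"answer": 3}
```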