├── app
│   ├── __init__.py
│   ├── api
│   │   ├── __init__.py
│   │   ├── models.py
│   │   └── routes.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── security.py
│   │   └── config.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_api.py
│   │   └── test_services.py
│   ├── services
│   │   ├── __init__.py
│   │   ├── llm_service.py
│   │   └── cache_service.py
│   └── main.py
├── requirements.txt
├── Dockerfile
├── config.yaml
├── .gitignore
├── usage.ipynb
└── README.md

/app/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/api/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/core/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/core/security.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/tests/test_api.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/services/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/app/tests/test_services.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | fastapi
2 | uvicorn
3 | pydantic
4 | pydantic-settings
5 | redis
6 | pymongo
7 | httpx
8 | pytest
9 | pytest-asyncio
10 | PyYAML
11 | requests
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9
2 |
3 | WORKDIR /app
4 |
5 | COPY requirements.txt .
6 | RUN pip install --no-cache-dir -r requirements.txt
7 |
8 | COPY . .
9 | 10 | CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] -------------------------------------------------------------------------------- /app/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from app.api.routes import router as api_router 3 | from app.core.config import settings 4 | 5 | app = FastAPI(title="cache-cool", version="0.1.0") 6 | 7 | app.include_router(api_router 8 | # , prefix="/v1" 9 | ) 10 | 11 | @app.get("/health") 12 | async def health_check(): 13 | return {"status": "ok"} 14 | 15 | if __name__ == "__main__": 16 | import uvicorn 17 | uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True) -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | llm_schemas: 2 | openai: 3 | endpoint: "https://api.openai.com/v1/chat/completions" 4 | headers: 5 | - "Content-Type: application/json" 6 | - "Authorization: Bearer {api_key}" 7 | temperature_threshold: 0.8 8 | claude: 9 | endpoint: "https://api.claude.ai/v1/chat/completions" 10 | headers: 11 | - "Content-Type: application/json" 12 | - "Authorization: Bearer {api_key}" 13 | temperature_threshold: 0.85 14 | avalai: 15 | endpoint: "https://api.avalapis.ir/v1/chat/completions" 16 | headers: 17 | - "Content-Type: application/json" 18 | - "Authorization: {api_key}" 19 | temperature_threshold: 0.85 20 | groq: 21 | endpoint: "https://api.groq.com/openai/v1/chat/completions" 22 | headers: 23 | - "Content-Type: application/json" 24 | - "Authorization: {api_key}" 25 | temperature_threshold: 0.8 26 | 27 | mongodb: 28 | uri: "mongodb://localhost:27017" 29 | db_name: "llm_cache_db" 30 | collection_name: "cache" 31 | 32 | json_cache_file: "cache.json" 33 | 34 | redis: 35 | enabled: false 36 | host: "localhost" 37 | port: 6379 38 | db: 0 39 | 40 | current_llm_service: "openai" 41 | use_json_cache: true 42 | use_mongo_cache: true 43 | cache_max_size: 3 -------------------------------------------------------------------------------- /app/services/llm_service.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from fastapi.datastructures import Headers 3 | import httpx 4 | from app.core.config import settings 5 | from app.api.models import ChatCompletionRequest 6 | 7 | class LLMService: 8 | def __init__(self,service:str=None): 9 | self.current_service = service or settings.current_llm_service 10 | self.schema = settings.llm_schemas[self.current_service] 11 | 12 | async def generate_response(self, request: ChatCompletionRequest,api_key:str): 13 | request_json=request.model_dump_json() 14 | 15 | headers = {header.split(': ')[0]: header.split(': ')[1].format(api_key=api_key) 16 | for header in self.schema.headers} 17 | print(headers) 18 | async with httpx.AsyncClient() as client: 19 | response = await client.post( 20 | str(self.schema.endpoint), 21 | headers=headers, 22 | content=request_json 23 | ) 24 | print([i for i in response.iter_text()]) 25 | response.raise_for_status() 26 | return response.json() 27 | 28 | def get_temperature_threshold(self): 29 | return self.schema.temperature_threshold 30 | @classmethod 31 | def check_type(cls, schema_name): 32 | if schema_name not in settings.llm_schemas: 33 | return None 34 | else: return cls(schema_name) -------------------------------------------------------------------------------- /app/core/config.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from pydantic import BaseModel, Field 3 | from pydantic_settings import BaseSettings 4 | import yaml 5 | from typing import Dict, List, Optional 6 | 7 | 8 | from pydantic_settings import BaseSettings, SettingsConfigDict 9 | from pydantic import HttpUrl, Field 10 | from typing import List, Dict, Optional 11 | 12 | class LLMSchema(BaseModel): 13 | endpoint: HttpUrl 14 | headers: List[str] 15 | temperature_threshold: float 16 | 17 | class MongoDBConfig(BaseModel): 18 | uri: str 19 | db_name: str 20 | collection_name: str 21 | 22 | class RedisConfig(BaseModel): 23 | enabled: bool 24 | host: str 25 | port: int 26 | db: int 27 | 28 | class Configs(BaseModel): 29 | llm_schemas: Dict[str, LLMSchema] 30 | mongodb: MongoDBConfig 31 | json_cache_file: str 32 | redis: RedisConfig 33 | current_llm_service: str 34 | use_json_cache: bool 35 | use_mongo_cache: bool 36 | cache_max_size: int 37 | 38 | @classmethod 39 | def load_from_yaml(cls, yaml_file: str): 40 | with open(yaml_file, "r") as f: 41 | config_data = yaml.safe_load(f) 42 | return cls(**config_data) 43 | 44 | # class Config(SettingsConfigDict): 45 | # env_file = ".env" # Specify an environment file if needed 46 | # env_file_encoding = 'utf-8' 47 | # case_sensitive = True 48 | 49 | 50 | 51 | settings = Configs.load_from_yaml("config.yaml") 52 | -------------------------------------------------------------------------------- /app/api/models.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, HttpUrl, Field, model_validator 2 | from typing import Any, List, Optional, Dict 3 | from app.core.config import settings 4 | 5 | # Message model used when current_llm_service == "groq" (no message field) 6 | class MessageGroq(BaseModel): 7 | role: Optional[str] 8 | content: Optional[str] 9 | @model_validator(mode='before') 10 | @classmethod 11 | def validate_input(cls, values: Dict[str, Any]) -> Dict[str, Any]: 12 | # Check if the input is a simple string 13 | if isinstance(values, str): 14 | values = {"role": "user", "content": values} 15 | elif isinstance(values, dict): 16 | # Ensure required fields are present 17 | if 'content' not in values: 18 | raise ValueError("Field 'content' is required.") 19 | if 'role' not in values: 20 | values['role'] = "user" # Default role if not provided 21 | return values 22 | 23 | # Default Message model with message field 24 | class MessageDefault(MessageGroq): 25 | message: Optional[str] 26 | 27 | @model_validator(mode="before") 28 | @classmethod 29 | def validate_input(cls, values: Dict[str, Any]) -> Dict[str, Any]: 30 | values = super().validate_input(values) 31 | values['message'] = values.get('message', values['content']) # Default message 32 | return values 33 | 34 | 35 | # Dynamically select Message model based on current_llm_service setting 36 | MessageModel = MessageGroq if settings.current_llm_service == "groq" else MessageDefault 37 | 38 | class ChatCompletionRequest(BaseModel): 39 | model: str 40 | messages: List[MessageModel] 41 | temperature: float = Field(default=1.0, ge=0.0, le=2.0) 42 | max_tokens: Optional[int] = None 43 | n:Optional[int]=None 44 | stop:Optional[str]="" 45 | 46 | class LLMSchemaUpdate(BaseModel): 47 | endpoint: Optional[str] = None 48 | headers: Optional[List[str]] = None 49 | temperature_threshold: Optional[float] = None 50 | 51 | 52 | class LLMSchema(BaseModel): 53 | endpoint: HttpUrl 54 | headers: List[str] 55 | 
temperature_threshold: float 56 | 57 | class MongoDBConfig(BaseModel): 58 | uri: str 59 | db_name: str 60 | collection_name: str 61 | 62 | class RedisConfig(BaseModel): 63 | enabled: bool 64 | host: str 65 | port: int 66 | db: int 67 | 68 | class ConfigurationUpdate(BaseModel): 69 | llm_schemas: Dict[str, LLMSchema] 70 | mongodb: MongoDBConfig 71 | json_cache_file: str 72 | redis: RedisConfig 73 | current_llm_service: str 74 | use_json_cache: bool 75 | use_mongo_cache: bool 76 | cache_max_size: int 77 | -------------------------------------------------------------------------------- /app/api/routes.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict 3 | from fastapi import APIRouter, HTTPException, Request 4 | from app.core.config import Configs, settings 5 | from app.services.llm_service import LLMService 6 | from app.services.cache_service import CacheService 7 | from app.api.models import ChatCompletionRequest, ConfigurationUpdate 8 | 9 | router = APIRouter() 10 | 11 | 12 | @router.post("/{schema_name}/chat/completions") 13 | async def chat_completions(schema_name: str,request: Request): 14 | cache_service = CacheService() 15 | llm_service = LLMService.check_type(schema_name) 16 | request_body_json: Dict = await request.json() 17 | 18 | print(request_body_json) 19 | chat_request=ChatCompletionRequest(**request_body_json) 20 | # Access headers 21 | headers = request.headers 22 | api_key=headers.get("Authorization") 23 | if not api_key: 24 | raise HTTPException(status_code=401, detail="no key!?") 25 | 26 | # Check cache if temperature is below threshold 27 | if chat_request.temperature <= llm_service.get_temperature_threshold(): 28 | cached_response = await cache_service.get(chat_request.model_dump_json()) 29 | if cached_response: 30 | print("hit!") 31 | return cached_response 32 | # Make API call 33 | response = await llm_service.generate_response(chat_request,api_key) 34 | 35 | # Cache response if temperature is below threshold 36 | if chat_request.temperature <= llm_service.get_temperature_threshold(): 37 | await cache_service.set(chat_request.model_dump_json(), response) 38 | 39 | return response 40 | 41 | @router.get("/configure") 42 | async def get_configuration(): 43 | return settings 44 | 45 | 46 | @router.put("/configure") 47 | async def update_configuration(config: Configs): 48 | global settings # Declare settings as global to modify the global settings object 49 | 50 | try: 51 | # Update settings using the `copy(update=...)` method 52 | updated_settings = settings.copy(update=config.dict(exclude_unset=True)) 53 | 54 | # Apply the updated settings 55 | settings = updated_settings 56 | except Exception as e: 57 | # Return an error response if updating fails 58 | raise HTTPException(status_code=400, detail=f'Error in input validation: {e}') 59 | 60 | # Reinitialize services if certain keys are updated 61 | update_keys = config.dict(exclude_unset=True).keys() 62 | 63 | # Check if LLM service needs to be reinitialized 64 | if 'current_llm_service' in update_keys: 65 | LLMService().__init__() # Reinitialize LLM service 66 | 67 | # Check if any cache-related settings were updated 68 | cache_related_keys = {'use_json_cache', 'use_mongo_cache', 'redis'} 69 | if cache_related_keys.intersection(update_keys): 70 | CacheService().__init__() # Reinitialize Cache service 71 | 72 | return {"message": "Configuration updated successfully"} -------------------------------------------------------------------------------- 
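Note: `app/tests/test_api.py` and `app/tests/test_services.py` are empty in this repository. Below is a minimal sketch (not part of the project) of what `test_api.py` could cover for the routes above, assuming pytest is run from the repository root so `Configs.load_from_yaml("config.yaml")` resolves; it only exercises routes that touch neither MongoDB nor the upstream LLM.

```python
# Sketch of app/tests/test_api.py — an illustration, not the repository's code.
# Assumes pytest runs from the repo root so config.yaml is found at import time.
from fastapi.testclient import TestClient

from app.main import app

client = TestClient(app)


def test_health_check():
    # /health is defined in app/main.py and has no external dependencies
    response = client.get("/health")
    assert response.status_code == 200
    assert response.json() == {"status": "ok"}


def test_get_configuration():
    # /configure returns the settings object loaded by app.core.config at import time
    response = client.get("/configure")
    assert response.status_code == 200
    body = response.json()
    assert "llm_schemas" in body
    assert body["current_llm_service"] in body["llm_schemas"]
```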
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | .venv/ 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | cache.json -------------------------------------------------------------------------------- /app/services/cache_service.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pymongo import MongoClient 3 | import redis 4 | from app.core.config import settings 5 | from collections import OrderedDict 6 | import os 7 | from datetime import datetime 8 | 9 | class CacheService: 10 | def __init__(self): 11 | self.use_json_cache = settings.use_json_cache 12 | self.use_mongo_cache = settings.use_mongo_cache 13 | self.max_cache_size = settings.cache_max_size 14 | if self.use_mongo_cache: 15 | self.mongo_client = MongoClient(settings.mongodb.uri) 16 | self.mongo_db = self.mongo_client[settings.mongodb.db_name] 17 | self.mongo_collection = self.mongo_db[settings.mongodb.collection_name] 18 | if settings.redis.enabled: 19 | self.redis_client = redis.Redis( 20 | host=settings.redis.host, 21 | port=settings.redis.port, 22 | db=settings.redis.db, 23 | ) 24 | 25 | def _load_json_cache(self) -> OrderedDict: 26 | try: 27 | if os.path.exists(settings.json_cache_file): 28 | with open(settings.json_cache_file, "r") as f: 29 | data = json.load(f) 30 | return OrderedDict(data) 31 | except json.JSONDecodeError: 32 | pass 33 | return OrderedDict() 34 | 35 | def _save_json_cache(self, cache: OrderedDict): 36 | with open(settings.json_cache_file, "w") as f: 37 | json.dump(cache, f) 38 | 39 | async def get(self, key: str): 40 | if settings.redis.enabled: 41 | redis_result = self.redis_client.get(key) 42 | if redis_result: 43 | return json.loads(redis_result) 44 | 45 | if self.use_mongo_cache: 46 | mongo_result = self.mongo_collection.find_one_and_update( 47 | {"_id": key}, {"$set": {"lastAccessed": datetime.utcnow()}} # Update lastAccessed to track usage time for LRU eviction 48 | ) 49 | if mongo_result: 50 | return mongo_result["response"] 51 | 52 | if self.use_json_cache: 53 | cache = self._load_json_cache() 54 | if key in cache: 55 | # Move accessed key to end (most recently used) 56 | cache.move_to_end(key) 57 | self._save_json_cache(cache) 58 | return cache[key] 59 | 60 | return None 61 | 62 | async def set(self, key: str, value: str, expire: int = 3600): 63 | if settings.redis.enabled: 64 | self.redis_client.setex(key, expire, json.dumps(value)) 65 | 66 | if self.use_mongo_cache: 67 | count = 
self.mongo_collection.count_documents({}) 68 | if count >= self.max_cache_size: 69 | oldest = self.mongo_collection.find_one( 70 | sort=[("lastAccessed", 1)] # Ascending → oldest first 71 | ) 72 | if oldest: 73 | self.mongo_collection.delete_one({"_id": oldest["_id"]}) 74 | print(f"Evicted LRU key: {oldest['_id']}") 75 | 76 | self.mongo_collection.update_one( 77 | {"_id": key}, 78 | {"$set": {"response": value, "lastAccessed": datetime.utcnow()}}, 79 | upsert=True 80 | ) 81 | 82 | if self.use_json_cache: 83 | cache = self._load_json_cache() 84 | cache[key] = value 85 | cache.move_to_end(key) 86 | 87 | if len(cache) > self.max_cache_size: 88 | evicted_key, _ = cache.popitem(last=False) 89 | print(f"Evicted LRU key: {evicted_key}") 90 | 91 | self._save_json_cache(cache) 92 | 93 | async def delete(self, key: str): 94 | if settings.redis.enabled: 95 | self.redis_client.delete(key) 96 | 97 | if self.use_mongo_cache: 98 | self.mongo_collection.delete_one({"_id": key}) 99 | 100 | if self.use_json_cache: 101 | cache = self._load_json_cache() 102 | if key in cache: 103 | del cache[key] 104 | self._save_json_cache(cache) 105 | -------------------------------------------------------------------------------- /usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "cache_cool_uri=\"http://127.0.0.1:8000/avalai\"\n", 10 | "cache_cool_base_uri=\"http://127.0.0.1:8000/\"" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from dotenv import load_dotenv\n", 20 | "load_dotenv()\n", 21 | "import os\n", 22 | "OPENAI_API_KEY=os.environ['OPENAI_API_KEY']" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# OPENAI_API_KEY=''" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import requests\n", 41 | "import json\n", 42 | "\n", 43 | "# reqUrl = \"https://api.openai.com/v1/chat/completions\"\n", 44 | "reqUrl = f\"{cache_cool_uri}/chat/completions\"\n", 45 | "\n", 46 | "headersList = {\n", 47 | " \"Accept\": \"*/*\",\n", 48 | " \"User-Agent\": \"Thunder Client (https://www.thunderclient.com)\",\n", 49 | " \"Content-Type\": \"application/json\",\n", 50 | " \"Authorization\": f\"Bearer {OPENAI_API_KEY}\" \n", 51 | "}\n", 52 | "\n", 53 | "payload = json.dumps({\n", 54 | " \"model\": \"gpt-4o-mini\",\n", 55 | " \"messages\": [\n", 56 | " {\n", 57 | " \"role\": \"user\",\n", 58 | " \"content\": \"Hello, how can I use ChatGPT with Claude?\"\n", 59 | " }\n", 60 | " ],\n", 61 | " \"max_tokens\": 100\n", 62 | "})\n", 63 | "\n", 64 | "response = requests.request(\"POST\", reqUrl, data=payload, headers=headersList)\n", 65 | "\n", 66 | "print(response.json())" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "from langchain_openai import ChatOpenAI\n", 76 | "# from langchain.prompts.chat import , HumanMessage\n", 77 | "\n", 78 | "# Initialize the chat model\n", 79 | "chat = ChatOpenAI(base_url=cache_cool_uri, openai_api_key=OPENAI_API_KEY, model=\"gpt-3.5-turbo\")\n", 80 | "\n", 81 | "# Create messages\n", 82 | "messages = [\n", 83 | " (\"system\",\"You are a helpful assistant.\"),\n", 84 | 
" (\"user\",\"Can you explain how to use Claude?\")\n", 85 | "]\n", 86 | "\n", 87 | "# Get a response\n", 88 | "response = chat(messages)\n", 89 | "print(response)\n" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "import openai\n", 99 | "# Set your OpenAI API key\n", 100 | "# openai.api_key =OPENAI_API_KEY\n", 101 | "\n", 102 | "cli=openai.OpenAI(\n", 103 | " api_key=OPENAI_API_KEY,\n", 104 | " base_url=cache_cool_uri\n", 105 | ")\n", 106 | "# Define the prompt\n", 107 | "prompt = \"Hello, how can I use ChatGPT with Claude?\"\n", 108 | "\n", 109 | "\n", 110 | "\n", 111 | "# Generate a response\n", 112 | "response =cli.chat.completions.create(\n", 113 | " \n", 114 | " model=\"gpt-4o-mini\",\n", 115 | " messages=[prompt],\n", 116 | " max_tokens=100,\n", 117 | " n=1,\n", 118 | " stop=None,\n", 119 | " temperature=0.5,\n", 120 | ")\n", 121 | "\n", 122 | "# Print the generated response\n", 123 | "print(response.choices[0].message.content)\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "import requests\n", 133 | "response=requests.get(cache_cool_base_uri+'configure')\n", 134 | "configure_response=response.json()\n", 135 | "configure_response" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "import requests\n", 145 | "\n", 146 | "# Define the URL for the configure endpoint\n", 147 | "url = \"http://localhost:8000/configure\" # Replace with the actual URL of your FastAPI server\n", 148 | "\n", 149 | "# Complete payload with all required fields\n", 150 | "payload = {\n", 151 | " \"llm_schemas\": {\n", 152 | " \"gemini\": {\n", 153 | " \"endpoint\": \"https://api.google.com/v1/chat/completions\",\n", 154 | " \"headers\": [\n", 155 | " \"Content-Type: application/json\",\n", 156 | " \"Authorization: Bearer {api_key}\"\n", 157 | " ],\n", 158 | " \"temperature_threshold\": 0.8\n", 159 | " }\n", 160 | " },\n", 161 | " \"mongodb\": {\n", 162 | " \"uri\": \"mongodb://localhost:27017\",\n", 163 | " \"db_name\": \"test_db\",\n", 164 | " \"collection_name\": \"test_collection\"\n", 165 | " },\n", 166 | " \"json_cache_file\": \"cache.json\",\n", 167 | " \"redis\": {\n", 168 | " \"enabled\": False,\n", 169 | " \"host\": \"localhost\",\n", 170 | " \"port\": 6379,\n", 171 | " \"db\": 0\n", 172 | " },\n", 173 | " \"current_llm_service\": \"gemini\",\n", 174 | " \"use_json_cache\": True,\n", 175 | " \"use_mongo_cache\": True\n", 176 | "}\n", 177 | "\n", 178 | "payload['llm_schemas'].update(configure_response[\"llm_schemas\"])\n", 179 | "# Send the PUT request with the complete payload as JSON\n", 180 | "response = requests.put(url, json=payload)\n", 181 | "\n", 182 | "# Print the response from the server\n", 183 | "if response.status_code == 200:\n", 184 | " print(\"Configuration updated successfully:\", response.json())\n", 185 | "else:\n", 186 | " print(f\"Failed to update configuration. 
Status code: {response.status_code}\")\n", 187 | " print(\"Response:\", response.text)\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "kaggle_agent", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": "3.11.9" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 2 219 | } 220 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🌟 Cache Cool 2 | 3 | **Cache-Cool** is a simple LLM (Large Language Model) caching proxy for saving your LLM calls. It acts as a caching layer for LLM API calls, such as OpenAI or Claude, to improve performance and reduce costs by avoiding redundant requests to the LLM providers. The caching is implemented using both MongoDB and JSON files. 4 | 5 | ## 📌 Project Details 6 | 7 | - **GitHub Repository**: [https://github.com/msnp1381/Cache-Cool](https://github.com/msnp1381/Cache-Cool) 8 | - **Project Name**: Cache-Cool 9 | - **Project Description**: A simple LLM caching proxy for saving your LLM calls. 10 | 11 | ## 🚀 Features 12 | 13 | - **💾 Cache Responses**: Caches responses from LLM API calls to reduce redundancy. 14 | - **⚙️ Dynamic Configuration**: Allows dynamic configuration of LLM service and caching mechanisms via the `/configure` endpoint. 15 | - **🔄 Supports Multiple LLMs**: Configurable to support different LLM services (e.g., OpenAI, Claude, Groq). 16 | - **📂 Uses MongoDB and JSON for Caching**: Leverages both MongoDB and JSON files for caching API responses. 17 | - **♻️ Implements LRU eviction for JSON and Mongo caches.** 18 | - **⚡ Redis caching relies on Redis's default LRU mechanism.** 19 | 20 | 21 | 22 | ## 📡 Endpoints 23 | 24 | - **POST /{schema_name}/chat/completions**: 25 | 26 | > ***schema_name*** is defined in **confing.yaml** 27 | 28 | Forwards chat completion requests to the configured LLM service or returns cached responses. 29 | 30 | - **GET /configure**: Retrieves current configuration details. 31 | - **PUT /configure**: Updates configuration settings dynamically. 32 | 33 | ## 🛠️ Getting Started 34 | 35 | ### Prerequisites 36 | 37 | Before you start, make sure you have: 38 | 39 | - **🐳 Docker**: Installed on your system. [Download Docker here](https://www.docker.com/products/docker-desktop) 40 | - **🍃 MongoDB**: A running MongoDB instance for caching (local or remote). 41 | - **🍅 Redis**: Optional A running Redis instance for caching. 42 | 43 | ### 📥 Installation 44 | 45 | #### Option 1: Using Docker 46 | 47 | 1. **Clone the repository**: 48 | 49 | First, download the project files: 50 | 51 | ```bash 52 | git clone https://github.com/msnp1381/cache-cool.git 53 | cd cache-cool 54 | ``` 55 | 56 | 2. **Build the Docker Image**: 57 | 58 | Now, create a Docker image for the project: 59 | 60 | ```bash 61 | docker build -t cache-cool . 62 | ``` 63 | 64 | 3. **Run the Docker Container**: 65 | 66 | Make sure MongoDB is running and accessible. 
Update the `config.yaml` with your MongoDB connection details, then run: 67 | 68 | ```bash 69 | docker run -p 8000:8000 --env-file .env cache-cool 70 | ``` 71 | 72 | Replace `.env` with your environment file containing necessary environment variables (like MongoDB URI). 73 | 74 | 4. **Access the Application**: 75 | 76 | Open your browser and go to [http://localhost:8000](http://localhost:8000) to start using Cache-Cool! 77 | 78 | #### Option 2: Using `requirements.txt` and Running Locally 79 | 80 | 1. **Clone the repository**: 81 | 82 | First, download the project files: 83 | 84 | ```bash 85 | git clone https://github.com/msnp1381/cache-cool.git 86 | cd cache-cool 87 | ``` 88 | 89 | 2. **Install Python Dependencies**: 90 | 91 | If you prefer using `requirements.txt`, install the dependencies as follows: 92 | 93 | ```bash 94 | python3 -m venv venv 95 | source venv/bin/activate # On Windows use `venv\Scripts\activate` 96 | pip install -r requirements.txt 97 | ``` 98 | 99 | 3. **Run the Application with Uvicorn**: 100 | 101 | Start the FastAPI application using Uvicorn: 102 | 103 | ```bash 104 | uvicorn app.main:app --reload 105 | ``` 106 | 107 | This will start the server at [http://localhost:8000](http://localhost:8000). 108 | 109 | ### ⚙️ Configuration 110 | 111 | cache-cool uses a `config.yaml` file for initial configuration. You can also update configurations dynamically using the `/configure` endpoint. 112 | 113 | #### Example config.yaml 114 | 115 | ```yaml 116 | llm_schemas: 117 | openai: 118 | endpoint: "https://api.openai.com/v1/chat/completions" 119 | headers: 120 | - "Content-Type: application/json" 121 | - "Authorization: Bearer {api_key}" 122 | temperature_threshold: 0.8 123 | claude: 124 | endpoint: "https://api.claude.ai/v1/chat/completions" 125 | headers: 126 | - "Content-Type: application/json" 127 | - "Authorization: Bearer {api_key}" 128 | temperature_threshold: 0.85 129 | avalai: 130 | endpoint: "https://api.avalapis.ir/v1/chat/completions" 131 | headers: 132 | - "Content-Type: application/json" 133 | - "Authorization: {api_key}" 134 | temperature_threshold: 0.85 135 | groq: 136 | endpoint: "https://api.groq.com/openai/v1/chat/completions" 137 | headers: 138 | - "Content-Type: application/json" 139 | - "Authorization: {api_key}" 140 | temperature_threshold: 0.8 141 | 142 | mongodb: 143 | uri: "mongodb://localhost:27017" 144 | db_name: "llm_cache_db" 145 | collection_name: "cache" 146 | 147 | json_cache_file: "cache.json" 148 | 149 | redis: 150 | enabled: false 151 | host: "localhost" 152 | port: 6379 153 | db: 0 154 | 155 | current_llm_service: "openai" 156 | use_json_cache: true 157 | use_mongo_cache: true 158 | cache_max_size: 3 159 | ``` 160 | 161 | ### ♻️ LRU Caching (Least Recently Used) 162 | 163 | Cache-Cool supports Least Recently Used (LRU) eviction to keep the cache size manageable and efficient. 164 | 165 | - For **JSON file** and **MongoDB** caching, LRU is implemented by tracking the last access time of cache entries. 166 | - You must enable `use_json_cache` or `use_mongo_cache` and set `cache_max_size` in your `config.yaml` to activate LRU eviction. For example: 167 | 168 | ```yaml 169 | use_json_cache: true 170 | use_mongo_cache: true 171 | cache_max_size: 3 172 | ``` 173 | 174 | When the number of cached items exceeds `cache_max_size`, the least recently accessed item is automatically evicted. 175 | 176 | - For **Redis**, Cache-Cool relies on Redis’s built-in LRU eviction policies. 
To enable LRU in Redis, configure your `redis.conf` or via command line with: 177 | 178 | ```bash 179 | maxmemory 100mb # Set maximum Redis memory usage (adjust as needed) 180 | maxmemory-policy allkeys-lru # Use LRU eviction policy when maxmemory is exceeded 181 | sudo systemctl restart redis # Restart Redis to apply changes 182 | ``` 183 | 184 | Make sure Redis caching is enabled in your `config.yaml`: 185 | 186 | ```yaml 187 | redis: 188 | enabled: true 189 | host: "localhost" 190 | port: 6379 191 | db: 0 192 | ``` 193 | 194 | This way, Redis handles eviction automatically without Cache-Cool implementing it explicitly. 195 | 196 | 197 | ### 📡 API Usage 198 | 199 | Here’s how to use the API once the service is running: 200 | 201 | #### Example Request 202 | 203 | look at **usage.ipynb** 204 | 205 | ### 🤝 Contributing 206 | 207 | We welcome contributions! Here’s how you can help: 208 | 209 | 1. Fork the repository. 210 | 2. Create a new branch (`git checkout -b feature-branch`). 211 | 3. Make your changes. 212 | 4. Commit your changes (`git commit -am 'Add some feature'`). 213 | 5. Push to the branch (`git push origin feature-branch`). 214 | 6. Create a new Pull Request. 215 | 216 | This project includes contributions from: 217 | 218 | - [Lior Soffer](https://github.com/LiorSoffer) 219 | - [Ariel Hartal](https://github.com/arielhartal) 220 | - [Adi Fayer](https://github.com/adif97) 221 | 222 | 223 | ### 📜 License 224 | 225 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 226 | 227 | ### 📧 Contact 228 | 229 | If you have any questions or issues, feel free to contact us at [mohamadnematpoor@gmail.com](mailto:mohamadnematpoor@gmail.com). 230 | 231 | Happy caching! 🚀 232 | --------------------------------------------------------------------------------
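For a quick check outside the notebook, here is a minimal sketch of a raw request against the `openai` schema from `config.yaml`, assuming the service is running locally on port 8000 and `OPENAI_API_KEY` is set in the environment. As `routes.py` is written, the incoming `Authorization` header value is substituted into the schema's header template; since the `openai` template already contains `Bearer {api_key}`, the bare key is sent here.

```python
# Hypothetical client call against a locally running instance — a sketch, not repo code.
import os

import requests

resp = requests.post(
    "http://localhost:8000/openai/chat/completions",
    headers={
        # routes.py forwards this value into the schema's header template, which for the
        # "openai" schema is "Authorization: Bearer {api_key}", so send the bare key.
        "Authorization": os.environ["OPENAI_API_KEY"],
        "Content-Type": "application/json",
    },
    json={
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": "Hello from Cache-Cool!"}],
        # At or below the schema's temperature_threshold (0.8 for "openai"), repeated
        # identical requests are answered from the cache instead of the upstream API.
        "temperature": 0.2,
        "max_tokens": 50,
        # Explicit null; ChatCompletionRequest otherwise defaults stop to an empty string.
        "stop": None,
    },
)
print(resp.status_code)
print(resp.json())
```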
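The LRU eviction described in the README can also be exercised without MongoDB or Redis. A minimal sketch of what the empty `app/tests/test_services.py` could contain, assuming the optional backends are switched off by monkeypatching the loaded settings and `pytest-asyncio` (already in `requirements.txt`) is installed:

```python
# Sketch of app/tests/test_services.py — an illustration of JSON-file LRU eviction only.
import pytest

from app.core.config import settings
from app.services.cache_service import CacheService


@pytest.mark.asyncio
async def test_json_cache_evicts_least_recently_used(tmp_path, monkeypatch):
    # Disable the optional backends so only the JSON cache is exercised.
    monkeypatch.setattr(settings, "use_mongo_cache", False)
    monkeypatch.setattr(settings.redis, "enabled", False)
    monkeypatch.setattr(settings, "use_json_cache", True)
    monkeypatch.setattr(settings, "cache_max_size", 2)
    monkeypatch.setattr(settings, "json_cache_file", str(tmp_path / "cache.json"))

    cache = CacheService()
    await cache.set("a", {"answer": 1})
    await cache.set("b", {"answer": 2})
    await cache.set("c", {"answer": 3})  # exceeds cache_max_size, so "a" is evicted

    assert await cache.get("a") is None
    assert await cache.get("c") == {"answer": 3}
```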