├── src ├── ai │ ├── __init__.py │ ├── base.py │ └── gemini.py ├── auth │ ├── __init__.py │ ├── dependencies.py │ └── throttling.py ├── prompts │ └── system_prompt.md └── main.py ├── .gitignore ├── requirements.txt └── README.md /src/ai/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/auth/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | venv/ 3 | .cursor -------------------------------------------------------------------------------- /src/prompts/system_prompt.md: -------------------------------------------------------------------------------- 1 | Answer the user in plaintext (no markdown), but use lots of emojis! Be simple, clear and concise. 
2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi==0.115.12 2 | uvicorn==0.34.3 3 | pydantic==2.11.5 4 | google-generativeai==0.8.5 5 | python-jose[cryptography]==3.5.0 -------------------------------------------------------------------------------- /src/ai/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class AIPlatform(ABC):  # Abstract interface that every chat backend implements 5 | @abstractmethod 6 | def chat(self, prompt: str) -> str:  # Contract: takes the prompt string, returns a string 7 | pass 8 | -------------------------------------------------------------------------------- /src/ai/gemini.py: -------------------------------------------------------------------------------- 1 | import os 2 | import google.generativeai as genai 3 | from .base import AIPlatform 4 | 5 | 6 | class Gemini(AIPlatform):  # AIPlatform implementation that calls google.generativeai 7 | def __init__(self, api_key: str, system_prompt: str = None):  # system_prompt is optional; when None or empty, chat() sends prompts unchanged 8 | self.api_key = api_key 9 | self.system_prompt = system_prompt 10 | genai.configure(api_key=self.api_key)  # NOTE(review): module-level call - presumably sets the key for the whole process, not just this instance; confirm 11 | 12 | # See more models at: https://ai.google.dev/gemini-api/docs/models 13 | self.model = genai.GenerativeModel("gemini-2.5-flash-preview-05-20") 14 | 15 | def chat(self, prompt: str) -> str:  # Prefixes the system prompt (followed by a blank line) when one is set, then returns the model's response.text 16 | if self.system_prompt: 17 | prompt = f"{self.system_prompt}\n\n{prompt}" 18 | 19 | response = self.model.generate_content(prompt) 20 | return response.text 21 | -------------------------------------------------------------------------------- /src/auth/dependencies.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from fastapi import Depends, HTTPException, status 3 | from fastapi.security import OAuth2PasswordBearer 4 | from jose import JWTError, jwt 5 | 6 | SECRET_KEY = "a-string-secret-at-least-256-bits-long"  # NOTE(review): signing secret is hardcoded in source; replace before any non-demo use 7 | ALGORITHM = "HS256" 8 | 9 | oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token", 
auto_error=False)  # NOTE(review): with auto_error=False a missing token appears to arrive as None (handled below) rather than an automatic 401 - confirm against FastAPI docs 10 | 11 | 12 | async def get_user_identifier(token: Optional[str] = Depends(oauth2_scheme)):  # Returns the identity used for rate limiting: the JWT "sub" claim, or a shared placeholder when no token is sent; raises HTTP 401 when the token cannot be decoded or has no "sub" 13 | if token is None: 14 | return "global_unauthenticated_user"  # Every anonymous caller shares this one identifier 15 | 16 | credentials_exception = HTTPException( 17 | status_code=status.HTTP_401_UNAUTHORIZED, 18 | detail="Could not validate credentials", 19 | headers={"WWW-Authenticate": "Bearer"}, 20 | ) 21 | try: 22 | payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM]) 23 | username: str = payload.get("sub") 24 | if username is None: 25 | raise credentials_exception 26 | except JWTError:  # Any JWTError raised while decoding is reported to the client as 401 27 | raise credentials_exception 28 | return username 29 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | from fastapi import Depends, FastAPI 3 | from pydantic import BaseModel 4 | from .ai.gemini import Gemini 5 | from .auth.dependencies import get_user_identifier 6 | from .auth.throttling import apply_rate_limit 7 | 8 | 9 | # --- App Initialization --- 10 | app = FastAPI() 11 | 12 | 13 | # --- AI Configuration --- 14 | def load_system_prompt():  # Returns the prompt file's text, or None when the file does not exist 15 | try: 16 | with open("src/prompts/system_prompt.md", "r") as f:  # Relative path: resolved against the process working directory, not this file's location 17 | return f.read() 18 | except FileNotFoundError: 19 | return None 20 | 21 | 22 | system_prompt = load_system_prompt() 23 | gemini_api_key = os.getenv("GEMINI_API_KEY") 24 | 25 | if not gemini_api_key:  # Module-level check: a missing or empty key aborts the import instead of failing on the first request 26 | raise ValueError("GEMINI_API_KEY environment variable not set.") 27 | 28 | ai_platform = Gemini(api_key=gemini_api_key, system_prompt=system_prompt) 29 | 30 | 31 | # --- Pydantic Models --- 32 | class ChatRequest(BaseModel):  # Request body for POST /chat 33 | prompt: str 34 | 35 | 36 | class ChatResponse(BaseModel):  # Response body for POST /chat 37 | response: str 38 | 39 | 40 | # --- API Endpoints --- 41 | @app.post("/chat", response_model=ChatResponse) 42 | async def chat(request: ChatRequest, user_id: str = Depends(get_user_identifier)):  # Applies the caller's rate limit, then forwards the prompt to the AI platform and wraps the reply 43 | apply_rate_limit(user_id) 44 | response_text = 
ai_platform.chat(request.prompt)  # NOTE(review): synchronous call inside an async endpoint - nothing is awaited, so the handler blocks until the AI platform returns 45 | return ChatResponse(response=response_text) 46 | 47 | 48 | @app.get("/") 49 | async def root():  # Returns a fixed status message 50 | return {"message": "API is running"} 51 | -------------------------------------------------------------------------------- /src/auth/throttling.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import defaultdict 3 | 4 | from fastapi import HTTPException, status 5 | 6 | # --- Constants --- 7 | # For authenticated users 8 | AUTH_RATE_LIMIT = 5 9 | AUTH_TIME_WINDOW_SECONDS = 60 10 | 11 | # For unauthenticated "global" users 12 | GLOBAL_RATE_LIMIT = 3 13 | GLOBAL_TIME_WINDOW_SECONDS = 60 14 | 15 | # --- In-memory storage for user requests --- 16 | user_requests = defaultdict(list)  # user_id -> time.time() stamps of accepted requests; module-level dict whose keys are never removed 17 | 18 | 19 | # --- Throttling dependency --- 20 | def apply_rate_limit(user_id: str):  # Records the request and returns True, or raises HTTP 429 when the caller already has rate_limit requests inside the time window 21 | current_time = time.time() 22 | 23 | if user_id == "global_unauthenticated_user":  # Placeholder id that get_user_identifier returns when no token is sent 24 | rate_limit = GLOBAL_RATE_LIMIT 25 | time_window = GLOBAL_TIME_WINDOW_SECONDS 26 | else: 27 | rate_limit = AUTH_RATE_LIMIT 28 | time_window = AUTH_TIME_WINDOW_SECONDS 29 | 30 | # Filter out requests older than the time window 31 | user_requests[user_id] = [ 32 | t for t in user_requests[user_id] if t > current_time - time_window 33 | ] 34 | 35 | if len(user_requests[user_id]) >= rate_limit:  # Window already full: reject without recording this attempt 36 | raise HTTPException( 37 | status_code=status.HTTP_429_TOO_MANY_REQUESTS, 38 | detail="Too many requests. 
Please try again later.", 39 | ) 40 | else: 41 | # For debugging: print current usage 42 | current_usage = len(user_requests[user_id]) 43 | print(f"User {user_id}: {current_usage + 1}/{rate_limit} requests used.") 44 | 45 | user_requests[user_id].append(current_time) 46 | return True 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FastAPI Gemini AI 2 | 3 | A simple FastAPI project to demonstrate how to build an API server for an AI application using Gemini. 4 | 5 | ## Setting Up 6 | 7 | **Create a virtual environment** 8 | 9 | ```bash 10 | python -m venv venv 11 | source venv/bin/activate 12 | ``` 13 | 14 | **Install dependencies** 15 | 16 | ```bash 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | **Set up API Key** 21 | 22 | Set your Gemini API key as an environment variable in your terminal. You can get your API key from [Google AI Studio](https://aistudio.google.com/app/apikey). 23 | 24 | ```bash 25 | export GEMINI_API_KEY="your_gemini_api_key" 26 | ``` 27 | 28 | For Windows users: 29 | 30 | ```batch 31 | :: In Command Prompt (no quotes: cmd would store them as part of the value) 32 | set GEMINI_API_KEY=your_gemini_api_key 33 | 34 | :: In PowerShell 35 | $env:GEMINI_API_KEY="your_gemini_api_key" 36 | ``` 37 | 38 | The application will load this key from your environment. 39 | 40 | ## How To Run 41 | 42 | To run the application, use the following command: 43 | 44 | ```bash 45 | uvicorn src.main:app --reload 46 | ``` 47 | 48 | The API will be available at `http://127.0.0.1:8000`. 49 | 50 | ### Making Requests 51 | 52 | The chat API accepts requests both with and without an authentication token; the two cases are shown below. 53 | 54 | #### Unauthenticated Request 55 | 56 | You can send a request to the chat API without an authentication token. These requests are subject to a global rate limit. 
57 | 58 | ```bash 59 | curl -X POST "http://127.0.0.1:8000/chat" \ 60 | -H "Content-Type: application/json" \ 61 | -d '{"prompt": "Why is the sky blue?"}' 62 | ``` 63 | 64 | #### Authenticated Request 65 | 66 | For a higher rate limit, you can authenticate by providing a JWT token. Make sure to replace `YOUR_GENERATED_TOKEN` with a valid token. 67 | 68 | ```bash 69 | curl -X POST "http://127.0.0.1:8000/chat" \ 70 | -H "Content-Type: application/json" \ 71 | -H "Authorization: Bearer YOUR_GENERATED_TOKEN" \ 72 | -d '{"prompt": "Why is the sky blue?"}' 73 | ``` 74 | 75 | ### Generating a Test Token 76 | 77 | The `/chat` endpoint does not require authentication, but a request that carries a valid JWT token is rate-limited per user (with the higher limit) instead of sharing the global limit. For testing purposes, you can generate a valid token using [jwt.io](https://jwt.io/): 78 | 79 | **Algorithm**: Change the algorithm to `HS256`. 80 | 81 | **Payload**: Use the following payload. The `sub` field will be used as the user identifier for rate limiting. 82 | 83 | ```json 84 | { 85 | "sub": "testuser", 86 | "name": "John Doe", 87 | "iat": 1516239022 88 | } 89 | ``` 90 | 91 | **Signature**: In the "Verify Signature" section, use the secret key `a-string-secret-at-least-256-bits-long`. This is the same secret key that is hardcoded in `src/auth/dependencies.py`. 92 | 93 | You can now use the generated token to make authenticated requests. 94 | 95 | ### Via FastAPI Docs 96 | 97 | You can also use the auto-generated FastAPI documentation to interact with the API. 98 | 99 | Once the server is running, go to [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs) in your browser. 100 | 101 | ## Configuration 102 | 103 | You can configure the system prompt by editing the `src/prompts/system_prompt.md` file. 104 | 105 | ### Rate Limiting 106 | 107 | The API implements rate limiting to prevent abuse. 
You can modify these limits by changing the constants in `src/auth/throttling.py`. `AUTH_RATE_LIMIT` and `AUTH_TIME_WINDOW_SECONDS` apply to authenticated users; the `GLOBAL_*` pair shown here applies to unauthenticated requests: 108 | 109 | ```python 110 | GLOBAL_RATE_LIMIT = 3 111 | GLOBAL_TIME_WINDOW_SECONDS = 60 112 | ``` 113 | 114 | ## Architecture 115 | 116 | The project is structured to be modular, allowing for different AI platforms to be used. 117 | 118 | - `src/main.py`: The main FastAPI application file. 119 | - `src/ai/base.py`: Defines the `AIPlatform` interface. 120 | - `src/ai/gemini.py`: The implementation of the `AIPlatform` interface for Gemini. 121 | - `src/prompts/system_prompt.md`: The system prompt for the AI. 122 | - `src/auth/dependencies.py`: Handles JWT decoding and user identification. 123 | - `src/auth/throttling.py`: Provides a simple in-memory rate limiter with different limits for authenticated and unauthenticated users. 124 | 125 | To use a different AI, you would create a new class that inherits from `AIPlatform` and implements the `chat` method. Then, you would update `src/main.py` to use your new AI class. 126 | --------------------------------------------------------------------------------