├── .streamlit └── config.toml ├── .gitignore ├── requirements.txt ├── data_processor.py ├── api_client.py ├── README.md └── app.py /.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | base="light" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | devenv.sh 3 | .venv 4 | *.csv 5 | .DS_Store -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | httpx 3 | pandas 4 | stqdm 5 | async-lru 6 | numpy<2.0 7 | loguru -------------------------------------------------------------------------------- /data_processor.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from datetime import datetime, timezone 3 | from typing import List, Dict, Any 4 | 5 | 6 | def unix_to_date_string(unix_timestamp: int) -> str: 7 | """Convert Unix timestamp to YYYY-MM-DD format""" 8 | return datetime.fromtimestamp(unix_timestamp, tz=timezone.utc).strftime("%Y-%m-%d") 9 | 10 | 11 | def flatten_usage_data(raw_data: list, endpoint_name: str) -> list: 12 | """Flatten API response into list of dicts""" 13 | # This function is now handled in the API client, but keeping for consistency 14 | return raw_data 15 | 16 | 17 | def enrich_dataframe_with_lookups(df: pd.DataFrame, user_lookup: Dict[str, str], 18 | project_lookup: Dict[str, str], api_key_lookup: Dict[str, str]) -> pd.DataFrame: 19 | """Add human-readable names to the dataframe based on ID lookups""" 20 | if df.empty: 21 | return df 22 | 23 | # Add user name column 24 | if "user_id" in df.columns: 25 | df["user_email"] = df["user_id"].map(user_lookup).fillna("Unknown") 26 | 27 | # Add project name column 28 | if "project_id" in df.columns: 29 | df["project_name"] = df["project_id"].map(project_lookup).fillna("Unknown") 30 | 31 | # Add API key name column 32 | if "api_key_id" in df.columns: 33 | df["api_key_name"] = df["api_key_id"].map(api_key_lookup).fillna("Unknown") 34 | 35 | return df 36 | 37 | 38 | def reorder_columns(df: pd.DataFrame) -> pd.DataFrame: 39 | """Reorder columns with specified columns first, followed by remaining columns""" 40 | if df.empty: 41 | return df 42 | 43 | # Desired column order (leftmost columns) 44 | preferred_order = [ 45 | "date", "endpoint_type", "input_tokens", "output_tokens", 46 | "api_key_id", "api_key_name", "project_id", "project_name", 47 | "input_cached_tokens", "input_audio_tokens", "output_audio_tokens", 48 | "model", "start_time", "user_email", "num_model_requests" 49 | ] 50 | 51 | # Banned columns 52 | banned_columns = [ 53 | "object", "user_id", "batch" 54 | ] 55 | 56 | # Get columns that exist in the dataframe from the preferred order 57 | existing_preferred = [col for col in preferred_order if col in df.columns] 58 | 59 | # Get remaining columns not in the preferred order (those which are not banned) 60 | remaining_columns = [col for col in df.columns if (col not in preferred_order and col not in banned_columns)] 61 | 62 | # Combine preferred columns (leftmost) with remaining columns (rightmost) 63 | final_order = existing_preferred + remaining_columns 64 | 65 | return df[final_order] 66 | 67 | 68 | def create_dataframe(all_data: List[Dict[str, Any]]) -> pd.DataFrame: 69 | """Convert list to DataFrame and sort by time""" 70 | if not all_data: 71 | return pd.DataFrame() 72 | 73 | df = pd.DataFrame(all_data) 74 | 75 | # Convert start_time to date string format (in UTC time, ignore local timezone) 76 | if "start_time" in df.columns: 77 | df["date"] = df["start_time"].apply(unix_to_date_string) 78 | df = df.sort_values("start_time") 79 | 80 | return df -------------------------------------------------------------------------------- /api_client.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | from async_lru import alru_cache 3 | import asyncio 4 | import json 5 | from loguru import logger 6 | from typing import Dict, List, Any 7 | 8 | 9 | class OpenAIUsageAPIClient: 10 | def __init__(self, api_key: str): 11 | self.api_key = api_key 12 | self.client = httpx.AsyncClient( 13 | headers={ 14 | "Authorization": f"Bearer {api_key}", 15 | "Content-Type": "application/json" 16 | }, 17 | timeout=30.0 18 | ) 19 | 20 | async def _fetch_page(self, endpoint: str, params: dict) -> dict: 21 | """Single page fetch with error handling""" 22 | response = await self.client.get(endpoint, params=params) 23 | response.raise_for_status() 24 | return response.json() 25 | 26 | @alru_cache(maxsize=128) 27 | async def fetch_all_pages(self, endpoint_name: str, endpoint_url: str, params_json: str) -> List[Dict[str, Any]]: 28 | """Paginated fetch helper that loops through all pages""" 29 | logger.debug(f"Fetching all pages for {endpoint_name} with params: {params_json}") 30 | # Convert JSON string back to dict for processing 31 | params = json.loads(params_json) 32 | 33 | all_results = [] 34 | current_params = params.copy() 35 | 36 | while True: 37 | response_data = await self._fetch_page(endpoint_url, current_params) 38 | 39 | # Process each bucket in the response 40 | for bucket in response_data.get("data", []): 41 | start_time = bucket.get("start_time") 42 | results = bucket.get("results", []) 43 | 44 | # Each result object becomes a row with inherited start_time 45 | for result in results: 46 | row = result.copy() 47 | row["start_time"] = start_time 48 | row["endpoint_type"] = endpoint_name 49 | all_results.append(row) 50 | 51 | # Check for pagination 52 | if not response_data.get("has_more", False): 53 | break 54 | 55 | next_page = response_data.get("next_page") 56 | if not next_page: 57 | break 58 | 59 | current_params["page"] = next_page 60 | 61 | return all_results 62 | 63 | @alru_cache(maxsize=32) 64 | async def fetch_lookup_data(self, lookup_type: str, endpoint_url: str) -> Dict[str, str]: 65 | """Fetch lookup data for users, projects, or API keys""" 66 | all_items = [] 67 | params = {"limit": 100} 68 | 69 | while True: 70 | response_data = await self._fetch_page(endpoint_url, params) 71 | all_items.extend(response_data.get("data", [])) 72 | 73 | if not response_data.get("has_more", False): 74 | break 75 | 76 | # Use the last_id for pagination (different from usage endpoints) 77 | last_id = response_data.get("last_id") 78 | if not last_id: 79 | break 80 | params["after"] = last_id 81 | 82 | # Create ID to name mapping based on lookup type 83 | lookup_map = {} 84 | for item in all_items: 85 | if lookup_type == "users": 86 | lookup_map[item.get("id")] = item.get("email", "Unknown") 87 | elif lookup_type == "projects": 88 | lookup_map[item.get("id")] = item.get("name", "Unknown") 89 | elif lookup_type == "api_keys": 90 | lookup_map[item.get("id")] = item.get("name", "Unknown") 91 | 92 | return lookup_map 93 | 94 | async def close(self): 95 | """Cleanup method for httpx client""" 96 | await self.client.aclose() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenAI Usage API Dashboard 2 | 3 | A powerful Streamlit application for fetching, analyzing, and exporting usage data from OpenAI's comprehensive Usage API across all available endpoints. 4 | 5 | ## 🚀 What This App Does 6 | 7 | This dashboard provides a unified interface to query OpenAI's Usage API endpoints and generate comprehensive usage reports. It fetches data from multiple usage endpoints simultaneously, enriches the data with human-readable names, and exports everything to CSV for further analysis. 8 | 9 | ### Supported Usage Endpoints 10 | 11 | - **Completions** - Text generation usage (input/output tokens, cached tokens, audio tokens) 12 | - **Embeddings** - Text embedding usage 13 | - **Moderations** - Content moderation usage 14 | - **Images** - Image generation/editing usage 15 | - **Audio Speeches** - Text-to-speech usage 16 | - **Audio Transcriptions** - Speech-to-text usage 17 | - **Vector Stores** - Vector storage usage 18 | - **Code Interpreter Sessions** - Code execution session usage 19 | 20 | ## ✨ Key Features 21 | 22 | - **Async Processing**: Fetches data from multiple endpoints simultaneously with real-time progress tracking 23 | - **Smart Caching**: Uses LRU cache to avoid redundant API calls and improve performance 24 | - **Data Enrichment**: Automatically looks up and adds human-readable names for users, projects, and API keys 25 | - **Flexible Date Ranges**: Select any date range for usage analysis 26 | - **Endpoint Selection**: Choose which usage endpoints to query 27 | - **Column Reordering**: Presents data in a logical, analysis-friendly column order 28 | - **CSV Export**: Download complete usage data for further analysis 29 | - **Session Persistence**: Maintains API client and cache across app refreshes 30 | 31 | ## 🛠️ Installation & Setup 32 | 33 | ### Prerequisites 34 | 35 | - Python 3.8+ 36 | - OpenAI Admin API Key 37 | 38 | ### Install Dependencies 39 | 40 | ```bash 41 | pip install -r requirements.txt 42 | ``` 43 | 44 | ### Environment Setup (Optional) 45 | 46 | Set your OpenAI Admin API key as an environment variable: 47 | 48 | ```bash 49 | export OPENAI_ADMIN_KEY="sk-admin-your-key-here" 50 | ``` 51 | 52 | Or create a `devenv.sh` file: 53 | ```bash 54 | OPENAI_ADMIN_KEY="sk-admin-your-key-here" 55 | source devenv.sh 56 | ``` 57 | 58 | ## 🎯 How to Use 59 | 60 | ### 1. Start the Application 61 | 62 | ```bash 63 | streamlit run app.py 64 | ``` 65 | 66 | ### 2. Configure Your API Key 67 | 68 | - If you set the `OPENAI_ADMIN_KEY` environment variable, it will be pre-populated 69 | - Otherwise, enter your OpenAI Admin API key in the sidebar 70 | 71 | ### 3. Select Your Query Parameters 72 | 73 | - **Date Range**: Choose start and end dates (treated as UTC) 74 | - **Usage Endpoints**: Select which usage types to fetch (all selected by default) 75 | 76 | ### 4. Fetch Data 77 | 78 | Click "Fetch Usage Data" to: 79 | - Query selected endpoints asynchronously 80 | - Fetch lookup data for users, projects, and API keys 81 | - Process and enrich the data 82 | - Display results with progress tracking 83 | 84 | ### 5. Analyze and Export 85 | 86 | - Review the data in the interactive table 87 | - Download as CSV for further analysis in Excel, Google Sheets, or other tools 88 | 89 | ## 📊 Data Output 90 | 91 | The final dataset includes: 92 | 93 | ### Core Usage Data 94 | - Date, endpoint type, token counts, model requests 95 | - Input/output tokens, cached tokens, audio tokens 96 | - Model names, batch status 97 | 98 | ### Enriched Information 99 | - **User Details**: User ID → Email address 100 | - **Project Details**: Project ID → Project name 101 | - **API Key Details**: API Key ID → API key name 102 | 103 | ### Column Order 104 | Data is presented with the most important columns first: date, endpoint_type, input_tokens, output_tokens, api_key_id, api_key_name, project_id, project_name, and other relevant metrics. 105 | 106 | ## 🔧 Advanced Features 107 | 108 | ### Cache Management 109 | - Use the "Clear All Cache" button in the sidebar to reset cached data 110 | - Cache persists across app refreshes for improved performance 111 | - View cache statistics when `LOGURU_LEVEL=DEBUG` is set 112 | 113 | ### Special Endpoint Handling 114 | - Vector stores and code interpreter sessions are automatically configured with appropriate grouping parameters 115 | - All other endpoints group by user_id, project_id, api_key_id, and model for maximum granularity 116 | 117 | ## 🚨 Requirements 118 | 119 | - Valid OpenAI Admin API key with usage access 120 | - Network access to OpenAI's API endpoints 121 | - Python packages listed in `requirements.txt` 122 | 123 | ## 📝 Notes 124 | 125 | - Dates are treated as UTC when converting to timestamps 126 | - The app handles pagination automatically across all endpoints 127 | - Empty results are handled gracefully with appropriate user feedback 128 | - All errors are captured and displayed with detailed tracebacks 129 | 130 | ## 🤝 Usage Tips 131 | 132 | - Run similar queries multiple times to benefit from caching 133 | - Use the debug mode to monitor cache performance 134 | - Select specific endpoints if you only need certain usage types 135 | - Export data regularly for historical analysis and reporting 136 | 137 | --- 138 | 139 | Built with ❤️ using Streamlit, httpx, and async processing for optimal performance. -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import asyncio 3 | import os 4 | import json 5 | from datetime import datetime, date, timedelta, timezone 6 | import pandas as pd 7 | import traceback 8 | from io import BytesIO 9 | 10 | from stqdm import stqdm 11 | from api_client import OpenAIUsageAPIClient 12 | from data_processor import create_dataframe, enrich_dataframe_with_lookups, reorder_columns 13 | 14 | # API endpoints configuration 15 | API_ENDPOINTS = { 16 | "completions": "https://api.openai.com/v1/organization/usage/completions", 17 | "embeddings": "https://api.openai.com/v1/organization/usage/embeddings", 18 | "moderations": "https://api.openai.com/v1/organization/usage/moderations", 19 | "images": "https://api.openai.com/v1/organization/usage/images", 20 | "audio_speeches": "https://api.openai.com/v1/organization/usage/audio_speeches", 21 | "audio_transcriptions": "https://api.openai.com/v1/organization/usage/audio_transcriptions", 22 | "vector_stores": "https://api.openai.com/v1/organization/usage/vector_stores", 23 | "code_interpreter_sessions": "https://api.openai.com/v1/organization/usage/code_interpreter_sessions" 24 | } 25 | 26 | # Lookup endpoints for ID-to-name mapping 27 | LOOKUP_ENDPOINTS = { 28 | "users": "https://api.openai.com/v1/organization/users", 29 | "projects": "https://api.openai.com/v1/organization/projects", 30 | "api_keys": "https://api.openai.com/v1/organization/admin_api_keys" 31 | } 32 | 33 | 34 | def date_to_unix_timestamp(date_obj: date) -> int: 35 | """Convert date to Unix timestamp, treating the date as UTC""" 36 | # Create a datetime object and explicitly set timezone to UTC 37 | dt_utc = datetime.combine(date_obj, datetime.min.time(), timezone.utc) 38 | return int(dt_utc.timestamp()) 39 | 40 | 41 | async def main_async(): 42 | # Page configuration 43 | st.set_page_config( 44 | page_title="OpenAI Usage API Dashboard", 45 | page_icon="📊", 46 | layout="wide", 47 | initial_sidebar_state="expanded" 48 | ) 49 | 50 | # Sidebar with API key 51 | st.sidebar.header("Configuration") 52 | 53 | # Check environment for API key and pre-populate 54 | env_api_key = os.getenv("OPENAI_ADMIN_KEY", "") 55 | api_key = st.sidebar.text_input( 56 | "OpenAI Admin API Key", 57 | value=env_api_key, 58 | type="password", 59 | help="Enter your [OpenAI Admin API key](https://platform.openai.com/settings/organization/admin-keys). If OPENAI_ADMIN_KEY environment variable is set, it will be pre-populated." 60 | ) 61 | 62 | # Author's security note 63 | st.sidebar.markdown(""" 64 | _**Author's Note:** While I can only claim that your credentials are not stored anywhere, for maximum security, you should generate a new app-specific and read only [Admin API key](https://platform.openai.com/settings/organization/admin-keys) on your account and use it here. This way, you can deactivate the key after you don't plan to use the app anymore, and it won't affect any of your other keys/apps. You can check out the GitHub source for this app using below button:_ 65 | """) 66 | 67 | # GitHub repository badge 68 | st.sidebar.markdown(""" 69 | [![GitHub](https://img.shields.io/github/stars/tipani86/OpenAI-Tools)](https://github.com/tipani86/OpenAI-Tools) 70 | """) 71 | 72 | # Cache management 73 | st.sidebar.header("Cache Management") 74 | if st.sidebar.button("🗑️ Clear All Cache", help="Clear cached API responses and session data"): 75 | # Clear the alru cache if client exists 76 | if 'api_client' in st.session_state: 77 | st.session_state.api_client.fetch_all_pages.cache_clear() 78 | # Clear session state 79 | st.session_state.clear() 80 | st.sidebar.success("Cache and session data cleared successfully!") 81 | 82 | # Main content 83 | st.title("📊 OpenAI Usage API Dashboard") 84 | st.markdown("Fetch and analyze usage data from OpenAI's Usage API across all endpoints.") 85 | 86 | # Form for user inputs 87 | with st.form("usage_form"): 88 | st.subheader("Settings") 89 | 90 | # Date range picker 91 | col1, col2 = st.columns(2) 92 | with col1: 93 | start_date = st.date_input( 94 | "Start Date (inclusive)", 95 | value=date.today() - timedelta(days=7), 96 | help="Start date for usage data (inclusive)" 97 | ) 98 | with col2: 99 | end_date = st.date_input( 100 | "End Date (**exclusive**; for monthly reports, select the first day of the _second_ month)", 101 | value=date.today(), 102 | help="End date for usage data (exclusive)" 103 | ) 104 | 105 | # Usage endpoints selection 106 | selected_endpoints = st.multiselect( 107 | "Usage Endpoints", 108 | options=list(API_ENDPOINTS.keys()), 109 | default=list(API_ENDPOINTS.keys()), 110 | help="Select which usage endpoints to query" 111 | ) 112 | 113 | # Submit button 114 | submit_button = st.form_submit_button("Fetch Usage Data", type="primary") 115 | 116 | # Process form submission 117 | if submit_button: 118 | if not api_key: 119 | st.error("Please provide an OpenAI Admin API key.") 120 | return 121 | 122 | if not selected_endpoints: 123 | st.error("Please select at least one usage endpoint.") 124 | return 125 | 126 | try: 127 | # Convert dates to Unix timestamps 128 | start_timestamp = date_to_unix_timestamp(start_date) 129 | end_timestamp = date_to_unix_timestamp(end_date) 130 | 131 | # Create or reuse API client from session state 132 | if 'api_client' not in st.session_state or st.session_state.get('api_key') != api_key: 133 | # Close existing client if it exists 134 | if 'api_client' in st.session_state: 135 | await st.session_state.api_client.close() 136 | # Create new client 137 | st.session_state.api_client = OpenAIUsageAPIClient(api_key) 138 | st.session_state.api_key = api_key 139 | 140 | client = st.session_state.api_client 141 | 142 | # Prepare parameters 143 | base_params = { 144 | "start_time": start_timestamp, 145 | "end_time": end_timestamp, 146 | "group_by": ["project_id", "user_id", "api_key_id", "model"] 147 | } 148 | 149 | # Create tasks for asyncio.gather 150 | tasks = [] 151 | for endpoint_name in selected_endpoints: 152 | endpoint_url = API_ENDPOINTS[endpoint_name] 153 | params = base_params.copy() 154 | 155 | # Special provision for vector stores - can only be grouped by project_id 156 | if endpoint_name == "vector_stores": 157 | params["group_by"] = ["project_id"] 158 | # Code interpreter sessions also can only be grouped by project_id 159 | elif endpoint_name == "code_interpreter_sessions": 160 | params["group_by"] = ["project_id"] 161 | 162 | # Convert params dict to JSON string for caching 163 | params_json = json.dumps(params, sort_keys=True) 164 | task = client.fetch_all_pages(endpoint_name, endpoint_url, params_json) 165 | tasks.append(task) 166 | 167 | # Execute tasks with progress tracking 168 | st.info(f"Fetching data from {len(tasks)} endpoints...") 169 | 170 | all_data = await stqdm.gather(*tasks) 171 | 172 | # all_data is a list of lists, each list contains the data for a single endpoint, need to flatten it 173 | all_data = [item for sublist in all_data for item in sublist] 174 | 175 | # Fetch lookup data for enrichment 176 | st.info("Fetching lookup data for users, projects, and API keys...") 177 | lookup_tasks = [ 178 | client.fetch_lookup_data("users", LOOKUP_ENDPOINTS["users"]), 179 | client.fetch_lookup_data("projects", LOOKUP_ENDPOINTS["projects"]), 180 | client.fetch_lookup_data("api_keys", LOOKUP_ENDPOINTS["api_keys"]) 181 | ] 182 | user_lookup, project_lookup, api_key_lookup = await stqdm.gather(*lookup_tasks) 183 | 184 | # Display cache statistics only in debug mode 185 | if os.getenv("LOGURU_LEVEL") == "DEBUG": 186 | cache_info = client.fetch_all_pages.cache_info() 187 | st.info(f"📊 Cache Statistics: {cache_info.hits} hits, {cache_info.misses} misses, {cache_info.currsize}/{cache_info.maxsize} cached entries") 188 | 189 | # Process and display results 190 | if all_data: 191 | df = create_dataframe(all_data) 192 | # Enrich dataframe with human-readable names 193 | df = enrich_dataframe_with_lookups(df, user_lookup, project_lookup, api_key_lookup) 194 | # Reorder columns for better presentation 195 | df = reorder_columns(df) 196 | 197 | st.success(f"✅ Successfully fetched {len(df)} records from {len(selected_endpoints)} endpoints") 198 | 199 | # Display DataFrame 200 | st.subheader("Usage Data") 201 | st.dataframe(df, use_container_width=True) 202 | 203 | # Download functionality 204 | def convert_df_to_csv(dataframe): 205 | return dataframe.to_csv(index=False) 206 | 207 | csv_data = convert_df_to_csv(df) 208 | 209 | st.download_button( 210 | label="📥 Download as CSV", 211 | data=csv_data, 212 | file_name=f"openai_usage_{start_date}_{end_date}.csv", 213 | mime="text/csv" 214 | ) 215 | 216 | else: 217 | st.warning("No data found for the selected parameters and date range.") 218 | 219 | except Exception as e: 220 | st.error("An error occurred while fetching usage data:") 221 | st.error(f"Error: {str(e)}") 222 | st.error("Full traceback:") 223 | st.code(traceback.format_exc()) 224 | 225 | 226 | if __name__ == "__main__": 227 | asyncio.run(main_async()) --------------------------------------------------------------------------------