├── .streamlit
    └── config.toml
├── .gitignore
├── requirements.txt
├── data_processor.py
├── api_client.py
├── README.md
└── app.py


/.streamlit/config.toml:
--------------------------------------------------------------------------------
1 | [theme]
2 | base="light"


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | devenv.sh
3 | .venv
4 | *.csv
5 | .DS_Store


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | streamlit
2 | httpx
3 | pandas
4 | stqdm
5 | async-lru
6 | numpy<2.0
7 | loguru


--------------------------------------------------------------------------------
/data_processor.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from datetime import datetime, timezone
 3 | from typing import List, Dict, Any
 4 | 
 5 | 
 6 | def unix_to_date_string(unix_timestamp: int) -> str:
 7 |     """Convert Unix timestamp to YYYY-MM-DD format"""
 8 |     return datetime.fromtimestamp(unix_timestamp, tz=timezone.utc).strftime("%Y-%m-%d")
 9 | 
10 | 
11 | def flatten_usage_data(raw_data: list, endpoint_name: str) -> list:
12 |     """Flatten API response into list of dicts"""
13 |     # This function is now handled in the API client, but keeping for consistency
14 |     return raw_data
15 | 
16 | 
17 | def enrich_dataframe_with_lookups(df: pd.DataFrame, user_lookup: Dict[str, str], 
18 |                                 project_lookup: Dict[str, str], api_key_lookup: Dict[str, str]) -> pd.DataFrame:
19 |     """Add human-readable names to the dataframe based on ID lookups"""
20 |     if df.empty:
21 |         return df
22 |     
23 |     # Add user name column
24 |     if "user_id" in df.columns:
25 |         df["user_email"] = df["user_id"].map(user_lookup).fillna("Unknown")
26 |     
27 |     # Add project name column
28 |     if "project_id" in df.columns:
29 |         df["project_name"] = df["project_id"].map(project_lookup).fillna("Unknown")
30 |     
31 |     # Add API key name column
32 |     if "api_key_id" in df.columns:
33 |         df["api_key_name"] = df["api_key_id"].map(api_key_lookup).fillna("Unknown")
34 |     
35 |     return df
36 | 
37 | 
38 | def reorder_columns(df: pd.DataFrame) -> pd.DataFrame:
39 |     """Reorder columns with specified columns first, followed by remaining columns"""
40 |     if df.empty:
41 |         return df
42 |     
43 |     # Desired column order (leftmost columns)
44 |     preferred_order = [
45 |         "date", "endpoint_type", "input_tokens", "output_tokens", 
46 |         "api_key_id", "api_key_name", "project_id", "project_name", 
47 |         "input_cached_tokens", "input_audio_tokens", "output_audio_tokens", 
48 |         "model", "start_time", "user_email", "num_model_requests"
49 |     ]
50 | 
51 |     # Banned columns
52 |     banned_columns = [
53 |         "object", "user_id", "batch"
54 |     ]
55 |     
56 |     # Get columns that exist in the dataframe from the preferred order
57 |     existing_preferred = [col for col in preferred_order if col in df.columns]
58 |     
59 |     # Get remaining columns not in the preferred order (those which are not banned)
60 |     remaining_columns = [col for col in df.columns if (col not in preferred_order and col not in banned_columns)]
61 |     
62 |     # Combine preferred columns (leftmost) with remaining columns (rightmost)
63 |     final_order = existing_preferred + remaining_columns
64 |     
65 |     return df[final_order]
66 | 
67 | 
68 | def create_dataframe(all_data: List[Dict[str, Any]]) -> pd.DataFrame:
69 |     """Convert list to DataFrame and sort by time"""
70 |     if not all_data:
71 |         return pd.DataFrame()
72 |     
73 |     df = pd.DataFrame(all_data)
74 |     
75 |     # Convert start_time to date string format (in UTC time, ignore local timezone)
76 |     if "start_time" in df.columns:
77 |         df["date"] = df["start_time"].apply(unix_to_date_string)
78 |         df = df.sort_values("start_time")
79 |     
80 |     return df 


--------------------------------------------------------------------------------
/api_client.py:
--------------------------------------------------------------------------------
 1 | import httpx
 2 | from async_lru import alru_cache
 3 | import asyncio
 4 | import json
 5 | from loguru import logger
 6 | from typing import Dict, List, Any
 7 | 
 8 | 
 9 | class OpenAIUsageAPIClient:
10 |     def __init__(self, api_key: str):
11 |         self.api_key = api_key
12 |         self.client = httpx.AsyncClient(
13 |             headers={
14 |                 "Authorization": f"Bearer {api_key}",
15 |                 "Content-Type": "application/json"
16 |             },
17 |             timeout=30.0
18 |         )
19 |     
20 |     async def _fetch_page(self, endpoint: str, params: dict) -> dict:
21 |         """Single page fetch with error handling"""
22 |         response = await self.client.get(endpoint, params=params)
23 |         response.raise_for_status()
24 |         return response.json()
25 |     
26 |     @alru_cache(maxsize=128)
27 |     async def fetch_all_pages(self, endpoint_name: str, endpoint_url: str, params_json: str) -> List[Dict[str, Any]]:
28 |         """Paginated fetch helper that loops through all pages"""
29 |         logger.debug(f"Fetching all pages for {endpoint_name} with params: {params_json}")
30 |         # Convert JSON string back to dict for processing
31 |         params = json.loads(params_json)
32 |         
33 |         all_results = []
34 |         current_params = params.copy()
35 |         
36 |         while True:
37 |             response_data = await self._fetch_page(endpoint_url, current_params)
38 |             
39 |             # Process each bucket in the response
40 |             for bucket in response_data.get("data", []):
41 |                 start_time = bucket.get("start_time")
42 |                 results = bucket.get("results", [])
43 |                 
44 |                 # Each result object becomes a row with inherited start_time
45 |                 for result in results:
46 |                     row = result.copy()
47 |                     row["start_time"] = start_time
48 |                     row["endpoint_type"] = endpoint_name
49 |                     all_results.append(row)
50 |             
51 |             # Check for pagination
52 |             if not response_data.get("has_more", False):
53 |                 break
54 |                 
55 |             next_page = response_data.get("next_page")
56 |             if not next_page:
57 |                 break
58 |                 
59 |             current_params["page"] = next_page
60 |         
61 |         return all_results
62 |     
63 |     @alru_cache(maxsize=32)
64 |     async def fetch_lookup_data(self, lookup_type: str, endpoint_url: str) -> Dict[str, str]:
65 |         """Fetch lookup data for users, projects, or API keys"""
66 |         all_items = []
67 |         params = {"limit": 100}
68 |         
69 |         while True:
70 |             response_data = await self._fetch_page(endpoint_url, params)
71 |             all_items.extend(response_data.get("data", []))
72 |             
73 |             if not response_data.get("has_more", False):
74 |                 break
75 |                 
76 |             # Use the last_id for pagination (different from usage endpoints)
77 |             last_id = response_data.get("last_id")
78 |             if not last_id:
79 |                 break
80 |             params["after"] = last_id
81 |         
82 |         # Create ID to name mapping based on lookup type
83 |         lookup_map = {}
84 |         for item in all_items:
85 |             if lookup_type == "users":
86 |                 lookup_map[item.get("id")] = item.get("email", "Unknown")
87 |             elif lookup_type == "projects":
88 |                 lookup_map[item.get("id")] = item.get("name", "Unknown")
89 |             elif lookup_type == "api_keys":
90 |                 lookup_map[item.get("id")] = item.get("name", "Unknown")
91 |         
92 |         return lookup_map
93 |     
94 |     async def close(self):
95 |         """Cleanup method for httpx client"""
96 |         await self.client.aclose() 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # OpenAI Usage API Dashboard
  2 | 
  3 | A powerful Streamlit application for fetching, analyzing, and exporting usage data from OpenAI's comprehensive Usage API across all available endpoints.
  4 | 
  5 | ## 🚀 What This App Does
  6 | 
  7 | This dashboard provides a unified interface to query OpenAI's Usage API endpoints and generate comprehensive usage reports. It fetches data from multiple usage endpoints simultaneously, enriches the data with human-readable names, and exports everything to CSV for further analysis.
  8 | 
  9 | ### Supported Usage Endpoints
 10 | 
 11 | - **Completions** - Text generation usage (input/output tokens, cached tokens, audio tokens)
 12 | - **Embeddings** - Text embedding usage 
 13 | - **Moderations** - Content moderation usage
 14 | - **Images** - Image generation/editing usage
 15 | - **Audio Speeches** - Text-to-speech usage
 16 | - **Audio Transcriptions** - Speech-to-text usage  
 17 | - **Vector Stores** - Vector storage usage
 18 | - **Code Interpreter Sessions** - Code execution session usage
 19 | 
 20 | ## ✨ Key Features
 21 | 
 22 | - **Async Processing**: Fetches data from multiple endpoints simultaneously with real-time progress tracking
 23 | - **Smart Caching**: Uses LRU cache to avoid redundant API calls and improve performance
 24 | - **Data Enrichment**: Automatically looks up and adds human-readable names for users, projects, and API keys
 25 | - **Flexible Date Ranges**: Select any date range for usage analysis
 26 | - **Endpoint Selection**: Choose which usage endpoints to query
 27 | - **Column Reordering**: Presents data in a logical, analysis-friendly column order
 28 | - **CSV Export**: Download complete usage data for further analysis
 29 | - **Session Persistence**: Maintains API client and cache across app refreshes
 30 | 
 31 | ## 🛠️ Installation & Setup
 32 | 
 33 | ### Prerequisites
 34 | 
 35 | - Python 3.8+
 36 | - OpenAI Admin API Key
 37 | 
 38 | ### Install Dependencies
 39 | 
 40 | ```bash
 41 | pip install -r requirements.txt
 42 | ```
 43 | 
 44 | ### Environment Setup (Optional)
 45 | 
 46 | Set your OpenAI Admin API key as an environment variable:
 47 | 
 48 | ```bash
 49 | export OPENAI_ADMIN_KEY="sk-admin-your-key-here"
 50 | ```
 51 | 
 52 | Or create a `devenv.sh` file:
 53 | ```bash
 54 | OPENAI_ADMIN_KEY="sk-admin-your-key-here"
 55 | source devenv.sh
 56 | ```
 57 | 
 58 | ## 🎯 How to Use
 59 | 
 60 | ### 1. Start the Application
 61 | 
 62 | ```bash
 63 | streamlit run app.py
 64 | ```
 65 | 
 66 | ### 2. Configure Your API Key
 67 | 
 68 | - If you set the `OPENAI_ADMIN_KEY` environment variable, it will be pre-populated
 69 | - Otherwise, enter your OpenAI Admin API key in the sidebar
 70 | 
 71 | ### 3. Select Your Query Parameters
 72 | 
 73 | - **Date Range**: Choose start and end dates (treated as UTC)
 74 | - **Usage Endpoints**: Select which usage types to fetch (all selected by default)
 75 | 
 76 | ### 4. Fetch Data
 77 | 
 78 | Click "Fetch Usage Data" to:
 79 | - Query selected endpoints asynchronously
 80 | - Fetch lookup data for users, projects, and API keys
 81 | - Process and enrich the data
 82 | - Display results with progress tracking
 83 | 
 84 | ### 5. Analyze and Export
 85 | 
 86 | - Review the data in the interactive table
 87 | - Download as CSV for further analysis in Excel, Google Sheets, or other tools
 88 | 
 89 | ## 📊 Data Output
 90 | 
 91 | The final dataset includes:
 92 | 
 93 | ### Core Usage Data
 94 | - Date, endpoint type, token counts, model requests
 95 | - Input/output tokens, cached tokens, audio tokens
 96 | - Model names, batch status
 97 | 
 98 | ### Enriched Information  
 99 | - **User Details**: User ID → Email address
100 | - **Project Details**: Project ID → Project name
101 | - **API Key Details**: API Key ID → API key name
102 | 
103 | ### Column Order
104 | Data is presented with the most important columns first: date, endpoint_type, input_tokens, output_tokens, api_key_id, api_key_name, project_id, project_name, and other relevant metrics.
105 | 
106 | ## 🔧 Advanced Features
107 | 
108 | ### Cache Management
109 | - Use the "Clear All Cache" button in the sidebar to reset cached data
110 | - Cache persists across app refreshes for improved performance
111 | - View cache statistics when `LOGURU_LEVEL=DEBUG` is set
112 | 
113 | ### Special Endpoint Handling
114 | - Vector stores and code interpreter sessions are automatically configured with appropriate grouping parameters
115 | - All other endpoints group by user_id, project_id, api_key_id, and model for maximum granularity
116 | 
117 | ## 🚨 Requirements
118 | 
119 | - Valid OpenAI Admin API key with usage access
120 | - Network access to OpenAI's API endpoints
121 | - Python packages listed in `requirements.txt`
122 | 
123 | ## 📝 Notes
124 | 
125 | - Dates are treated as UTC when converting to timestamps
126 | - The app handles pagination automatically across all endpoints
127 | - Empty results are handled gracefully with appropriate user feedback
128 | - All errors are captured and displayed with detailed tracebacks
129 | 
130 | ## 🤝 Usage Tips
131 | 
132 | - Run similar queries multiple times to benefit from caching
133 | - Use the debug mode to monitor cache performance
134 | - Select specific endpoints if you only need certain usage types
135 | - Export data regularly for historical analysis and reporting
136 | 
137 | ---
138 | 
139 | Built with ❤️ using Streamlit, httpx, and async processing for optimal performance.


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import asyncio
  3 | import os
  4 | import json
  5 | from datetime import datetime, date, timedelta, timezone
  6 | import pandas as pd
  7 | import traceback
  8 | from io import BytesIO
  9 | 
 10 | from stqdm import stqdm
 11 | from api_client import OpenAIUsageAPIClient
 12 | from data_processor import create_dataframe, enrich_dataframe_with_lookups, reorder_columns
 13 | 
 14 | # API endpoints configuration
 15 | API_ENDPOINTS = {
 16 |     "completions": "https://api.openai.com/v1/organization/usage/completions",
 17 |     "embeddings": "https://api.openai.com/v1/organization/usage/embeddings", 
 18 |     "moderations": "https://api.openai.com/v1/organization/usage/moderations",
 19 |     "images": "https://api.openai.com/v1/organization/usage/images",
 20 |     "audio_speeches": "https://api.openai.com/v1/organization/usage/audio_speeches",
 21 |     "audio_transcriptions": "https://api.openai.com/v1/organization/usage/audio_transcriptions",
 22 |     "vector_stores": "https://api.openai.com/v1/organization/usage/vector_stores",
 23 |     "code_interpreter_sessions": "https://api.openai.com/v1/organization/usage/code_interpreter_sessions"
 24 | }
 25 | 
 26 | # Lookup endpoints for ID-to-name mapping
 27 | LOOKUP_ENDPOINTS = {
 28 |     "users": "https://api.openai.com/v1/organization/users",
 29 |     "projects": "https://api.openai.com/v1/organization/projects",
 30 |     "api_keys": "https://api.openai.com/v1/organization/admin_api_keys"
 31 | }
 32 | 
 33 | 
 34 | def date_to_unix_timestamp(date_obj: date) -> int:
 35 |     """Convert date to Unix timestamp, treating the date as UTC"""
 36 |     # Create a datetime object and explicitly set timezone to UTC
 37 |     dt_utc = datetime.combine(date_obj, datetime.min.time(), timezone.utc)
 38 |     return int(dt_utc.timestamp())
 39 | 
 40 | 
 41 | async def main_async():
 42 |     # Page configuration
 43 |     st.set_page_config(
 44 |         page_title="OpenAI Usage API Dashboard",
 45 |         page_icon="📊",
 46 |         layout="wide",
 47 |         initial_sidebar_state="expanded"
 48 |     )
 49 |     
 50 |     # Sidebar with API key
 51 |     st.sidebar.header("Configuration")
 52 |     
 53 |     # Check environment for API key and pre-populate
 54 |     env_api_key = os.getenv("OPENAI_ADMIN_KEY", "")
 55 |     api_key = st.sidebar.text_input(
 56 |         "OpenAI Admin API Key",
 57 |         value=env_api_key,
 58 |         type="password",
 59 |         help="Enter your [OpenAI Admin API key](https://platform.openai.com/settings/organization/admin-keys). If OPENAI_ADMIN_KEY environment variable is set, it will be pre-populated."
 60 |     )
 61 |     
 62 |     # Author's security note
 63 |     st.sidebar.markdown("""
 64 |     _**Author's Note:** While I can only claim that your credentials are not stored anywhere, for maximum security, you should generate a new app-specific and read only [Admin API key](https://platform.openai.com/settings/organization/admin-keys) on your account and use it here. This way, you can deactivate the key after you don't plan to use the app anymore, and it won't affect any of your other keys/apps. You can check out the GitHub source for this app using below button:_
 65 |     """)
 66 |     
 67 |     # GitHub repository badge
 68 |     st.sidebar.markdown("""
 69 |     [![GitHub](https://img.shields.io/github/stars/tipani86/OpenAI-Tools)](https://github.com/tipani86/OpenAI-Tools)
 70 |     """)
 71 |     
 72 |     # Cache management
 73 |     st.sidebar.header("Cache Management")
 74 |     if st.sidebar.button("🗑️ Clear All Cache", help="Clear cached API responses and session data"):
 75 |         # Clear the alru cache if client exists
 76 |         if 'api_client' in st.session_state:
 77 |             st.session_state.api_client.fetch_all_pages.cache_clear()
 78 |         # Clear session state
 79 |         st.session_state.clear()
 80 |         st.sidebar.success("Cache and session data cleared successfully!")
 81 | 
 82 |     # Main content
 83 |     st.title("📊 OpenAI Usage API Dashboard")
 84 |     st.markdown("Fetch and analyze usage data from OpenAI's Usage API across all endpoints.")
 85 |     
 86 |     # Form for user inputs
 87 |     with st.form("usage_form"):
 88 |         st.subheader("Settings")
 89 |         
 90 |         # Date range picker
 91 |         col1, col2 = st.columns(2)
 92 |         with col1:
 93 |             start_date = st.date_input(
 94 |                 "Start Date (inclusive)",
 95 |                 value=date.today() - timedelta(days=7),
 96 |                 help="Start date for usage data (inclusive)"
 97 |             )
 98 |         with col2:
 99 |             end_date = st.date_input(
100 |                 "End Date (**exclusive**; for monthly reports, select the first day of the _second_ month)", 
101 |                 value=date.today(),
102 |                 help="End date for usage data (exclusive)"
103 |             )
104 |         
105 |         # Usage endpoints selection
106 |         selected_endpoints = st.multiselect(
107 |             "Usage Endpoints",
108 |             options=list(API_ENDPOINTS.keys()),
109 |             default=list(API_ENDPOINTS.keys()),
110 |             help="Select which usage endpoints to query"
111 |         )
112 |         
113 |         # Submit button
114 |         submit_button = st.form_submit_button("Fetch Usage Data", type="primary")
115 |     
116 |     # Process form submission
117 |     if submit_button:
118 |         if not api_key:
119 |             st.error("Please provide an OpenAI Admin API key.")
120 |             return
121 |             
122 |         if not selected_endpoints:
123 |             st.error("Please select at least one usage endpoint.")
124 |             return
125 |             
126 |         try:
127 |             # Convert dates to Unix timestamps
128 |             start_timestamp = date_to_unix_timestamp(start_date)
129 |             end_timestamp = date_to_unix_timestamp(end_date)
130 |             
131 |             # Create or reuse API client from session state
132 |             if 'api_client' not in st.session_state or st.session_state.get('api_key') != api_key:
133 |                 # Close existing client if it exists
134 |                 if 'api_client' in st.session_state:
135 |                     await st.session_state.api_client.close()
136 |                 # Create new client
137 |                 st.session_state.api_client = OpenAIUsageAPIClient(api_key)
138 |                 st.session_state.api_key = api_key
139 |             
140 |             client = st.session_state.api_client
141 |             
142 |             # Prepare parameters
143 |             base_params = {
144 |                 "start_time": start_timestamp,
145 |                 "end_time": end_timestamp,
146 |                 "group_by": ["project_id", "user_id", "api_key_id", "model"]
147 |             }
148 |             
149 |             # Create tasks for asyncio.gather
150 |             tasks = []
151 |             for endpoint_name in selected_endpoints:
152 |                 endpoint_url = API_ENDPOINTS[endpoint_name]
153 |                 params = base_params.copy()
154 |                 
155 |                 # Special provision for vector stores - can only be grouped by project_id
156 |                 if endpoint_name == "vector_stores":
157 |                     params["group_by"] = ["project_id"]
158 |                 # Code interpreter sessions also can only be grouped by project_id
159 |                 elif endpoint_name == "code_interpreter_sessions":
160 |                     params["group_by"] = ["project_id"]
161 |                 
162 |                 # Convert params dict to JSON string for caching
163 |                 params_json = json.dumps(params, sort_keys=True)
164 |                 task = client.fetch_all_pages(endpoint_name, endpoint_url, params_json)
165 |                 tasks.append(task)
166 |             
167 |             # Execute tasks with progress tracking
168 |             st.info(f"Fetching data from {len(tasks)} endpoints...")
169 |             
170 |             all_data = await stqdm.gather(*tasks)
171 | 
172 |             # all_data is a list of lists, each list contains the data for a single endpoint, need to flatten it
173 |             all_data = [item for sublist in all_data for item in sublist]
174 |             
175 |             # Fetch lookup data for enrichment
176 |             st.info("Fetching lookup data for users, projects, and API keys...")
177 |             lookup_tasks = [
178 |                 client.fetch_lookup_data("users", LOOKUP_ENDPOINTS["users"]),
179 |                 client.fetch_lookup_data("projects", LOOKUP_ENDPOINTS["projects"]),
180 |                 client.fetch_lookup_data("api_keys", LOOKUP_ENDPOINTS["api_keys"])
181 |             ]
182 |             user_lookup, project_lookup, api_key_lookup = await stqdm.gather(*lookup_tasks)
183 |             
184 |             # Display cache statistics only in debug mode
185 |             if os.getenv("LOGURU_LEVEL") == "DEBUG":
186 |                 cache_info = client.fetch_all_pages.cache_info()
187 |                 st.info(f"📊 Cache Statistics: {cache_info.hits} hits, {cache_info.misses} misses, {cache_info.currsize}/{cache_info.maxsize} cached entries")
188 |             
189 |             # Process and display results
190 |             if all_data:
191 |                 df = create_dataframe(all_data)
192 |                 # Enrich dataframe with human-readable names
193 |                 df = enrich_dataframe_with_lookups(df, user_lookup, project_lookup, api_key_lookup)
194 |                 # Reorder columns for better presentation
195 |                 df = reorder_columns(df)
196 |                 
197 |                 st.success(f"✅ Successfully fetched {len(df)} records from {len(selected_endpoints)} endpoints")
198 |                 
199 |                 # Display DataFrame
200 |                 st.subheader("Usage Data")
201 |                 st.dataframe(df, use_container_width=True)
202 |                 
203 |                 # Download functionality
204 |                 def convert_df_to_csv(dataframe):
205 |                     return dataframe.to_csv(index=False)
206 |                 
207 |                 csv_data = convert_df_to_csv(df)
208 |                 
209 |                 st.download_button(
210 |                     label="📥 Download as CSV",
211 |                     data=csv_data,
212 |                     file_name=f"openai_usage_{start_date}_{end_date}.csv",
213 |                     mime="text/csv"
214 |                 )
215 |                 
216 |             else:
217 |                 st.warning("No data found for the selected parameters and date range.")
218 |                 
219 |         except Exception as e:
220 |             st.error("An error occurred while fetching usage data:")
221 |             st.error(f"Error: {str(e)}")
222 |             st.error("Full traceback:")
223 |             st.code(traceback.format_exc())
224 | 
225 | 
226 | if __name__ == "__main__":
227 |     asyncio.run(main_async()) 


--------------------------------------------------------------------------------