├── src └── gpu_benchmark │ ├── __init__.py │ ├── benchmarks │ ├── __init__.py │ ├── stable_diffusion_1_5.py │ └── qwen3_0_6b.py │ ├── main.py │ └── database.py ├── .gitignore ├── MANIFEST.in ├── pyproject.toml ├── LICENSE └── README.md /src/gpu_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # src/gpu_benchmark/__init__.py 2 | from .database import upload_benchmark_results 3 | from .main import main -------------------------------------------------------------------------------- /src/gpu_benchmark/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | # src/gpu_benchmark/benchmarks/__init__.py 2 | from . import stable_diffusion_1_5 3 | from . import qwen3_0_6b -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | env/ 3 | __pycache__/ 4 | *.pyc 5 | *.pyo 6 | *.pyd 7 | *.pyw 8 | *.egg-info 9 | dist/ 10 | build/ 11 | *.log 12 | *.log.* 13 | *.log.*.* 14 | *.log.*.*.* 15 | *.log.*.*.*.* -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Exclude specific files and patterns 2 | exclude .env 3 | exclude *.ipynb 4 | exclude .gitignore 5 | exclude *.log 6 | exclude *.txt 7 | 8 | # Exclude entire directories 9 | prune env 10 | prune .git 11 | prune __pycache__ 12 | prune benchmark_results 13 | prune .ipynb_checkpoints 14 | 15 | # Global exclusions (apply to all directories) 16 | global-exclude *.pyc 17 | global-exclude *.pyo 18 | global-exclude .DS_Store -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "gpu_benchmark" 3 | version = "0.3.1" 4 | 
description = "GPU benchmarking tool using Stable Diffusion" 5 | readme = "README.md" 6 | authors = [ 7 | {name = "Max Hager", email = "maxhager28@gmail.com"} 8 | ] 9 | dependencies = [ 10 | "torch", 11 | "tqdm", 12 | "diffusers", 13 | "transformers", 14 | "accelerate", 15 | "pynvml", 16 | "supabase" 17 | ] 18 | 19 | [project.scripts] 20 | gpu-benchmark = "gpu_benchmark.main:main" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Max Hager 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPU Benchmark by [United Compute](https://www.unitedcompute.ai) 2 | 3 | A simple CLI tool to benchmark your GPU's performance with Stable Diffusion (or another supported model) and compare your results with our global benchmark results. 4 | 5 | ![United Compute Logo](https://www.unitedcompute.ai/logo.png) 6 | 7 | ## Installation 8 | 9 | ```bash 10 | pip install gpu-benchmark 11 | ``` 12 | 13 | ## Usage 14 | 15 | Run the benchmark (takes 5 minutes after the pipeline is loaded): 16 | 17 | ```bash 18 | gpu-benchmark 19 | ``` 20 | 21 | ### Optional Arguments 22 | 23 | If you're running on a cloud provider, specify it with the `--provider` flag: 24 | 25 | ```bash 26 | gpu-benchmark --provider runpod 27 | ``` 28 | 29 | You can specify the model to use for the benchmark with the `--model` flag. By default, the Stable Diffusion 1.5 model is used. 30 | Example for running a different model: 31 | 32 | ```bash 33 | gpu-benchmark --model qwen3-0-6b 34 | ``` 35 | 36 | For multi-GPU systems, you can select a specific GPU as follows: 37 | 38 | 1. Using the `--gpu` flag: 39 | 40 | ```bash 41 | gpu-benchmark --gpu 1 # Uses GPU index 1 42 | ``` 43 | 44 | The tool will: 45 | 46 | 1. Load the selected model pipeline (Stable Diffusion 1.5 by default) 47 | 2. Run generations (images or text, depending on the model) for 5 minutes 48 | 3. Count completed generations and track GPU temperature 49 | 4. 
Upload results to the [United Compute Benchmark Results](https://www.unitedcompute.ai/gpu-benchmark) 50 | 51 | ## What it measures 52 | 53 | - **Benchmark Score**: Number of iterations or images generated in 5 minutes (model-dependent) 54 | - **GPU Model**: The specific model of your GPU (e.g., NVIDIA GeForce RTX 4090) 55 | - **Max Heat**: Maximum GPU temperature reached (°C) 56 | - **Avg Heat**: Average GPU temperature during the benchmark (°C) 57 | - **Country**: Your location (detected automatically) 58 | - **GPU Power**: Power consumption in watts (W) 59 | - **GPU Memory**: Total GPU memory in gigabytes (GB) 60 | - **Platform**: Operating system information 61 | - **Acceleration**: CUDA version 62 | - **PyTorch Version**: PyTorch library version 63 | 64 | ## Requirements 65 | 66 | - CUDA-compatible NVIDIA GPU 67 | - Python 3.8+ 68 | 69 | ## Links 70 | 71 | - [Official Website](https://www.unitedcompute.ai) 72 | - [GPU Benchmark Results](https://www.unitedcompute.ai/gpu-benchmark) 73 | -------------------------------------------------------------------------------- /src/gpu_benchmark/main.py: -------------------------------------------------------------------------------- 1 | # src/gpu_benchmark/main.py 2 | from .benchmarks import stable_diffusion_1_5, qwen3_0_6b 3 | from .database import upload_benchmark_results 4 | import argparse 5 | import torch 6 | 7 | # Import benchmark runners dynamically or add specific imports here later 8 | # For now, let's assume functions like run_stable_diffusion_benchmark, run_llm_benchmark 9 | # will be available from src.gpu_benchmark.benchmarks 10 | # from .benchmarks import stable_diffusion # This will be created 11 | # from .utils import get_clean_platform # This will be created, assuming get_clean_platform moves to utils 12 | 13 | def main(): 14 | """Entry point for the GPU benchmark command-line tool.""" 15 | # Parse command-line arguments 16 | parser = argparse.ArgumentParser(description="GPU Benchmark by United Compute") 
17 | parser.add_argument("--provider", type=str, help="Cloud provider (e.g., RunPod, AWS, GCP) or Private", default="Private") 18 | parser.add_argument("--gpu", type=int, help="GPU device index to use (defaults to CUDA_VISIBLE_DEVICES or 0)", default=None) 19 | parser.add_argument( 20 | "--model", 21 | type=str, 22 | help="Model to benchmark (e.g., stable-diffusion-1-5, qwen3-0-6b)", 23 | default="stable-diffusion-1-5", 24 | choices=["stable-diffusion-1-5", "qwen3-0-6b"] 25 | ) 26 | args = parser.parse_args() 27 | 28 | # If GPU device is specified, set it 29 | if args.gpu is not None: 30 | torch.cuda.set_device(args.gpu) 31 | 32 | # Convert provider to lowercase 33 | provider = args.provider.lower() 34 | 35 | # Simple start message 36 | print(f"GPU Benchmark starting for model: {args.model}...") 37 | print("This benchmark will run for 5 minutes") 38 | 39 | # Fixed duration 40 | duration = 300 # 300 seconds 41 | 42 | results = None 43 | if args.model == "stable-diffusion-1-5": 44 | print("Loading Stable Diffusion 1.5 pipeline...") 45 | pipe = stable_diffusion_1_5.load_pipeline() 46 | print("Pipeline loaded successfully!") 47 | 48 | print("Running Stable Diffusion 1.5 benchmark...") 49 | results = stable_diffusion_1_5.run_benchmark(pipe=pipe, duration=duration) 50 | elif args.model == "qwen3-0-6b": 51 | print("Loading Qwen3-0-6B model...") 52 | model, tokenizer = qwen3_0_6b.load_pipeline() 53 | 54 | print("Running Qwen3-0-6B benchmark...") 55 | results = qwen3_0_6b.run_benchmark(model=model, tokenizer=tokenizer, duration=duration) 56 | else: 57 | print(f"Error: Model {args.model} not supported.") 58 | return 59 | 60 | # Only proceed if the benchmark completed successfully (not canceled) 61 | if results and results.get("completed", False): 62 | primary_metric_val = None 63 | max_temp_val = None 64 | avg_temp_val = None 65 | gpu_memory_val = None 66 | 67 | # Get the primary metric using the generic 'result' key 68 | primary_metric_val = results.get('result') 69 | 70 | 
if args.model == "stable-diffusion-1-5": 71 | max_temp_val = results.get('max_temp') 72 | avg_temp_val = results.get('avg_temp') 73 | gpu_memory_val = results.get('gpu_memory_total') 74 | elif args.model == "qwen3-0-6b": 75 | max_temp_val = results.get('max_temp') 76 | avg_temp_val = results.get('avg_temp') 77 | gpu_memory_val = results.get('gpu_memory_total') 78 | 79 | # The upload_benchmark_results function will print the success message and ID. 80 | upload_benchmark_results( 81 | model_name=args.model, 82 | primary_metric_value=primary_metric_val, # This is now consistently from results.get('result') 83 | max_temp=max_temp_val, 84 | avg_temp=avg_temp_val, 85 | cloud_provider=provider, 86 | gpu_power_watts=results.get('gpu_power_watts'), 87 | gpu_memory_total=gpu_memory_val, 88 | platform=results.get('platform'), 89 | acceleration=results.get('acceleration'), 90 | torch_version=results.get('torch_version') 91 | ) 92 | 93 | print("Benchmark completed") # Final confirmation message 94 | elif results and results.get("error"): 95 | print(f"\nBenchmark failed: {results.get('error')}") 96 | elif results is None and args.model != "stable-diffusion-1-5" and args.model != "qwen3-0-6b": # Model not supported 97 | pass # Error already printed 98 | else: 99 | print("\nBenchmark was canceled or did not complete. 
Results not submitted.") 100 | if results and results.get("reason") == "canceled": 101 | # When printing items processed before cancellation, also use 'result' 102 | items_before_cancel = results.get('result', 0) 103 | if args.model == "qwen3-0-6b": 104 | print(f"Generations processed before cancellation: {items_before_cancel}") 105 | elif args.model == "stable-diffusion-1-5": 106 | print(f"Images generated before cancellation: {items_before_cancel}") 107 | 108 | if __name__ == "__main__": 109 | main() -------------------------------------------------------------------------------- /src/gpu_benchmark/database.py: -------------------------------------------------------------------------------- 1 | # src/gpu_benchmark/database.py 2 | import requests 3 | import datetime 4 | import torch 5 | 6 | # Hardcoded Supabase credentials (anon key is designed to be public) 7 | SUPABASE_URL = "https://jftqjabhnesfphpkoilc.supabase.co" 8 | SUPABASE_ANON_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImpmdHFqYWJobmVzZnBocGtvaWxjIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDQ5NzI4NzIsImV4cCI6MjA2MDU0ODg3Mn0.S0ZdRIauUyMhdVJtYFNquvnlW3dV1wxERy7YrurZyag" 9 | 10 | def country_code_to_flag(country_code): 11 | """Convert country code to flag emoji.""" 12 | if len(country_code) != 2 or not country_code.isalpha(): 13 | return "🏳️" # White flag for unknown 14 | 15 | # Convert each letter to regional indicator symbol 16 | # A-Z: 0x41-0x5A -> regional indicators: 0x1F1E6-0x1F1FF 17 | return ''.join(chr(ord(c.upper()) - ord('A') + ord('🇦')) for c in country_code) 18 | 19 | def get_country_flag(): 20 | """Get country flag emoji based on IP.""" 21 | try: 22 | country_response = requests.get("https://ipinfo.io/json") 23 | country_code = country_response.json().get("country", "Unknown") 24 | return country_code_to_flag(country_code) 25 | except Exception as e: 26 | print(f"Error getting country info: {e}") 27 | return "🏳️" # White flag for unknown 28 | 29 | def 
upload_benchmark_results(model_name: str, primary_metric_value: int, max_temp: float, avg_temp: float, cloud_provider: str = "Private", **kwargs): 30 | """Upload benchmark results to Supabase database. 31 | 32 | Args: 33 | model_name: Name of the model ("stable-diffusion-1-5", "qwen3-0-6b") to determine the target table. 34 | primary_metric_value: Value for the primary metric (e.g., images generated or generations processed), 35 | which will be stored in the 'result' column. 36 | max_temp: Maximum GPU temperature recorded. 37 | avg_temp: Average GPU temperature recorded. 38 | cloud_provider: Cloud provider name (default: "Private"). 39 | **kwargs: Additional fields to upload (e.g., gpu_power_watts, gpu_memory_total). 40 | 41 | Returns: 42 | tuple: (success, message, record_id) 43 | """ 44 | 45 | table_name = "" 46 | metric_column_name = "result" # Generic column name for the primary metric 47 | 48 | if model_name == "stable-diffusion-1-5": 49 | table_name = "stable-diffusion-1-5" 50 | elif model_name == "qwen3-0-6b": 51 | table_name = "qwen3-0-6b" 52 | else: 53 | err_msg = f"Unsupported model_name '{model_name}' for database upload." 54 | print(f"❌ {err_msg}") 55 | return False, err_msg, None 56 | 57 | # Get country flag 58 | flag_emoji = get_country_flag() 59 | 60 | # Prepare benchmark results 61 | benchmark_data = { 62 | "created_at": datetime.datetime.now().isoformat(), 63 | "gpu_type": torch.cuda.get_device_name(torch.cuda.current_device()) if torch.cuda.is_available() else "N/A", 64 | metric_column_name: primary_metric_value, # Using "result" as the column name 65 | "max_heat": int(max_temp) if max_temp is not None else None, 66 | "avg_heat": int(avg_temp) if avg_temp is not None else None, 67 | "country": flag_emoji, 68 | "provider": cloud_provider 69 | } 70 | 71 | # Add additional fields if provided. 
72 | additional_fields_expected = [ 73 | "gpu_power_watts", "gpu_memory_total", "platform", 74 | "acceleration", "torch_version" 75 | ] 76 | 77 | for field in additional_fields_expected: 78 | if field in kwargs and kwargs[field] is not None: 79 | benchmark_data[field] = kwargs[field] 80 | 81 | api_url = f"{SUPABASE_URL}/rest/v1/{table_name}" # Dynamic table name 82 | 83 | try: 84 | response = requests.post( 85 | api_url, 86 | json=benchmark_data, 87 | headers={ 88 | "Content-Type": "application/json", 89 | "apikey": SUPABASE_ANON_KEY, 90 | "Authorization": f"Bearer {SUPABASE_ANON_KEY}", 91 | "Prefer": "return=representation" 92 | } 93 | ) 94 | 95 | if response.status_code in (200, 201): 96 | try: 97 | record_data = response.json() 98 | if isinstance(record_data, list) and len(record_data) > 0: 99 | record_id = record_data[0].get('id') 100 | print(f"✅ Results uploaded successfully to benchmark results!") 101 | print(f"Your ID at www.unitedcompute.ai/gpu-benchmark: {record_id}") 102 | return True, "Upload successful", record_id 103 | else: 104 | print(f"✅ Upload successful, but couldn't retrieve ID from response: {record_data}") 105 | return True, "Upload successful, but couldn't retrieve ID", None 106 | except ValueError as e: # Catch JSON decoding errors 107 | print(f"✅ Upload reported success (status {response.status_code}), but failed to parse JSON response: {e}. Response text: '{response.text}'") 108 | return True, f"Upload successful (status {response.status_code}), but error parsing response", None 109 | else: 110 | error_details = f"Status Code: {response.status_code}. Response Body: '{response.text}'. Headers: {response.headers}" 111 | error_message = f"Failed to upload results to Supabase. 
{error_details}" 112 | print(f"❌ Database Upload Error: {error_message}") 113 | if response.status_code == 400: 114 | print("Hint (400 Bad Request): This might be due to a mismatch between the data sent and the table schema in Supabase (e.g., wrong data types for columns, missing required columns that are not nullable, or malformed JSON). Check the 'Response Body' above for specific column errors from Supabase.") 115 | elif response.status_code == 401: 116 | print("Hint (401 Unauthorized): Check if the Supabase ANON_KEY is correct and has the necessary INSERT permissions for the table. Review Row Level Security (RLS) policies on the table.") 117 | elif response.status_code == 403: 118 | print("Hint (403 Forbidden): The request was understood, but refused. This often relates to permissions, possibly RLS policies or service-level API key permissions for insert operations on the target table.") 119 | elif response.status_code == 404: 120 | print(f"Hint (404 Not Found): Check if the table_name '{table_name}' is correct and the API endpoint '{api_url}' is valid. The table might not exist or the URL path could be wrong.") 121 | return False, error_message, None 122 | 123 | except requests.exceptions.ConnectionError as e: 124 | error_message = f"Network Connection Error: Failed to connect to Supabase at {SUPABASE_URL}. Details: {e}" 125 | print(f"❌ {error_message}") 126 | print("Troubleshooting: Check your internet connection and firewall settings. Ensure Supabase services are operational.") 127 | return False, error_message, None 128 | except requests.exceptions.Timeout as e: 129 | error_message = f"Request Timeout: The request to Supabase timed out. URL: {api_url}. Details: {e}" 130 | print(f"❌ {error_message}") 131 | print("Troubleshooting: Check your network connection. 
The Supabase server might be overloaded or slow to respond.") 132 | return False, error_message, None 133 | except requests.exceptions.RequestException as e: # Catches other requests-related errors (e.g., invalid URL) 134 | error_message = f"Request Error: An error occurred during the request to Supabase. URL: {api_url}. Details: {type(e).__name__} - {e}" 135 | print(f"❌ {error_message}") 136 | return False, error_message, None 137 | except Exception as e: 138 | import traceback 139 | error_message = f"Unexpected Error: An unexpected Python error occurred during database upload. Details: {type(e).__name__} - {e}" 140 | print(f"❌ {error_message}") 141 | traceback.print_exc() 142 | return False, error_message, None -------------------------------------------------------------------------------- /src/gpu_benchmark/benchmarks/stable_diffusion_1_5.py: -------------------------------------------------------------------------------- 1 | # src/gpu_benchmark/benchmark.py 2 | import torch 3 | import time 4 | from tqdm import tqdm 5 | import pynvml 6 | from diffusers import StableDiffusionPipeline 7 | import platform 8 | import os 9 | 10 | def get_clean_platform(): 11 | os_platform = platform.system() 12 | if os_platform == "Linux": 13 | try: 14 | with open("/etc/os-release") as f: 15 | for line in f: 16 | if line.startswith("PRETTY_NAME="): 17 | return line.strip().split("=")[1].strip('"') 18 | except Exception: 19 | pass 20 | return f"Linux {platform.release()}" 21 | elif os_platform == "Windows": 22 | return f"Windows {platform.release()}" 23 | elif os_platform == "Darwin": 24 | return f"macOS {platform.mac_ver()[0]}" 25 | else: 26 | return os_platform 27 | 28 | def load_pipeline(): 29 | """Load the Stable Diffusion pipeline and return it.""" 30 | model_id = "yachty66/stable-diffusion-v1-5" 31 | pipe = StableDiffusionPipeline.from_pretrained( 32 | model_id, 33 | torch_dtype=torch.float16, 34 | low_cpu_mem_usage=True 35 | ) 36 | pipe = pipe.to("cuda") 37 | return pipe 38 | 
39 | def get_nvml_device_handle(): 40 | """Get the correct NVML device handle for the GPU being used.""" 41 | pynvml.nvmlInit() 42 | 43 | # Check CUDA_VISIBLE_DEVICES first 44 | cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES') 45 | if cuda_visible_devices is not None: 46 | try: 47 | # When CUDA_VISIBLE_DEVICES is set, the first (and only) visible GPU 48 | # becomes index 0 to CUDA, but we need to use the original index for NVML 49 | original_gpu_index = int(cuda_visible_devices.split(',')[0]) 50 | handle = pynvml.nvmlDeviceGetHandleByIndex(original_gpu_index) 51 | return handle 52 | except (ValueError, IndexError): 53 | print(f"Warning: Could not parse CUDA_VISIBLE_DEVICES={cuda_visible_devices}") 54 | 55 | # Fallback to current CUDA device 56 | cuda_idx = torch.cuda.current_device() 57 | return pynvml.nvmlDeviceGetHandleByIndex(cuda_idx) 58 | 59 | def run_benchmark(pipe, duration): 60 | """Run the GPU benchmark for the specified duration in seconds.""" 61 | # Get the correct NVML handle for the GPU being used 62 | handle = get_nvml_device_handle() 63 | 64 | # Setup variables 65 | image_count = 0 66 | total_gpu_time = 0 67 | temp_readings = [] 68 | power_readings = [] 69 | 70 | # Start benchmark 71 | start_time = time.time() 72 | end_time = start_time + duration 73 | prompt = "a photo of an astronaut riding a horse on mars" 74 | 75 | try: 76 | # Disable progress bar for the pipeline 77 | pipe.set_progress_bar_config(disable=True) 78 | 79 | # Create a progress bar for the entire benchmark 80 | with tqdm(total=100, desc="Benchmark progress", unit="%") as pbar: 81 | # Calculate update amount per check 82 | last_update_time = start_time 83 | last_update_percent = 0 84 | 85 | # Run until time is up 86 | while time.time() < end_time: 87 | # Get GPU temperature 88 | current_temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU) 89 | temp_readings.append(current_temp) 90 | 91 | # CUDA timing events 92 | start_event = 
torch.cuda.Event(enable_timing=True) 93 | end_event = torch.cuda.Event(enable_timing=True) 94 | torch.cuda.synchronize() 95 | 96 | # Record start time and generate image 97 | start_event.record() 98 | image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0] 99 | end_event.record() 100 | torch.cuda.synchronize() 101 | 102 | # Calculate timing 103 | gpu_time_ms = start_event.elapsed_time(end_event) 104 | total_gpu_time += gpu_time_ms 105 | 106 | # Update counter 107 | image_count += 1 108 | 109 | # Sample power usage 110 | try: 111 | current_power = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0 # mW to W 112 | power_readings.append(current_power) 113 | except: 114 | pass 115 | 116 | # Update progress bar 117 | current_time = time.time() 118 | current_percent = min(100, int((current_time - start_time) / duration * 100)) 119 | if current_percent > last_update_percent: 120 | pbar.update(current_percent - last_update_percent) 121 | pbar.set_postfix({ 122 | 'Images': image_count, 123 | 'Temp': f"{current_temp}°C" 124 | }) 125 | last_update_percent = current_percent 126 | 127 | # Final temperature reading 128 | final_temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU) 129 | temp_readings.append(final_temp) 130 | 131 | # Calculate results 132 | elapsed = time.time() - start_time 133 | avg_time_ms = total_gpu_time / image_count if image_count > 0 else 0 134 | avg_temp = sum(temp_readings) / len(temp_readings) 135 | max_temp = max(temp_readings) 136 | 137 | # Get GPU power info 138 | try: 139 | power_usage = round(pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0, 2) # mW to W with 2 decimal places 140 | except: 141 | power_usage = None 142 | 143 | # Get GPU memory info 144 | try: 145 | meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle) 146 | gpu_memory_total = round(meminfo.total / (1024 * 1024 * 1024), 2) # bytes to GB 147 | except: 148 | gpu_memory_total = None 149 | 150 | # Get platform info 151 | platform_info = 
get_clean_platform() 152 | 153 | # Get CUDA version (acceleration) 154 | cuda_version = f"CUDA {torch.version.cuda}" if torch.cuda.is_available() else "N/A" 155 | 156 | # Get torch version 157 | torch_version = torch.__version__ 158 | 159 | # Clean up 160 | pynvml.nvmlShutdown() 161 | 162 | # Calculate average power 163 | avg_power = round(sum(power_readings) / len(power_readings), 2) if power_readings else None 164 | 165 | # Return benchmark results with completed flag 166 | return { 167 | "completed": True, # Flag indicating the benchmark completed successfully 168 | "result": image_count, 169 | "max_temp": max_temp, 170 | "avg_temp": avg_temp, 171 | "elapsed_time": elapsed, 172 | "avg_time_ms": avg_time_ms, 173 | "gpu_utilization": (total_gpu_time/1000)/elapsed*100, 174 | "gpu_power_watts": avg_power, 175 | "gpu_memory_total": gpu_memory_total, 176 | "platform": platform_info, 177 | "acceleration": cuda_version, 178 | "torch_version": torch_version 179 | } 180 | 181 | except KeyboardInterrupt: 182 | # Clean up and return partial results with completed flag set to False 183 | pynvml.nvmlShutdown() 184 | return { 185 | "completed": False, # Flag indicating the benchmark was canceled 186 | "result": image_count, 187 | "max_temp": max(temp_readings) if temp_readings else 0, 188 | "avg_temp": sum(temp_readings)/len(temp_readings) if temp_readings else 0 189 | } 190 | except Exception as e: 191 | # Handle any other errors, clean up, and return error info 192 | pynvml.nvmlShutdown() 193 | print(f"Error during benchmark: {e}") 194 | return { 195 | "completed": False, # Flag indicating the benchmark failed 196 | "error": str(e), 197 | "result": image_count 198 | } -------------------------------------------------------------------------------- /src/gpu_benchmark/benchmarks/qwen3_0_6b.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoModelForCausalLM, AutoTokenizer, logging 2 | import torch 3 | import 
time 4 | from tqdm import tqdm 5 | import pynvml 6 | import platform 7 | import os 8 | import random 9 | import numpy as np 10 | 11 | # Disable all warnings and progress bars from transformers 12 | logging.set_verbosity_error() 13 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 14 | 15 | def get_clean_platform(): 16 | os_platform = platform.system() 17 | if os_platform == "Linux": 18 | try: 19 | with open("/etc/os-release") as f: 20 | for line in f: 21 | if line.startswith("PRETTY_NAME="): 22 | return line.strip().split("=")[1].strip('"') 23 | except Exception: 24 | pass 25 | return f"Linux {platform.release()}" 26 | elif os_platform == "Windows": 27 | return f"Windows {platform.release()}" 28 | elif os_platform == "Darwin": 29 | return f"macOS {platform.mac_ver()[0]}" 30 | else: 31 | return os_platform 32 | 33 | def get_nvml_device_handle(): 34 | """Get the correct NVML device handle for the GPU being used.""" 35 | pynvml.nvmlInit() 36 | 37 | cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES') 38 | if cuda_visible_devices is not None: 39 | try: 40 | original_gpu_index = int(cuda_visible_devices.split(',')[0]) 41 | handle = pynvml.nvmlDeviceGetHandleByIndex(original_gpu_index) 42 | return handle 43 | except (ValueError, IndexError): 44 | print(f"Warning: Could not parse CUDA_VISIBLE_DEVICES={cuda_visible_devices}") 45 | 46 | cuda_idx = torch.cuda.current_device() 47 | return pynvml.nvmlDeviceGetHandleByIndex(cuda_idx) 48 | 49 | def setup_qwen_model(): 50 | model_name = "Qwen/Qwen3-0.6B" 51 | # Disable tokenizer warnings 52 | tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) 53 | model = AutoModelForCausalLM.from_pretrained( 54 | model_name, 55 | torch_dtype=torch.float16, 56 | device_map="auto", 57 | # Disable model warnings and progress bars 58 | use_cache=True, 59 | low_cpu_mem_usage=True, 60 | ) 61 | # Disable generation warnings 62 | model.generation_config.pad_token_id = tokenizer.pad_token_id 63 | model.config.pad_token_id = 
tokenizer.pad_token_id 64 | return model, tokenizer 65 | 66 | def run_benchmark(model, tokenizer, duration): 67 | """Run the GPU benchmark for the specified duration in seconds.""" 68 | handle = get_nvml_device_handle() 69 | 70 | # Setup variables 71 | generation_count = 0 72 | total_gpu_time = 0 73 | temp_readings = [] 74 | power_readings = [] 75 | 76 | # Start benchmark 77 | start_time = time.time() 78 | end_time = start_time + duration 79 | prompt = "Write a technical explanation of how GPUs process neural networks, in exactly 100 words." 80 | 81 | try: 82 | # Create a single progress bar for the entire benchmark 83 | with tqdm(total=100, desc="Benchmark progress", unit="%", ncols=100) as pbar: 84 | last_update_percent = 0 85 | 86 | while time.time() < end_time: 87 | # Get GPU temperature 88 | current_temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU) 89 | temp_readings.append(current_temp) 90 | 91 | # CUDA timing events 92 | start_event = torch.cuda.Event(enable_timing=True) 93 | end_event = torch.cuda.Event(enable_timing=True) 94 | torch.cuda.synchronize() 95 | 96 | # Record start time and generate text 97 | start_event.record() 98 | 99 | # Generate text without warnings 100 | with torch.no_grad(): 101 | messages = [{"role": "user", "content": prompt}] 102 | text = tokenizer.apply_chat_template( 103 | messages, 104 | tokenize=False, 105 | add_generation_prompt=True, 106 | enable_thinking=False, 107 | add_special_tokens=False 108 | ) 109 | model_inputs = tokenizer([text], return_tensors="pt").to(model.device) 110 | 111 | generated_ids = model.generate( 112 | **model_inputs, 113 | max_new_tokens=256, 114 | do_sample=False, 115 | use_cache=True, 116 | pad_token_id=tokenizer.pad_token_id 117 | ) 118 | 119 | 120 | end_event.record() 121 | torch.cuda.synchronize() 122 | 123 | # Calculate timing 124 | gpu_time_ms = start_event.elapsed_time(end_event) 125 | total_gpu_time += gpu_time_ms 126 | 127 | # Update counter 128 | generation_count += 1 
129 | 130 | # Sample power usage 131 | try: 132 | current_power = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0 133 | power_readings.append(current_power) 134 | except: 135 | pass 136 | 137 | # Update progress bar only when percentage changes 138 | current_time = time.time() 139 | current_percent = min(100, int((current_time - start_time) / duration * 100)) 140 | if current_percent > last_update_percent: 141 | pbar.update(current_percent - last_update_percent) 142 | pbar.set_postfix({ 143 | 'Generations': generation_count, 144 | 'Temp': f"{current_temp}°C" 145 | }, refresh=True) 146 | last_update_percent = current_percent 147 | 148 | # Calculate results 149 | elapsed = time.time() - start_time 150 | avg_time_ms = total_gpu_time / generation_count if generation_count > 0 else 0 151 | avg_temp = sum(temp_readings) / len(temp_readings) 152 | max_temp = max(temp_readings) 153 | 154 | # Get GPU memory info 155 | try: 156 | meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle) 157 | gpu_memory_total = round(meminfo.total / (1024 * 1024 * 1024), 2) 158 | except: 159 | gpu_memory_total = None 160 | 161 | # Calculate average power 162 | avg_power = round(sum(power_readings) / len(power_readings), 2) if power_readings else None 163 | 164 | # Clean up 165 | pynvml.nvmlShutdown() 166 | 167 | return { 168 | "completed": True, 169 | "result": generation_count, 170 | "max_temp": max_temp, 171 | "avg_temp": avg_temp, 172 | "elapsed_time": elapsed, 173 | "avg_time_ms": avg_time_ms, 174 | "gpu_utilization": (total_gpu_time/1000)/elapsed*100, 175 | "gpu_power_watts": avg_power, 176 | "gpu_memory_total": gpu_memory_total, 177 | "platform": get_clean_platform(), 178 | "acceleration": f"CUDA {torch.version.cuda}" if torch.cuda.is_available() else "N/A", 179 | "torch_version": torch.__version__ 180 | } 181 | 182 | except KeyboardInterrupt: 183 | pynvml.nvmlShutdown() 184 | return { 185 | "completed": False, 186 | "result": generation_count, 187 | "max_temp": max(temp_readings) if 
temp_readings else 0, 188 | "avg_temp": sum(temp_readings)/len(temp_readings) if temp_readings else 0, 189 | "avg_time_ms": total_gpu_time / generation_count if generation_count > 0 else 0 190 | } 191 | except Exception as e: 192 | pynvml.nvmlShutdown() 193 | print(f"Error during benchmark: {e}") 194 | return { 195 | "completed": False, 196 | "error": str(e), 197 | "result": generation_count, 198 | "avg_time_ms": total_gpu_time / generation_count if generation_count > 0 else 0 199 | } 200 | 201 | def load_pipeline(): 202 | """Load the Qwen model pipeline and return it.""" 203 | model_name = "Qwen/Qwen3-0.6B" 204 | # Disable tokenizer warnings 205 | tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) 206 | model = AutoModelForCausalLM.from_pretrained( 207 | model_name, 208 | torch_dtype=torch.float16, 209 | device_map="auto", 210 | use_cache=True, 211 | low_cpu_mem_usage=True, 212 | ) 213 | # Disable generation warnings 214 | model.generation_config.pad_token_id = tokenizer.pad_token_id 215 | model.config.pad_token_id = tokenizer.pad_token_id 216 | return model, tokenizer 217 | 218 | # if __name__ == "__main__": 219 | # # Load the model pipeline 220 | # model, tokenizer = load_pipeline() 221 | 222 | # # Run benchmark for 300 seconds (5 minutes) 223 | # results = run_benchmark(model, tokenizer, duration=300) 224 | 225 | # # Print results 226 | # print("\nBenchmark Results:") 227 | # print(f"Completed: {results['completed']}") 228 | # if results.get('error'): 229 | # print(f"Error: {results['error']}") 230 | # else: 231 | # print(f"Total generations: {results['result']}") 232 | # if 'avg_time_ms' in results: 233 | # print(f"Average generation time: {results['avg_time_ms']:.2f}ms") 234 | # print(f"GPU utilization: {results['gpu_utilization']:.2f}%") 235 | # print(f"Maximum GPU temperature: {results['max_temp']}°C") 236 | # print(f"Average GPU temperature: {results['avg_temp']:.2f}°C") 237 | # if results['gpu_power_watts']: 238 | # print(f"Average GPU 
power usage: {results['gpu_power_watts']}W") 239 | # print(f"GPU memory total: {results['gpu_memory_total']}GB") 240 | # print(f"Platform: {results['platform']}") 241 | # print(f"Acceleration: {results['acceleration']}") 242 | # print(f"PyTorch version: {results['torch_version']}") --------------------------------------------------------------------------------