├── src
    └── gpu_benchmark
    │   ├── __init__.py
    │   ├── benchmarks
    │       ├── __init__.py
    │       ├── stable_diffusion_1_5.py
    │       └── qwen3_0_6b.py
    │   ├── main.py
    │   └── database.py
├── .gitignore
├── MANIFEST.in
├── pyproject.toml
├── LICENSE
└── README.md


/src/gpu_benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | # src/gpu_benchmark/__init__.py
2 | from .database import upload_benchmark_results
3 | from .main import main


--------------------------------------------------------------------------------
/src/gpu_benchmark/benchmarks/__init__.py:
--------------------------------------------------------------------------------
1 | # src/gpu_benchmark/benchmarks/__init__.py
2 | from . import stable_diffusion_1_5
3 | from . import qwen3_0_6b


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .env
 2 | env/
 3 | __pycache__/
 4 | *.pyc
 5 | *.pyo
 6 | *.pyd
 7 | *.pyw
 8 | *.egg-info
 9 | dist/
10 | build/
11 | *.log
12 | *.log.*
13 | *.log.*.*
14 | *.log.*.*.*
15 | *.log.*.*.*.*


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | # Exclude specific files and patterns
 2 | exclude .env
 3 | exclude *.ipynb
 4 | exclude .gitignore
 5 | exclude *.log
 6 | exclude *.txt
 7 | 
 8 | # Exclude entire directories
 9 | prune env
10 | prune .git
11 | prune __pycache__
12 | prune benchmark_results
13 | prune .ipynb_checkpoints
14 | 
15 | # Global exclusions (apply to all directories)
16 | global-exclude *.pyc
17 | global-exclude *.pyo
18 | global-exclude .DS_Store


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "gpu_benchmark"
 3 | version = "0.3.1"
 4 | description = "GPU benchmarking tool using Stable Diffusion"
 5 | readme = "README.md"
 6 | authors = [
 7 |     {name = "Max Hager", email = "maxhager28@gmail.com"}
 8 | ]
 9 | dependencies = [
10 |     "torch",
11 |     "tqdm",
12 |     "diffusers",
13 |     "transformers",
14 |     "accelerate",
15 |     "pynvml",
16 |     "supabase"
17 | ]
18 | 
19 | [project.scripts]
20 | gpu-benchmark = "gpu_benchmark.main:main"


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Max Hager
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GPU Benchmark by [United Compute](https://www.unitedcompute.ai)
 2 | 
 3 | A simple CLI tool that benchmarks your GPU's performance with Stable Diffusion and lets you compare your score against our global benchmark results.
 4 | 
 5 | ![United Compute Logo](https://www.unitedcompute.ai/logo.png)
 6 | 
 7 | ## Installation
 8 | 
 9 | ```bash
10 | pip install gpu-benchmark
11 | ```
12 | 
13 | ## Usage
14 | 
15 | Run the benchmark (takes 5 minutes after the pipeline is loaded):
16 | 
17 | ```bash
18 | gpu-benchmark
19 | ```
20 | 
21 | ### Optional Arguments
22 | 
23 | If you're running on a cloud provider, specify it with the `--provider` flag:
24 | 
25 | ```bash
26 | gpu-benchmark --provider runpod
27 | ```
28 | 
29 | You can specify the model to use for the benchmark with the `--model` flag. By default, the Stable Diffusion 1.5 model is used.
30 | Example for running a different model:
31 | 
32 | ```bash
33 | gpu-benchmark --model qwen3-0-6b
34 | ```
35 | 
36 | For multi-GPU systems, you can select a specific GPU by passing its device index to the `--gpu` flag:
37 | 
39 | 
40 | ```bash
41 | gpu-benchmark --gpu 1  # Uses GPU index 1
42 | ```
43 | 
44 | The tool will:
45 | 
46 | 1. Load the selected model (Stable Diffusion 1.5 by default)
47 | 2. Generate images (or text, for language models) for 5 minutes
48 | 3. Count generations and track GPU temperature
49 | 4. Upload results to the [United Compute Benchmark Results](https://www.unitedcompute.ai/gpu-benchmark)
50 | 
51 | ## What it measures
52 | 
53 | - **Benchmark Score**: Number of iterations or images generated in 5 minutes (model-dependent)
54 | - **GPU Model**: The specific model of your GPU (e.g., NVIDIA GeForce RTX 4090)
55 | - **Max Heat**: Maximum GPU temperature reached (°C)
56 | - **Avg Heat**: Average GPU temperature during the benchmark (°C)
57 | - **Country**: Your location (detected automatically)
58 | - **GPU Power**: Power consumption in watts (W)
59 | - **GPU Memory**: Total GPU memory in gigabytes (GB)
60 | - **Platform**: Operating system information
61 | - **Acceleration**: CUDA version
62 | - **PyTorch Version**: PyTorch library version
63 | 
64 | ## Requirements
65 | 
66 | - CUDA-compatible NVIDIA GPU
67 | - Python 3.8+
68 | 
69 | ## Links
70 | 
71 | - [Official Website](https://www.unitedcompute.ai)
72 | - [GPU Benchmark Results](https://www.unitedcompute.ai/gpu-benchmark)
73 | 


--------------------------------------------------------------------------------
/src/gpu_benchmark/main.py:
--------------------------------------------------------------------------------
  1 | # src/gpu_benchmark/main.py
  2 | from .benchmarks import stable_diffusion_1_5, qwen3_0_6b
  3 | from .database import upload_benchmark_results
  4 | import argparse
  5 | import torch 
  6 | 
  7 | # NOTE: benchmark runners are currently imported explicitly at the top of this file.
  8 | # TODO: consider importing them dynamically, and move get_clean_platform into a
  9 | # shared utils module (it is currently duplicated in each benchmark module):
 10 | # from .benchmarks import stable_diffusion # This will be created
 11 | # from .utils import get_clean_platform # This will be created, assuming get_clean_platform moves to utils
 12 | 
 13 | def main():
 14 |     """Entry point for the GPU benchmark command-line tool."""
 15 |     # Parse command-line arguments
 16 |     parser = argparse.ArgumentParser(description="GPU Benchmark by United Compute")
 17 |     parser.add_argument("--provider", type=str, help="Cloud provider (e.g., RunPod, AWS, GCP) or Private", default="Private")
 18 |     parser.add_argument("--gpu", type=int, help="GPU device index to use (defaults to CUDA_VISIBLE_DEVICES or 0)", default=None)
 19 |     parser.add_argument(
 20 |         "--model", 
 21 |         type=str, 
 22 |         help="Model to benchmark (e.g., stable-diffusion-1-5, qwen3-0-6b)", 
 23 |         default="stable-diffusion-1-5",
 24 |         choices=["stable-diffusion-1-5", "qwen3-0-6b"]
 25 |     )
 26 |     args = parser.parse_args()
 27 |     
 28 |     # If GPU device is specified, set it
 29 |     if args.gpu is not None:
 30 |         torch.cuda.set_device(args.gpu)
 31 |     
 32 |     # Convert provider to lowercase
 33 |     provider = args.provider.lower()
 34 |     
 35 |     # Simple start message
 36 |     print(f"GPU Benchmark starting for model: {args.model}...")
 37 |     print("This benchmark will run for 5 minutes")
 38 |     
 39 |     # Fixed duration
 40 |     duration = 300  # 300 seconds
 41 |     
 42 |     results = None
 43 |     if args.model == "stable-diffusion-1-5":
 44 |         print("Loading Stable Diffusion 1.5 pipeline...")
 45 |         pipe = stable_diffusion_1_5.load_pipeline() 
 46 |         print("Pipeline loaded successfully!")
 47 |         
 48 |         print("Running Stable Diffusion 1.5 benchmark...")
 49 |         results = stable_diffusion_1_5.run_benchmark(pipe=pipe, duration=duration)
 50 |     elif args.model == "qwen3-0-6b":
 51 |         print("Loading Qwen3-0-6B model...")
 52 |         model, tokenizer = qwen3_0_6b.load_pipeline()
 53 |         
 54 |         print("Running Qwen3-0-6B benchmark...")
 55 |         results = qwen3_0_6b.run_benchmark(model=model, tokenizer=tokenizer, duration=duration)
 56 |     else:
 57 |         print(f"Error: Model {args.model} not supported.")
 58 |         return
 59 | 
 60 |     # Only proceed if the benchmark completed successfully (not canceled)
 61 |     if results and results.get("completed", False):
 62 |         primary_metric_val = None
 63 |         max_temp_val = None
 64 |         avg_temp_val = None
 65 |         gpu_memory_val = None
 66 | 
 67 |         # Get the primary metric using the generic 'result' key
 68 |         primary_metric_val = results.get('result')
 69 | 
 70 |         if args.model == "stable-diffusion-1-5":
 71 |             max_temp_val = results.get('max_temp')
 72 |             avg_temp_val = results.get('avg_temp')
 73 |             gpu_memory_val = results.get('gpu_memory_total')
 74 |         elif args.model == "qwen3-0-6b":
 75 |             max_temp_val = results.get('max_temp')
 76 |             avg_temp_val = results.get('avg_temp')
 77 |             gpu_memory_val = results.get('gpu_memory_total')
 78 |         
 79 |         # The upload_benchmark_results function will print the success message and ID.
 80 |         upload_benchmark_results(
 81 |             model_name=args.model,
 82 |             primary_metric_value=primary_metric_val, # This is now consistently from results.get('result')
 83 |             max_temp=max_temp_val,
 84 |             avg_temp=avg_temp_val,
 85 |             cloud_provider=provider,
 86 |             gpu_power_watts=results.get('gpu_power_watts'),
 87 |             gpu_memory_total=gpu_memory_val, 
 88 |             platform=results.get('platform'),
 89 |             acceleration=results.get('acceleration'),
 90 |             torch_version=results.get('torch_version')
 91 |         )
 92 |         
 93 |         print("Benchmark completed") # Final confirmation message
 94 |     elif results and results.get("error"):
 95 |         print(f"\nBenchmark failed: {results.get('error')}")
 96 |     elif results is None and args.model != "stable-diffusion-1-5" and args.model != "qwen3-0-6b": # Model not supported
 97 |         pass # Error already printed
 98 |     else:
 99 |         print("\nBenchmark was canceled or did not complete. Results not submitted.")
100 |         if results and results.get("reason") == "canceled":
101 |              # When printing items processed before cancellation, also use 'result'
102 |              items_before_cancel = results.get('result', 0)
103 |              if args.model == "qwen3-0-6b":
104 |                   print(f"Generations processed before cancellation: {items_before_cancel}")
105 |              elif args.model == "stable-diffusion-1-5":
106 |                   print(f"Images generated before cancellation: {items_before_cancel}")
107 | 
108 | if __name__ == "__main__":
109 |     main()


--------------------------------------------------------------------------------
/src/gpu_benchmark/database.py:
--------------------------------------------------------------------------------
  1 | # src/gpu_benchmark/database.py
  2 | import requests
  3 | import datetime
  4 | import torch
  5 | 
  6 | # Hardcoded Supabase credentials (anon key is designed to be public)
  7 | SUPABASE_URL = "https://jftqjabhnesfphpkoilc.supabase.co"
  8 | SUPABASE_ANON_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImpmdHFqYWJobmVzZnBocGtvaWxjIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDQ5NzI4NzIsImV4cCI6MjA2MDU0ODg3Mn0.S0ZdRIauUyMhdVJtYFNquvnlW3dV1wxERy7YrurZyag"
  9 | 
 10 | def country_code_to_flag(country_code):
 11 |     """Convert country code to flag emoji."""
 12 |     if len(country_code) != 2 or not country_code.isalpha():
 13 |         return "🏳️"  # White flag for unknown
 14 |     
 15 |     # Convert each letter to regional indicator symbol
 16 |     # A-Z: 0x41-0x5A -> regional indicators: 0x1F1E6-0x1F1FF
 17 |     return ''.join(chr(ord(c.upper()) - ord('A') + ord('🇦')) for c in country_code)
 18 | 
 19 | def get_country_flag():
 20 |     """Get country flag emoji based on IP."""
 21 |     try:
 22 |         country_response = requests.get("https://ipinfo.io/json")
 23 |         country_code = country_response.json().get("country", "Unknown")
 24 |         return country_code_to_flag(country_code)
 25 |     except Exception as e:
 26 |         print(f"Error getting country info: {e}")
 27 |         return "🏳️"  # White flag for unknown
 28 | 
 29 | def upload_benchmark_results(model_name: str, primary_metric_value: int, max_temp: float, avg_temp: float, cloud_provider: str = "Private", **kwargs):
 30 |     """Upload benchmark results to Supabase database.
 31 |     
 32 |     Args:
 33 |         model_name: Name of the model ("stable-diffusion-1-5", "qwen3-0-6b") to determine the target table.
 34 |         primary_metric_value: Value for the primary metric (e.g., images generated or generations processed),
 35 |                               which will be stored in the 'result' column.
 36 |         max_temp: Maximum GPU temperature recorded.
 37 |         avg_temp: Average GPU temperature recorded.
 38 |         cloud_provider: Cloud provider name (default: "Private").
 39 |         **kwargs: Additional fields to upload (e.g., gpu_power_watts, gpu_memory_total).
 40 |         
 41 |     Returns:
 42 |         tuple: (success, message, record_id)
 43 |     """
 44 |     
 45 |     table_name = ""
 46 |     metric_column_name = "result" # Generic column name for the primary metric
 47 | 
 48 |     if model_name == "stable-diffusion-1-5":
 49 |         table_name = "stable-diffusion-1-5"
 50 |     elif model_name == "qwen3-0-6b":
 51 |         table_name = "qwen3-0-6b"
 52 |     else:
 53 |         err_msg = f"Unsupported model_name '{model_name}' for database upload."
 54 |         print(f"❌ {err_msg}")
 55 |         return False, err_msg, None
 56 | 
 57 |     # Get country flag
 58 |     flag_emoji = get_country_flag()
 59 |     
 60 |     # Prepare benchmark results
 61 |     benchmark_data = {
 62 |         "created_at": datetime.datetime.now().isoformat(),
 63 |         "gpu_type": torch.cuda.get_device_name(torch.cuda.current_device()) if torch.cuda.is_available() else "N/A",
 64 |         metric_column_name: primary_metric_value, # Using "result" as the column name
 65 |         "max_heat": int(max_temp) if max_temp is not None else None,
 66 |         "avg_heat": int(avg_temp) if avg_temp is not None else None,
 67 |         "country": flag_emoji,
 68 |         "provider": cloud_provider
 69 |     }
 70 |     
 71 |     # Add additional fields if provided.
 72 |     additional_fields_expected = [
 73 |         "gpu_power_watts", "gpu_memory_total", "platform", 
 74 |         "acceleration", "torch_version"
 75 |     ]
 76 |     
 77 |     for field in additional_fields_expected:
 78 |         if field in kwargs and kwargs[field] is not None:
 79 |             benchmark_data[field] = kwargs[field]
 80 |     
 81 |     api_url = f"{SUPABASE_URL}/rest/v1/{table_name}" # Dynamic table name
 82 |     
 83 |     try:
 84 |         response = requests.post(
 85 |             api_url,
 86 |             json=benchmark_data,
 87 |             headers={
 88 |                 "Content-Type": "application/json",
 89 |                 "apikey": SUPABASE_ANON_KEY,
 90 |                 "Authorization": f"Bearer {SUPABASE_ANON_KEY}",
 91 |                 "Prefer": "return=representation"
 92 |             }
 93 |         )
 94 |         
 95 |         if response.status_code in (200, 201):
 96 |             try:
 97 |                 record_data = response.json()
 98 |                 if isinstance(record_data, list) and len(record_data) > 0:
 99 |                     record_id = record_data[0].get('id')
100 |                     print(f"✅ Results uploaded successfully to benchmark results!")
101 |                     print(f"Your ID at www.unitedcompute.ai/gpu-benchmark: {record_id}")
102 |                     return True, "Upload successful", record_id
103 |                 else:
104 |                     print(f"✅ Upload successful, but couldn't retrieve ID from response: {record_data}")
105 |                     return True, "Upload successful, but couldn't retrieve ID", None
106 |             except ValueError as e: # Catch JSON decoding errors
107 |                 print(f"✅ Upload reported success (status {response.status_code}), but failed to parse JSON response: {e}. Response text: '{response.text}'")
108 |                 return True, f"Upload successful (status {response.status_code}), but error parsing response", None
109 |         else:
110 |             error_details = f"Status Code: {response.status_code}. Response Body: '{response.text}'. Headers: {response.headers}"
111 |             error_message = f"Failed to upload results to Supabase. {error_details}"
112 |             print(f"❌ Database Upload Error: {error_message}")
113 |             if response.status_code == 400:
114 |                 print("Hint (400 Bad Request): This might be due to a mismatch between the data sent and the table schema in Supabase (e.g., wrong data types for columns, missing required columns that are not nullable, or malformed JSON). Check the 'Response Body' above for specific column errors from Supabase.")
115 |             elif response.status_code == 401:
116 |                 print("Hint (401 Unauthorized): Check if the Supabase ANON_KEY is correct and has the necessary INSERT permissions for the table. Review Row Level Security (RLS) policies on the table.")
117 |             elif response.status_code == 403:
118 |                  print("Hint (403 Forbidden): The request was understood, but refused. This often relates to permissions, possibly RLS policies or service-level API key permissions for insert operations on the target table.")
119 |             elif response.status_code == 404:
120 |                 print(f"Hint (404 Not Found): Check if the table_name '{table_name}' is correct and the API endpoint '{api_url}' is valid. The table might not exist or the URL path could be wrong.")
121 |             return False, error_message, None
122 |             
123 |     except requests.exceptions.ConnectionError as e:
124 |         error_message = f"Network Connection Error: Failed to connect to Supabase at {SUPABASE_URL}. Details: {e}"
125 |         print(f"❌ {error_message}")
126 |         print("Troubleshooting: Check your internet connection and firewall settings. Ensure Supabase services are operational.")
127 |         return False, error_message, None
128 |     except requests.exceptions.Timeout as e:
129 |         error_message = f"Request Timeout: The request to Supabase timed out. URL: {api_url}. Details: {e}"
130 |         print(f"❌ {error_message}")
131 |         print("Troubleshooting: Check your network connection. The Supabase server might be overloaded or slow to respond.")
132 |         return False, error_message, None
133 |     except requests.exceptions.RequestException as e: # Catches other requests-related errors (e.g., invalid URL)
134 |         error_message = f"Request Error: An error occurred during the request to Supabase. URL: {api_url}. Details: {type(e).__name__} - {e}"
135 |         print(f"❌ {error_message}")
136 |         return False, error_message, None
137 |     except Exception as e:
138 |         import traceback
139 |         error_message = f"Unexpected Error: An unexpected Python error occurred during database upload. Details: {type(e).__name__} - {e}"
140 |         print(f"❌ {error_message}")
141 |         traceback.print_exc()
142 |         return False, error_message, None


--------------------------------------------------------------------------------
/src/gpu_benchmark/benchmarks/stable_diffusion_1_5.py:
--------------------------------------------------------------------------------
  1 | # src/gpu_benchmark/benchmarks/stable_diffusion_1_5.py
  2 | import torch
  3 | import time
  4 | from tqdm import tqdm
  5 | import pynvml
  6 | from diffusers import StableDiffusionPipeline
  7 | import platform
  8 | import os
  9 | 
 10 | def get_clean_platform():
 11 |     os_platform = platform.system()
 12 |     if os_platform == "Linux":
 13 |         try:
 14 |             with open("/etc/os-release") as f:
 15 |                 for line in f:
 16 |                     if line.startswith("PRETTY_NAME="):
 17 |                         return line.strip().split("=")[1].strip('"')
 18 |         except Exception:
 19 |             pass
 20 |         return f"Linux {platform.release()}"
 21 |     elif os_platform == "Windows":
 22 |         return f"Windows {platform.release()}"
 23 |     elif os_platform == "Darwin":
 24 |         return f"macOS {platform.mac_ver()[0]}"
 25 |     else:
 26 |         return os_platform
 27 | 
 28 | def load_pipeline():
 29 |     """Load the Stable Diffusion pipeline and return it."""    
 30 |     model_id = "yachty66/stable-diffusion-v1-5"
 31 |     pipe = StableDiffusionPipeline.from_pretrained(
 32 |         model_id, 
 33 |         torch_dtype=torch.float16,
 34 |         low_cpu_mem_usage=True
 35 |     )
 36 |     pipe = pipe.to("cuda")
 37 |     return pipe
 38 | 
 39 | def get_nvml_device_handle():
 40 |     """Get the correct NVML device handle for the GPU being used."""
 41 |     pynvml.nvmlInit()
 42 |     
 43 |     # Check CUDA_VISIBLE_DEVICES first
 44 |     cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
 45 |     if cuda_visible_devices is not None:
 46 |         try:
 47 |             # When CUDA_VISIBLE_DEVICES is set, the first (and only) visible GPU
 48 |             # becomes index 0 to CUDA, but we need to use the original index for NVML
 49 |             original_gpu_index = int(cuda_visible_devices.split(',')[0])
 50 |             handle = pynvml.nvmlDeviceGetHandleByIndex(original_gpu_index)
 51 |             return handle
 52 |         except (ValueError, IndexError):
 53 |             print(f"Warning: Could not parse CUDA_VISIBLE_DEVICES={cuda_visible_devices}")
 54 |     
 55 |     # Fallback to current CUDA device
 56 |     cuda_idx = torch.cuda.current_device()
 57 |     return pynvml.nvmlDeviceGetHandleByIndex(cuda_idx)
 58 | 
 59 | def run_benchmark(pipe, duration):
 60 |     """Run the GPU benchmark for the specified duration in seconds."""
 61 |     # Get the correct NVML handle for the GPU being used
 62 |     handle = get_nvml_device_handle()
 63 |     
 64 |     # Setup variables
 65 |     image_count = 0
 66 |     total_gpu_time = 0
 67 |     temp_readings = []
 68 |     power_readings = []
 69 |     
 70 |     # Start benchmark
 71 |     start_time = time.time()
 72 |     end_time = start_time + duration
 73 |     prompt = "a photo of an astronaut riding a horse on mars"
 74 |     
 75 |     try:
 76 |         # Disable progress bar for the pipeline
 77 |         pipe.set_progress_bar_config(disable=True)
 78 |         
 79 |         # Create a progress bar for the entire benchmark
 80 |         with tqdm(total=100, desc="Benchmark progress", unit="%") as pbar:
 81 |             # Calculate update amount per check
 82 |             last_update_time = start_time
 83 |             last_update_percent = 0
 84 |             
 85 |             # Run until time is up
 86 |             while time.time() < end_time:
 87 |                 # Get GPU temperature
 88 |                 current_temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
 89 |                 temp_readings.append(current_temp)
 90 |                 
 91 |                 # CUDA timing events
 92 |                 start_event = torch.cuda.Event(enable_timing=True)
 93 |                 end_event = torch.cuda.Event(enable_timing=True)
 94 |                 torch.cuda.synchronize()
 95 |                 
 96 |                 # Record start time and generate image
 97 |                 start_event.record()
 98 |                 image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0]
 99 |                 end_event.record()
100 |                 torch.cuda.synchronize()
101 |                 
102 |                 # Calculate timing
103 |                 gpu_time_ms = start_event.elapsed_time(end_event)
104 |                 total_gpu_time += gpu_time_ms
105 |                 
106 |                 # Update counter
107 |                 image_count += 1
108 |                 
109 |                 # Sample power usage
110 |                 try:
111 |                     current_power = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0  # mW to W
112 |                     power_readings.append(current_power)
113 |                 except:
114 |                     pass
115 |                 
116 |                 # Update progress bar
117 |                 current_time = time.time()
118 |                 current_percent = min(100, int((current_time - start_time) / duration * 100))
119 |                 if current_percent > last_update_percent:
120 |                     pbar.update(current_percent - last_update_percent)
121 |                     pbar.set_postfix({
122 |                         'Images': image_count, 
123 |                         'Temp': f"{current_temp}°C"
124 |                     })
125 |                     last_update_percent = current_percent
126 |         
127 |         # Final temperature reading
128 |         final_temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
129 |         temp_readings.append(final_temp)
130 |         
131 |         # Calculate results
132 |         elapsed = time.time() - start_time
133 |         avg_time_ms = total_gpu_time / image_count if image_count > 0 else 0
134 |         avg_temp = sum(temp_readings) / len(temp_readings)
135 |         max_temp = max(temp_readings)
136 |         
137 |         # Get GPU power info
138 |         try:
139 |             power_usage = round(pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0, 2)  # mW to W with 2 decimal places
140 |         except:
141 |             power_usage = None
142 |         
143 |         # Get GPU memory info
144 |         try:
145 |             meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
146 |             gpu_memory_total = round(meminfo.total / (1024 * 1024 * 1024), 2)  # bytes to GB
147 |         except:
148 |             gpu_memory_total = None
149 |         
150 |         # Get platform info
151 |         platform_info = get_clean_platform()
152 |         
153 |         # Get CUDA version (acceleration)
154 |         cuda_version = f"CUDA {torch.version.cuda}" if torch.cuda.is_available() else "N/A"
155 |         
156 |         # Get torch version
157 |         torch_version = torch.__version__
158 |         
159 |         # Clean up
160 |         pynvml.nvmlShutdown()
161 | 
162 |         # Calculate average power
163 |         avg_power = round(sum(power_readings) / len(power_readings), 2) if power_readings else None
164 | 
165 |         # Return benchmark results with completed flag
166 |         return {
167 |             "completed": True,  # Flag indicating the benchmark completed successfully
168 |             "result": image_count,
169 |             "max_temp": max_temp,
170 |             "avg_temp": avg_temp,
171 |             "elapsed_time": elapsed,
172 |             "avg_time_ms": avg_time_ms,
173 |             "gpu_utilization": (total_gpu_time/1000)/elapsed*100,
174 |             "gpu_power_watts": avg_power,
175 |             "gpu_memory_total": gpu_memory_total,
176 |             "platform": platform_info,
177 |             "acceleration": cuda_version,
178 |             "torch_version": torch_version
179 |         }
180 |     
181 |     except KeyboardInterrupt:
182 |         # Clean up and return partial results with completed flag set to False
183 |         pynvml.nvmlShutdown()
184 |         return {
185 |             "completed": False,  # Flag indicating the benchmark was canceled
186 |             "result": image_count,
187 |             "max_temp": max(temp_readings) if temp_readings else 0,
188 |             "avg_temp": sum(temp_readings)/len(temp_readings) if temp_readings else 0
189 |         }
190 |     except Exception as e:
191 |         # Handle any other errors, clean up, and return error info
192 |         pynvml.nvmlShutdown()
193 |         print(f"Error during benchmark: {e}")
194 |         return {
195 |             "completed": False,  # Flag indicating the benchmark failed
196 |             "error": str(e),
197 |             "result": image_count
198 |         }


--------------------------------------------------------------------------------
/src/gpu_benchmark/benchmarks/qwen3_0_6b.py:
--------------------------------------------------------------------------------
  1 | from transformers import AutoModelForCausalLM, AutoTokenizer, logging
  2 | import torch
  3 | import time
  4 | from tqdm import tqdm
  5 | import pynvml
  6 | import platform
  7 | import os
  8 | import random
  9 | import numpy as np
 10 | 
 11 | # Disable all warnings and progress bars from transformers
 12 | logging.set_verbosity_error()
 13 | os.environ["TOKENIZERS_PARALLELISM"] = "false"
 14 | 
 15 | def get_clean_platform():
 16 |     os_platform = platform.system()
 17 |     if os_platform == "Linux":
 18 |         try:
 19 |             with open("/etc/os-release") as f:
 20 |                 for line in f:
 21 |                     if line.startswith("PRETTY_NAME="):
 22 |                         return line.strip().split("=")[1].strip('"')
 23 |         except Exception:
 24 |             pass
 25 |         return f"Linux {platform.release()}"
 26 |     elif os_platform == "Windows":
 27 |         return f"Windows {platform.release()}"
 28 |     elif os_platform == "Darwin":
 29 |         return f"macOS {platform.mac_ver()[0]}"
 30 |     else:
 31 |         return os_platform
 32 | 
 33 | def get_nvml_device_handle():
 34 |     """Get the correct NVML device handle for the GPU being used."""
 35 |     pynvml.nvmlInit()
 36 |     
 37 |     cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
 38 |     if cuda_visible_devices is not None:
 39 |         try:
 40 |             original_gpu_index = int(cuda_visible_devices.split(',')[0])
 41 |             handle = pynvml.nvmlDeviceGetHandleByIndex(original_gpu_index)
 42 |             return handle
 43 |         except (ValueError, IndexError):
 44 |             print(f"Warning: Could not parse CUDA_VISIBLE_DEVICES={cuda_visible_devices}")
 45 |     
 46 |     cuda_idx = torch.cuda.current_device()
 47 |     return pynvml.nvmlDeviceGetHandleByIndex(cuda_idx)
 48 | 
 49 | def setup_qwen_model():
 50 |     model_name = "Qwen/Qwen3-0.6B"
 51 |     # Disable tokenizer warnings
 52 |     tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
 53 |     model = AutoModelForCausalLM.from_pretrained(
 54 |         model_name,
 55 |         torch_dtype=torch.float16,
 56 |         device_map="auto",
 57 |         # Disable model warnings and progress bars
 58 |         use_cache=True,
 59 |         low_cpu_mem_usage=True,
 60 |     )
 61 |     # Disable generation warnings
 62 |     model.generation_config.pad_token_id = tokenizer.pad_token_id
 63 |     model.config.pad_token_id = tokenizer.pad_token_id
 64 |     return model, tokenizer
 65 | 
 66 | def run_benchmark(model, tokenizer, duration):
 67 |     """Run the GPU benchmark for the specified duration in seconds."""
 68 |     handle = get_nvml_device_handle()
 69 |     
 70 |     # Setup variables
 71 |     generation_count = 0
 72 |     total_gpu_time = 0
 73 |     temp_readings = []
 74 |     power_readings = []
 75 |     
 76 |     # Start benchmark
 77 |     start_time = time.time()
 78 |     end_time = start_time + duration
 79 |     prompt = "Write a technical explanation of how GPUs process neural networks, in exactly 100 words."
 80 |     
 81 |     try:
 82 |         # Create a single progress bar for the entire benchmark
 83 |         with tqdm(total=100, desc="Benchmark progress", unit="%", ncols=100) as pbar:
 84 |             last_update_percent = 0
 85 |             
 86 |             while time.time() < end_time:
 87 |                 # Get GPU temperature
 88 |                 current_temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
 89 |                 temp_readings.append(current_temp)
 90 |                 
 91 |                 # CUDA timing events
 92 |                 start_event = torch.cuda.Event(enable_timing=True)
 93 |                 end_event = torch.cuda.Event(enable_timing=True)
 94 |                 torch.cuda.synchronize()
 95 |                 
 96 |                 # Record start time and generate text
 97 |                 start_event.record()
 98 |                 
 99 |                 # Generate text without warnings
100 |                 with torch.no_grad():
101 |                     messages = [{"role": "user", "content": prompt}]
102 |                     text = tokenizer.apply_chat_template(
103 |                         messages,
104 |                         tokenize=False,
105 |                         add_generation_prompt=True,
106 |                         enable_thinking=False,
107 |                         add_special_tokens=False
108 |                     )
109 |                     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
110 |                     
111 |                     generated_ids = model.generate(
112 |                         **model_inputs,
113 |                         max_new_tokens=256,
114 |                         do_sample=False,
115 |                         use_cache=True,
116 |                         pad_token_id=tokenizer.pad_token_id
117 |                     )
118 | 
119 |                 
120 |                 end_event.record()
121 |                 torch.cuda.synchronize()
122 |                 
123 |                 # Calculate timing
124 |                 gpu_time_ms = start_event.elapsed_time(end_event)
125 |                 total_gpu_time += gpu_time_ms
126 |                 
127 |                 # Update counter
128 |                 generation_count += 1
129 |                 
130 |                 # Sample power usage
131 |                 try:
132 |                     current_power = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0
133 |                     power_readings.append(current_power)
134 |                 except:
135 |                     pass
136 |                 
137 |                 # Update progress bar only when percentage changes
138 |                 current_time = time.time()
139 |                 current_percent = min(100, int((current_time - start_time) / duration * 100))
140 |                 if current_percent > last_update_percent:
141 |                     pbar.update(current_percent - last_update_percent)
142 |                     pbar.set_postfix({
143 |                         'Generations': generation_count, 
144 |                         'Temp': f"{current_temp}°C"
145 |                     }, refresh=True)
146 |                     last_update_percent = current_percent
147 |         
148 |         # Calculate results
149 |         elapsed = time.time() - start_time
150 |         avg_time_ms = total_gpu_time / generation_count if generation_count > 0 else 0
151 |         avg_temp = sum(temp_readings) / len(temp_readings)
152 |         max_temp = max(temp_readings)
153 |         
154 |         # Get GPU memory info
155 |         try:
156 |             meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
157 |             gpu_memory_total = round(meminfo.total / (1024 * 1024 * 1024), 2)
158 |         except:
159 |             gpu_memory_total = None
160 |         
161 |         # Calculate average power
162 |         avg_power = round(sum(power_readings) / len(power_readings), 2) if power_readings else None
163 |         
164 |         # Clean up
165 |         pynvml.nvmlShutdown()
166 |         
167 |         return {
168 |             "completed": True,
169 |             "result": generation_count,
170 |             "max_temp": max_temp,
171 |             "avg_temp": avg_temp,
172 |             "elapsed_time": elapsed,
173 |             "avg_time_ms": avg_time_ms,
174 |             "gpu_utilization": (total_gpu_time/1000)/elapsed*100,
175 |             "gpu_power_watts": avg_power,
176 |             "gpu_memory_total": gpu_memory_total,
177 |             "platform": get_clean_platform(),
178 |             "acceleration": f"CUDA {torch.version.cuda}" if torch.cuda.is_available() else "N/A",
179 |             "torch_version": torch.__version__
180 |         }
181 |     
182 |     except KeyboardInterrupt:
183 |         pynvml.nvmlShutdown()
184 |         return {
185 |             "completed": False,
186 |             "result": generation_count,
187 |             "max_temp": max(temp_readings) if temp_readings else 0,
188 |             "avg_temp": sum(temp_readings)/len(temp_readings) if temp_readings else 0,
189 |             "avg_time_ms": total_gpu_time / generation_count if generation_count > 0 else 0
190 |         }
191 |     except Exception as e:
192 |         pynvml.nvmlShutdown()
193 |         print(f"Error during benchmark: {e}")
194 |         return {
195 |             "completed": False,
196 |             "error": str(e),
197 |             "result": generation_count,
198 |             "avg_time_ms": total_gpu_time / generation_count if generation_count > 0 else 0
199 |         }
200 | 
def load_pipeline():
    """Load the Qwen3-0.6B tokenizer and causal-LM model.

    The tokenizer is loaded first (fast implementation requested), then the
    model in float16 with ``device_map="auto"``.  The tokenizer's pad token
    id is copied onto both the model config and its generation config.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    checkpoint = "Qwen/Qwen3-0.6B"

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=True)

    load_options = {
        "torch_dtype": torch.float16,
        "device_map": "auto",
        "use_cache": True,
        "low_cpu_mem_usage": True,
    }
    model = AutoModelForCausalLM.from_pretrained(checkpoint, **load_options)

    # Keep both configs aligned with the tokenizer's pad token (the original
    # comment describes this as silencing generation warnings).
    pad_id = tokenizer.pad_token_id
    model.generation_config.pad_token_id = pad_id
    model.config.pad_token_id = pad_id

    return model, tokenizer
217 | 
218 | # if __name__ == "__main__":
219 | #     # Load the model pipeline
220 | #     model, tokenizer = load_pipeline()
221 |     
222 | #     # Run benchmark for 300 seconds (5 minutes)
223 | #     results = run_benchmark(model, tokenizer, duration=300)
224 |     
225 | #     # Print results
226 | #     print("\nBenchmark Results:")
227 | #     print(f"Completed: {results['completed']}")
228 | #     if results.get('error'):
229 | #         print(f"Error: {results['error']}")
230 | #     else:
231 | #         print(f"Total generations: {results['result']}")
232 | #         if 'avg_time_ms' in results:
233 | #             print(f"Average generation time: {results['avg_time_ms']:.2f}ms")
234 | #             print(f"GPU utilization: {results['gpu_utilization']:.2f}%")
235 | #             print(f"Maximum GPU temperature: {results['max_temp']}°C")
236 | #             print(f"Average GPU temperature: {results['avg_temp']:.2f}°C")
237 | #             if results['gpu_power_watts']:
238 | #                 print(f"Average GPU power usage: {results['gpu_power_watts']}W")
239 | #             print(f"GPU memory total: {results['gpu_memory_total']}GB")
240 | #             print(f"Platform: {results['platform']}")
241 | #             print(f"Acceleration: {results['acceleration']}")
242 | #             print(f"PyTorch version: {results['torch_version']}")


--------------------------------------------------------------------------------