├── anemll_bench.egg-info ├── dependency_links.txt ├── top_level.txt ├── requires.txt └── SOURCES.txt ├── .DS_Store ├── assets └── sample.png ├── reports ├── chip_comparison_direct.png └── chip_comparison_llama_lm_head.png ├── anemll_bench ├── reports │ ├── __init__.py │ └── report_uploader.py ├── utils │ ├── __init__.py │ ├── visualization.py │ ├── ane_verification.py │ └── system_info.py ├── __init__.py ├── models │ ├── meta.yalm │ ├── benchmark_result.py │ ├── __init__.py │ └── model_syncer.py └── __main__.py ├── .gitignore ├── examples ├── benchmark_config.json ├── check_online_models.py ├── plot_chip_comparison.py ├── sync_models.py ├── load_platform_models.py ├── test_lm_head_benchmark.py ├── benchmark_local_lm_head.py ├── DUAL_MODEL_BENCHMARKING.md ├── benchmark_all_models.py ├── manage_cache.py ├── benchmark_dual_models.py ├── basic_benchmark.py ├── test_model_loading.py ├── generate_results_report.py ├── profile_coreml.py └── batch_profile.py ├── requirements.txt ├── test_browser_open.py ├── setup.py ├── tests ├── test_system_info.py ├── test_benchmark.py └── test_report_uploader.py ├── install_dependencies.sh ├── create_python39_env.sh ├── debug_ane.py ├── check_setup.py └── Results.MD /anemll_bench.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /anemll_bench.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | anemll_bench 2 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anemll/anemll-bench/HEAD/.DS_Store -------------------------------------------------------------------------------- /assets/sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anemll/anemll-bench/HEAD/assets/sample.png -------------------------------------------------------------------------------- /reports/chip_comparison_direct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anemll/anemll-bench/HEAD/reports/chip_comparison_direct.png -------------------------------------------------------------------------------- /reports/chip_comparison_llama_lm_head.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anemll/anemll-bench/HEAD/reports/chip_comparison_llama_lm_head.png -------------------------------------------------------------------------------- /anemll_bench/reports/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reports module for anemll-bench. 
3 | """ 4 | 5 | # Import necessary functions for external use 6 | from anemll_bench.reports.report_generator import generate_report 7 | 8 | __all__ = ["generate_report"] -------------------------------------------------------------------------------- /anemll_bench/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for the ANEMLL-Bench package 3 | """ 4 | 5 | from .visualization import plot_chip_comparison, plot_benchmark_results 6 | 7 | __all__ = [ 8 | 'plot_chip_comparison', 9 | 'plot_benchmark_results', 10 | ] -------------------------------------------------------------------------------- /anemll_bench.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | coremltools>=8.2 2 | transformers>=4.36.0 3 | numpy<2 4 | pandas>=2.0.0 5 | matplotlib>=3.5.0 6 | seaborn>=0.12.0 7 | plotly>=5.18.0 8 | tqdm>=4.66.0 9 | requests>=2.28.0 10 | psutil>=5.9.0 11 | pyyaml>=6.0 12 | black>=23.12.0 13 | flake8>=7.0.0 14 | pytest>=7.4.0 15 | pytest-cov>=4.1.0 16 | -------------------------------------------------------------------------------- /anemll_bench/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | ANEMLL-Bench: A benchmarking tool for measuring Apple Neural Engine performance 3 | """ 4 | 5 | __version__ = "0.2.0" 6 | 7 | # Avoid importing heavy/optional deps at package import time so that 8 | # lightweight utilities (e.g., plotting) work without torch/psutil installed. 9 | try: 10 | from anemll_bench.benchmark import Benchmark # noqa: F401 11 | __all__ = ["Benchmark"] 12 | except Exception: 13 | # Soft-fail: allow importing submodules like anemll_bench.utils without torch 14 | __all__ = [] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Virtual Environment 2 | env-aneml-bench/* 3 | venv/ 4 | ENV/ 5 | .env 6 | 7 | # Python 8 | __pycache__/ 9 | *.py[cod] 10 | *.class 11 | *.so 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | 17 | 18 | profile_report.html 19 | anemll_bench/results/ 20 | reports/*.html 21 | examples/*.html 22 | benchmark_*.html 23 | env-anemll-bench/* 24 | reports/plots_20250306_125822/throughput.png 25 | =8.2 26 | examples/upload_deephermes_model.py 27 | examples/update_dual_benchmark.py 28 | anemll_bench/__init__.py 29 | anemll_bench/__init__.py 30 | anemll_bench/__init__.py 31 | -------------------------------------------------------------------------------- /anemll_bench/models/meta.yalm: -------------------------------------------------------------------------------- 1 | model_info: 2 | macos_15_x: 3 | - name: "llama_lm_head" 4 | url: "https://huggingface.co/anemll/anemll-bench/resolve/main/llama_lm_head.mlpackage.zip" 5 | type: "mlpackage" 6 | hidden_size: 4096 7 | 8 | - name: "llama_lm_head_lut6" 9 | url: "https://huggingface.co/anemll/anemll-bench/resolve/main/llama_lm_head_lut6.mlpackage.zip" 10 | type: "mlpackage" 11 | hidden_size: 4096 12 | 13 | - name: "DeepHermes_lm_head" 14 | url: "https://huggingface.co/anemll/anemll-bench/resolve/main/DeepHermes_lm_head.mlpackage.zip" 15 | type: "mlpackage" 16 | hidden_size: 4096 -------------------------------------------------------------------------------- /examples/benchmark_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "models": [ 3 | { 4 | "name": 
"Phi-2", 5 | "id": "microsoft/phi-2", 6 | "type": "pytorch", 7 | "input_shape": [1, 128, 2560], 8 | "backends": ["CPU", "ANE"], 9 | "num_runs": 50 10 | }, 11 | { 12 | "name": "DistilBERT", 13 | "id": "distilbert-base-uncased", 14 | "type": "pytorch", 15 | "input_shape": [1, 128, 768], 16 | "backends": ["CPU", "ANE"], 17 | "num_runs": 50 18 | }, 19 | { 20 | "name": "TinyLlama", 21 | "id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", 22 | "type": "pytorch", 23 | "input_shape": [1, 128, 2048], 24 | "backends": ["CPU", "ANE"], 25 | "num_runs": 50 26 | } 27 | ], 28 | "output": { 29 | "report_path": "reports/multi_model_benchmark.html", 30 | "upload": true, 31 | "upload_service": "jsonbin" 32 | } 33 | } -------------------------------------------------------------------------------- /anemll_bench.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | README.md 2 | setup.py 3 | anemll_bench/__init__.py 4 | anemll_bench/__main__.py 5 | anemll_bench/benchmark.py 6 | anemll_bench.egg-info/PKG-INFO 7 | anemll_bench.egg-info/SOURCES.txt 8 | anemll_bench.egg-info/dependency_links.txt 9 | anemll_bench.egg-info/requires.txt 10 | anemll_bench.egg-info/top_level.txt 11 | anemll_bench/models/__init__.py 12 | anemll_bench/models/benchmark_result.py 13 | anemll_bench/models/coreml_adapter.py 14 | anemll_bench/models/model_loader.py 15 | anemll_bench/models/model_syncer.py 16 | anemll_bench/reports/__init__.py 17 | anemll_bench/reports/report_generator.py 18 | anemll_bench/reports/report_uploader.py 19 | anemll_bench/utils/__init__.py 20 | anemll_bench/utils/ane_verification.py 21 | anemll_bench/utils/system_info.py 22 | anemll_bench/utils/visualization.py 23 | tests/test_benchmark.py 24 | tests/test_report_uploader.py 25 | tests/test_system_info.py -------------------------------------------------------------------------------- /anemll_bench/models/benchmark_result.py: -------------------------------------------------------------------------------- 1 | """ 2 | BenchmarkResult class for storing benchmark results 3 | """ 4 | 5 | from dataclasses import dataclass, field 6 | from typing import List, Dict, Any, Optional 7 | import time 8 | 9 | 10 | @dataclass 11 | class BenchmarkResult: 12 | """Class for storing benchmark results""" 13 | model_name: str 14 | backend: str 15 | inference_time_ms: float 16 | input_shape: List[int] 17 | tflops: Optional[float] = None 18 | throughput_gb_s: Optional[float] = None 19 | params_count: int = 0 20 | memory_used_mb: float = 0.0 21 | system_info: Dict[str, Any] = field(default_factory=dict) 22 | model_size_mb: float = 0.0 # Model size in megabytes 23 | timestamp: float = field(default_factory=time.time) # When the benchmark was run 24 | notes: str = "" # Additional notes or context about the benchmark 25 | 26 | @property 27 | def throughput_gbps(self): 28 | """For backward compatibility""" 29 | return self.throughput_gb_s -------------------------------------------------------------------------------- /anemll_bench/models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Model-related utilities for ANEMLL-Bench. 
3 | """ 4 | 5 | from .model_loader import ( 6 | load_model, 7 | download_from_hf, 8 | convert_to_coreml, 9 | load_platform_model_by_name, 10 | list_available_platform_models, 11 | get_platform_specific_models, 12 | check_and_update_platform_models, 13 | get_macos_version, 14 | CACHE_DIR, 15 | MODELS_CACHE_DIR, 16 | sync_platform_models, 17 | download_meta_file, 18 | get_cache_info, 19 | clear_cache, 20 | ) 21 | 22 | from .benchmark_result import BenchmarkResult 23 | from .model_syncer import ModelSyncer 24 | 25 | __all__ = [ 26 | 'load_model', 27 | 'download_from_hf', 28 | 'convert_to_coreml', 29 | 'load_platform_model_by_name', 30 | 'list_available_platform_models', 31 | 'get_platform_specific_models', 32 | 'check_and_update_platform_models', 33 | 'get_macos_version', 34 | 'BenchmarkResult', 35 | 'CACHE_DIR', 36 | 'MODELS_CACHE_DIR', 37 | 'sync_platform_models', 38 | 'ModelSyncer', 39 | 'download_meta_file', 40 | 'get_cache_info', 41 | 'clear_cache', 42 | ] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # ✅ Core Dependencies (required) 2 | # NOTE: ANEMLL is designed to work with Python 3.9.x 3 | # For Apple Silicon Macs, torch must be installed separately with: 4 | # - For Python 3.9–3.12 (macOS Sequoia): pip install "torch>=2.5,<2.6" torchvision torchaudio 5 | # - For Python 3.13: consider skipping torch or using nightly wheels manually 6 | # Torch is intentionally NOT installed here; install_dependencies.sh or manual install should handle it 7 | coremltools>=8.2 # Required for Apple Neural Engine support 8 | transformers>=4.36.0 # Hugging Face Transformers library 9 | # NumPy 2.x breaks some compiled deps on py3.9; prefer <2 for stability 10 | numpy<2 # Required for array operations 11 | pandas>=2.0.0 # Required for data processing and report generation 12 | matplotlib>=3.5.0 # Required for visualization and report charts 13 | seaborn>=0.12.0 # Enhanced statistical data visualization 14 | plotly>=5.18.0 # Interactive plotting for enhanced report visuals 15 | tqdm>=4.66.0 # Progress bars for long-running operations 16 | requests>=2.28.0 # For report uploading functionality 17 | psutil>=5.9.0 # System monitoring and information 18 | pyyaml>=6.0 # For reading YAML configuration files 19 | 20 | # ⚠️ Test & Development Dependencies (optional for users, required for developers) 21 | black>=23.12.0 # Code formatting 22 | flake8>=7.0.0 # Code linting 23 | pytest>=7.4.0 # Testing framework 24 | pytest-cov>=4.1.0 # Test coverage reporting -------------------------------------------------------------------------------- /test_browser_open.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import webbrowser 6 | import subprocess 7 | 8 | # Get the most recent report in ~/.cache/anemll-bench/reports/ 9 | reports_dir = os.path.expanduser("~/.cache/anemll-bench/reports/") 10 | if not os.path.exists(reports_dir): 11 | print(f"Reports directory {reports_dir} does not exist.") 12 | sys.exit(1) 13 | 14 | # List HTML files in the reports directory 15 | html_files = [f for f in os.listdir(reports_dir) if f.endswith('.html')] 16 | if not html_files: 17 | print(f"No HTML files found in {reports_dir}") 18 | sys.exit(1) 19 | 20 | # Sort by modification time (most recent first) 21 | html_files.sort(key=lambda f: os.path.getmtime(os.path.join(reports_dir, f)), reverse=True) 22 | 23 | 
# Get the most recent report 24 | report_file = os.path.join(reports_dir, html_files[0]) 25 | print(f"Opening most recent report: {report_file}") 26 | print(f"File exists: {os.path.exists(report_file)}") 27 | 28 | # Try multiple ways to open the file in a browser 29 | 30 | # Method 1: webbrowser.open 31 | try: 32 | file_url = f"file://{os.path.abspath(report_file)}" 33 | print(f"Method 1: Attempting to open with webbrowser.open: {file_url}") 34 | #result = webbrowser.open(file_url) 35 | #print(f"Method 1 result: {result}") 36 | except Exception as e: 37 | print(f"Method 1 failed: {e}") 38 | 39 | # Method 2: webbrowser.get('safari').open 40 | try: 41 | file_url = f"file://{os.path.abspath(report_file)}" 42 | print(f"Method 2: Attempting to open with Safari: {file_url}") 43 | safari = webbrowser.get('safari') 44 | result = safari.open(file_url) 45 | print(f"Method 2 result: {result}") 46 | except Exception as e: 47 | print(f"Method 2 failed: {e}") 48 | 49 | # Method 3: subprocess.run with 'open' command 50 | try: 51 | print(f"Method 3: Attempting to open with 'open' command: {report_file}") 52 | result = subprocess.run(['open', report_file], check=True) 53 | print(f"Method 3 result: {result}") 54 | except Exception as e: 55 | print(f"Method 3 failed: {e}") 56 | 57 | print("Done!") -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Setup configuration for the anemll_bench package 4 | """ 5 | 6 | from setuptools import setup, find_packages 7 | import os 8 | import re 9 | 10 | 11 | def get_version(): 12 | """Extract version from __init__.py""" 13 | init_py = os.path.join('anemll_bench', '__init__.py') 14 | with open(init_py, 'r', encoding='utf-8') as f: 15 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", f.read(), re.M) 16 | if version_match: 17 | return version_match.group(1) 18 | raise RuntimeError("Unable to find version string.") 19 | 20 | 21 | def get_long_description(): 22 | """Get long description from README""" 23 | with open('README.md', encoding='utf-8') as f: 24 | return f.read() 25 | 26 | 27 | def get_requirements(): 28 | """Get requirements from requirements.txt""" 29 | with open('requirements.txt', encoding='utf-8') as f: 30 | return [line.strip() for line in f if line.strip() and not line.startswith('#')] 31 | 32 | 33 | setup( 34 | name="anemll-bench", 35 | version=get_version(), 36 | description="Benchmarking tools for Apple Neural Engine performance", 37 | long_description=get_long_description(), 38 | long_description_content_type="text/markdown", 39 | author="ANEMLL Team", 40 | author_email="contact@anemll.org", 41 | url="https://github.com/anemll/anemll-bench", 42 | packages=find_packages(), 43 | include_package_data=True, 44 | install_requires=get_requirements(), 45 | python_requires=">=3.8", 46 | classifiers=[ 47 | "Development Status :: 3 - Alpha", 48 | "Intended Audience :: Developers", 49 | "Intended Audience :: Science/Research", 50 | "License :: OSI Approved :: MIT License", 51 | "Programming Language :: Python :: 3", 52 | "Programming Language :: Python :: 3.8", 53 | "Programming Language :: Python :: 3.9", 54 | "Programming Language :: Python :: 3.10", 55 | "Topic :: Scientific/Engineering", 56 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 57 | "Operating System :: MacOS :: MacOS X", 58 | ], 59 | keywords="machine learning, benchmarking, apple neural engine, ML, ANE, 
CoreML", 60 | ) -------------------------------------------------------------------------------- /examples/check_online_models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Example script demonstrating how to check for updated model definitions from Hugging Face 4 | """ 5 | 6 | import logging 7 | import sys 8 | import os 9 | 10 | # Add parent directory to path to import anemll_bench 11 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 12 | 13 | from anemll_bench.models import ( 14 | check_and_update_platform_models, 15 | list_available_platform_models, 16 | get_macos_version, 17 | download_meta_file 18 | ) 19 | 20 | def main(): 21 | # Set up logging 22 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 23 | logger = logging.getLogger(__name__) 24 | 25 | # Check macOS version 26 | macos_version = get_macos_version() 27 | if not macos_version: 28 | logger.error("This script is intended to run on macOS systems only.") 29 | return 30 | 31 | logger.info(f"Running on macOS version category: {macos_version}") 32 | 33 | # Get local model definitions 34 | logger.info("Local model definitions:") 35 | local_models = list_available_platform_models() 36 | 37 | # Check for updates from Hugging Face 38 | logger.info("\nChecking for updated model definitions on Hugging Face...") 39 | online_models = check_and_update_platform_models() 40 | 41 | # Compare local and online models 42 | if not local_models and not online_models: 43 | logger.warning("No models found locally or online.") 44 | return 45 | 46 | # Show differences if any 47 | local_model_names = set(model.get("name") for model in local_models) 48 | online_model_names = set(model.get("name") for model in online_models) 49 | 50 | new_models = online_model_names - local_model_names 51 | removed_models = local_model_names - online_model_names 52 | common_models = local_model_names.intersection(online_model_names) 53 | 54 | if new_models: 55 | logger.info(f"\nNew models available online: {', '.join(new_models)}") 56 | 57 | if removed_models: 58 | logger.info(f"\nModels no longer available online: {', '.join(removed_models)}") 59 | 60 | if common_models: 61 | logger.info(f"\nModels available both locally and online: {', '.join(common_models)}") 62 | 63 | # Force update the meta file 64 | if online_models: 65 | logger.info("\nUpdating local meta.yalm file...") 66 | download_meta_file(force_update=True) 67 | logger.info("Local meta.yalm file updated with latest model definitions.") 68 | 69 | if __name__ == "__main__": 70 | main() -------------------------------------------------------------------------------- /tests/test_system_info.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Tests for the system_info module 4 | """ 5 | 6 | import unittest 7 | import os 8 | import sys 9 | 10 | # Add parent directory to import the package 11 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 12 | 13 | from anemll_bench.utils.system_info import get_system_info, get_cpu_info, get_ram_info 14 | 15 | 16 | class TestSystemInfo(unittest.TestCase): 17 | """Tests for system information gathering functions""" 18 | 19 | def test_get_system_info(self): 20 | """Test that system info is returned with expected keys""" 21 | info = get_system_info() 22 | 23 | # Check that main keys exist 24 | self.assertIn('os', 
info) 25 | self.assertIn('cpu', info) 26 | self.assertIn('ram', info) 27 | self.assertIn('python_version', info) 28 | 29 | # Check OS info 30 | self.assertIn('name', info['os']) 31 | self.assertIn('version', info['os']) 32 | self.assertIn('release', info['os']) 33 | 34 | # Check CPU info 35 | self.assertIn('brand', info['cpu']) 36 | self.assertIn('cores', info['cpu']) 37 | self.assertIn('threads', info['cpu']) 38 | 39 | # Check RAM info 40 | self.assertIn('total_gb', info['ram']) 41 | self.assertIn('available_gb', info['ram']) 42 | 43 | def test_get_cpu_info(self): 44 | """Test CPU info retrieval""" 45 | cpu_info = get_cpu_info() 46 | 47 | self.assertIn('brand', cpu_info) 48 | self.assertIn('architecture', cpu_info) 49 | self.assertIn('cores', cpu_info) 50 | self.assertIn('threads', cpu_info) 51 | 52 | # Make sure cores and threads are integers 53 | self.assertIsInstance(cpu_info['cores'], int) 54 | self.assertIsInstance(cpu_info['threads'], int) 55 | 56 | # Threads should be greater than or equal to cores 57 | self.assertGreaterEqual(cpu_info['threads'], cpu_info['cores']) 58 | 59 | def test_get_ram_info(self): 60 | """Test RAM info retrieval""" 61 | ram_info = get_ram_info() 62 | 63 | self.assertIn('total_gb', ram_info) 64 | self.assertIn('available_gb', ram_info) 65 | 66 | # RAM values should be positive 67 | self.assertGreater(ram_info['total_gb'], 0) 68 | self.assertGreater(ram_info['available_gb'], 0) 69 | 70 | # Available should be less than or equal to total 71 | self.assertLessEqual(ram_info['available_gb'], ram_info['total_gb']) 72 | 73 | 74 | if __name__ == '__main__': 75 | unittest.main() -------------------------------------------------------------------------------- /install_dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | # Prefer project venv Python if present; otherwise fallback to python3/python 6 | PY="python3" 7 | if [ -x "./env-anemll-bench/bin/python" ]; then 8 | PY="./env-anemll-bench/bin/python" 9 | elif command -v python3 >/dev/null 2>&1; then 10 | PY="python3" 11 | elif command -v python >/dev/null 2>&1; then 12 | PY="python" 13 | fi 14 | PIP="$PY -m pip" 15 | 16 | echo "Using Python: $($PY -c 'import sys; print(sys.executable)')" 17 | 18 | # Upgrade pip inside the chosen interpreter (avoids system Python 2) 19 | $PIP install --upgrade pip 20 | 21 | # Detect version (major.minor) without bc 22 | PYTHON_VERSION=$($PY -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') 23 | echo "Detected Python version: $PYTHON_VERSION" 24 | 25 | # Gentle warning if not 3.9 26 | if [ "${PYTHON_VERSION%%.*}" != "3" ] || [ "${PYTHON_VERSION#*.}" != "9" ]; then 27 | echo "⚠️ WARNING: ANEMLL is designed to work best with Python 3.9.x" 28 | echo "Proceeding with $PYTHON_VERSION; some combinations may require manual tweaks." 29 | fi 30 | 31 | # Install PyTorch based on Python version with fallbacks for compatibility 32 | MAJOR=${PYTHON_VERSION%%.*} 33 | MINOR=${PYTHON_VERSION#*.} 34 | 35 | if [ "$MAJOR" -ge 3 ] && [ "$MINOR" -ge 13 ]; then 36 | echo "Python $PYTHON_VERSION detected. Stable PyTorch wheels may be unavailable. Skipping torch install by default." 37 | echo "If you need nightly, run (inside this environment):" 38 | echo " $PIP install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu" 39 | elif [ "$MAJOR" -eq 3 ] && [ "$MINOR" -eq 9 ]; then 40 | echo "Python 3.9 detected. 
Installing PyTorch 2.2.2 for maximum compatibility with CoreMLTools on Sequoia..." 41 | $PIP install "torch==2.2.2" "torchvision==0.17.2" "torchaudio==2.2.2" 42 | elif [ "$MAJOR" -ge 3 ] && [ "$MINOR" -ge 10 ] && [ "$MINOR" -le 12 ]; then 43 | echo "Installing PyTorch (stable) for Python $PYTHON_VERSION..." 44 | $PIP install "torch>=2.5,<2.6" torchvision torchaudio || { 45 | echo "PyTorch 2.5.x not available, falling back to 2.2.2..." 46 | $PIP install "torch==2.2.2" "torchvision==0.17.2" "torchaudio==2.2.2" 47 | } 48 | else 49 | echo "Installing PyTorch 2.2.2 for Python $PYTHON_VERSION (fallback for compatibility)..." 50 | $PIP install "torch==2.2.2" "torchvision==0.17.2" "torchaudio==2.2.2" 51 | fi 52 | 53 | # Install coremltools and the rest of dependencies 54 | $PIP install "coremltools>=8.2" 55 | # Install the rest of the dependencies (pin NumPy <2 for py3.9 stability) 56 | $PIP install -r requirements.txt 57 | 58 | # Verify PyTorch (if installed) and coremltools 59 | $PY - <<'PYEND' 60 | try: 61 | import torch 62 | print(f'PyTorch version: {torch.__version__}') 63 | print(f'MPS available: {getattr(getattr(torch, "backends", object()), "mps", object()).is_available() if hasattr(getattr(torch, "backends", object()), "mps") else False}') 64 | except Exception as e: 65 | print(f'PyTorch not installed or failed to import: {e}') 66 | try: 67 | import coremltools 68 | print(f'CoreMLTools version: {coremltools.__version__}') 69 | except Exception as e: 70 | print(f'coremltools import failed: {e}') 71 | PYEND 72 | 73 | echo "Installation complete!" -------------------------------------------------------------------------------- /examples/plot_chip_comparison.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Example script demonstrating the use of ANEMLL-Bench visualization utilities 4 | to create chip comparison charts. 
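Typical invocation (assumes the repository requirements are installed; the flags shown are the ones defined in the argparse setup below):
    python examples/plot_chip_comparison.py --save --output-dir ./reports --no-show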
5 | """ 6 | 7 | import os 8 | import sys 9 | import argparse 10 | from pathlib import Path 11 | import matplotlib 12 | # Force non-interactive backend for headless PNG generation 13 | matplotlib.use('Agg') 14 | 15 | # Add parent directory to path to allow running this script directly 16 | parent_dir = str(Path(__file__).resolve().parent.parent) 17 | if parent_dir not in sys.path: 18 | sys.path.insert(0, parent_dir) 19 | 20 | from anemll_bench.utils import plot_chip_comparison, plot_benchmark_results 21 | 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser(description='ANEMLL-Bench Chip Comparison Visualization') 25 | parser.add_argument('--save', action='store_true', help='Save the figure to a file') 26 | parser.add_argument('--output-dir', type=str, default='./reports', help='Directory to save the figure') 27 | parser.add_argument('--no-show', action='store_true', help='Do not display the figure') 28 | args = parser.parse_args() 29 | 30 | # Sample data from llama_lm_head model benchmarks 31 | chips = ['M1', 'M1 Pro', 'M1 Max', 'M1 Ultra', 'M2', 'M2 Max', 'M2 Ultra', 'M3', 'M3 Max', 'M4', 'M4 Pro\n24GB Mini', 'M4 Max', 'M5'] 32 | bandwidth = [60.87, 54.90, 54.62, 54.72, 60.45, 62.01, 61.68, 63.10, 120.22, 64.18, 126.36, 118.88, 70.21] # GB/s (llama_lm_head) 33 | inference = [7.52, 7.45, 7.61, 7.58, 8.67, 6.64, 6.70, 6.95, 3.98, 6.45, 3.85, 3.87, 6.10] # ms (llama_lm_head_lut6 for M3 base) 34 | bandwidth_factor = ['1.1x', '1.0x', '1.0x', '1.0x', '1.1x', '1.1x', '1.1x', '1.1x', '2.2x', '1.2x', '2.3x', '2.2x', '1.3x'] 35 | inference_factor = ['1.0x', '1.0x', '1.0x', '1.0x', '0.9x', '1.1x', '1.1x', '1.1x', '1.9x', '1.2x', '2.0x', '2.0x', '1.2x'] 36 | 37 | # Option 1: Use plot_chip_comparison directly 38 | print("Demonstrating direct use of plot_chip_comparison function...\n") 39 | 40 | save_path = None 41 | if args.save: 42 | os.makedirs(args.output_dir, exist_ok=True) 43 | save_path = os.path.join(args.output_dir, "chip_comparison_direct.png") 44 | 45 | plot_chip_comparison( 46 | chips=chips, 47 | bandwidth=bandwidth, 48 | inference=inference, 49 | bandwidth_factor=bandwidth_factor, 50 | inference_factor=inference_factor, 51 | title="ANEMLL-BENCH: Apple Neural Engine Performance Comparison (llama_lm_head)", 52 | save_path=save_path, 53 | show_plot=not args.no_show 54 | ) 55 | 56 | # Option 2: Use plot_benchmark_results with a data dictionary 57 | print("Demonstrating use of plot_benchmark_results function with benchmark data...\n") 58 | 59 | # Create a benchmark data dictionary 60 | benchmark_data = { 61 | 'chips': chips, 62 | 'bandwidth': bandwidth, 63 | 'inference': inference, 64 | 'bandwidth_factor': bandwidth_factor, 65 | 'inference_factor': inference_factor, 66 | } 67 | 68 | # Plot using the higher-level function 69 | plot_benchmark_results( 70 | benchmark_data=benchmark_data, 71 | model_name="llama_lm_head", 72 | save_dir=args.output_dir if args.save else None, 73 | show_plot=not args.no_show 74 | ) 75 | 76 | if args.save: 77 | print(f"Figures saved to {args.output_dir}") 78 | 79 | 80 | if __name__ == "__main__": 81 | main() -------------------------------------------------------------------------------- /examples/sync_models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Utility script to synchronize all platform-specific models 4 | """ 5 | 6 | import logging 7 | import sys 8 | import os 9 | import argparse 10 | 11 | # Add parent directory to path to import anemll_bench 12 | sys.path.insert(0, 
os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 13 | 14 | from anemll_bench.models import sync_platform_models, get_macos_version 15 | 16 | def main(): 17 | # Parse arguments 18 | parser = argparse.ArgumentParser(description="Synchronize ANEMLL-Bench platform models") 19 | parser.add_argument("--force", action="store_true", help="Force update of meta.yalm") 20 | parser.add_argument("--update", action="store_true", 21 | help="Update meta.yalm file and download any missing or new models (recommended)") 22 | parser.add_argument("--parallel", action="store_true", 23 | help="Download models in parallel for faster synchronization") 24 | parser.add_argument("--workers", type=int, default=4, 25 | help="Number of parallel download workers (default: 4)") 26 | parser.add_argument("-q", "--quiet", action="store_true", help="Quiet mode (less output)") 27 | args = parser.parse_args() 28 | 29 | # Set up logging 30 | log_level = logging.WARNING if args.quiet else logging.INFO 31 | logging.basicConfig(level=log_level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 32 | logger = logging.getLogger(__name__) 33 | 34 | # Check if running on macOS 35 | macos_version = get_macos_version() 36 | if not macos_version: 37 | logger.error("This script is intended to run on macOS systems only.") 38 | return 1 39 | 40 | logger.info(f"Running on macOS version category: {macos_version}") 41 | 42 | # Use force_update if either --force or --update flag is specified 43 | force_update = args.force or args.update 44 | 45 | # Synchronize all platform models 46 | try: 47 | results = sync_platform_models( 48 | force_update=force_update, 49 | parallel=args.parallel, 50 | max_workers=args.workers 51 | ) 52 | 53 | # Print summary 54 | print(f"\nSynchronization Summary:") 55 | print(f" - Meta file updated: {'Yes' if results['meta_updated'] else 'No'}") 56 | print(f" - Models checked: {results['models_checked']}") 57 | print(f" - Models downloaded: {results['models_downloaded']}") 58 | print(f" - Models skipped (already exist): {results['models_skipped']}") 59 | print(f" - Models failed: {results['models_failed']}") 60 | 61 | # Print details for downloaded models 62 | if results['models_downloaded'] > 0: 63 | print("\nDownloaded Models:") 64 | for model in results['models']: 65 | if model['action'] == 'downloaded': 66 | print(f" - {model['name']} ({model['type']})") 67 | 68 | # Print details for failed models 69 | if results['models_failed'] > 0: 70 | print("\nFailed Models:") 71 | for model in results['models']: 72 | if model['action'] in ['failed', 'error']: 73 | print(f" - {model['name']} ({model['type']})") 74 | if 'error' in model: 75 | print(f" Error: {model['error']}") 76 | 77 | # Return success if we didn't have any failures 78 | return 0 if results['models_failed'] == 0 else 1 79 | 80 | except Exception as e: 81 | logger.error(f"Error synchronizing models: {e}") 82 | return 1 83 | 84 | if __name__ == "__main__": 85 | sys.exit(main()) -------------------------------------------------------------------------------- /examples/load_platform_models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Example script demonstrating how to use platform-specific model loading functionality 4 | """ 5 | 6 | import logging 7 | import sys 8 | import os 9 | import argparse 10 | 11 | # Add parent directory to path to import anemll_bench 12 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 13 | 14 | from 
anemll_bench.models import ( 15 | list_available_platform_models, 16 | get_macos_version 17 | ) 18 | from anemll_bench import Benchmark 19 | 20 | def main(): 21 | # Parse arguments 22 | parser = argparse.ArgumentParser(description="Benchmark platform-specific models") 23 | parser.add_argument("--check-online", action="store_true", help="Check online for model updates") 24 | parser.add_argument("--num-runs", type=int, default=10, help="Number of benchmark runs (default: 10)") 25 | parser.add_argument("--model", type=str, help="Specific model to benchmark (default: all available)") 26 | args = parser.parse_args() 27 | 28 | # Set up logging 29 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 30 | logger = logging.getLogger(__name__) 31 | 32 | # Check macOS version 33 | macos_version = get_macos_version() 34 | if not macos_version: 35 | logger.error("This script is intended to run on macOS systems only.") 36 | return 37 | 38 | logger.info(f"Running on macOS version category: {macos_version}") 39 | 40 | # List available platform-specific models 41 | logger.info(f"Looking for models {'(including online check)' if args.check_online else '(local only)'}...") 42 | platform_models = list_available_platform_models(check_online=args.check_online) 43 | 44 | if not platform_models: 45 | logger.error("No platform-specific models available.") 46 | return 47 | 48 | # Create a benchmark instance 49 | benchmark = Benchmark() 50 | 51 | # Determine which models to benchmark 52 | models_to_benchmark = [] 53 | if args.model: 54 | # Benchmark only the specified model 55 | for model_config in platform_models: 56 | if model_config.get("name") == args.model: 57 | models_to_benchmark.append(model_config) 58 | break 59 | 60 | if not models_to_benchmark: 61 | logger.error(f"Model '{args.model}' not found.") 62 | return 63 | else: 64 | # Benchmark all available models 65 | models_to_benchmark = platform_models 66 | 67 | # Try to benchmark each selected model 68 | for model_config in models_to_benchmark: 69 | model_name = model_config.get("name") 70 | logger.info(f"Loading and benchmarking model: {model_name}") 71 | 72 | try: 73 | # Using the simplified method with online check option 74 | result = benchmark.benchmark_platform_model( 75 | model_name=model_name, 76 | num_runs=args.num_runs, 77 | check_online=args.check_online 78 | ) 79 | 80 | logger.info(f"Benchmark results for {model_name}:") 81 | logger.info(f" - Inference time: {result.inference_time_ms:.2f} ms") 82 | if result.tflops is not None: 83 | logger.info(f" - TFLOPs: {result.tflops:.2f}") 84 | else: 85 | logger.info(f" - TFLOPs: Not available") 86 | logger.info(f" - Throughput: {result.throughput_gb_s:.2f} GB/s") 87 | 88 | except Exception as e: 89 | logger.error(f"Error benchmarking model {model_name}: {e}") 90 | 91 | # Generate a benchmark report 92 | benchmark.generate_report(output_path="platform_models_benchmark.html") 93 | logger.info("Benchmark report generated: platform_models_benchmark.html") 94 | 95 | if __name__ == "__main__": 96 | main() -------------------------------------------------------------------------------- /tests/test_benchmark.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unit tests for benchmark module 3 | """ 4 | 5 | import pytest 6 | import os 7 | import torch 8 | import numpy as np 9 | from pathlib import Path 10 | 11 | # Add parent directory to path 12 | import sys 13 | sys.path.insert(0, 
os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 14 | 15 | from anemll_bench import Benchmark 16 | from anemll_bench.utils.system_info import get_system_info 17 | 18 | 19 | class SimpleMLP(torch.nn.Module): 20 | """Simple MLP model for testing""" 21 | 22 | def __init__(self, input_size=128, hidden_size=64, output_size=10): 23 | super().__init__() 24 | self.fc1 = torch.nn.Linear(input_size, hidden_size) 25 | self.relu = torch.nn.ReLU() 26 | self.fc2 = torch.nn.Linear(hidden_size, output_size) 27 | 28 | def forward(self, x): 29 | x = self.fc1(x) 30 | x = self.relu(x) 31 | x = self.fc2(x) 32 | return x 33 | 34 | 35 | def test_benchmark_initialization(): 36 | """Test benchmark initialization""" 37 | benchmark = Benchmark() 38 | assert benchmark is not None 39 | assert hasattr(benchmark, 'results') 40 | assert len(benchmark.results) == 0 41 | assert benchmark.system_info is not None 42 | 43 | 44 | def test_system_info(): 45 | """Test system information collection""" 46 | system_info = get_system_info() 47 | assert system_info is not None 48 | assert 'os' in system_info 49 | assert 'cpu' in system_info 50 | assert 'ram' in system_info 51 | assert 'python_version' in system_info 52 | 53 | if system_info['os']['name'] == 'Darwin': 54 | assert 'mac_model' in system_info 55 | 56 | 57 | def test_benchmark_model_cpu(): 58 | """Test benchmarking a model on CPU""" 59 | # Skip on CI if needed 60 | if os.environ.get('CI') == 'true': 61 | pytest.skip("Skipping benchmark test in CI environment") 62 | 63 | # Create a simple model 64 | model = SimpleMLP() 65 | 66 | # Initialize benchmark 67 | benchmark = Benchmark() 68 | 69 | # Benchmark on CPU 70 | result = benchmark.benchmark_model( 71 | model=model, 72 | model_name="TestMLP", 73 | input_shape=[1, 128], 74 | backend="CPU", 75 | num_runs=10 # Small number for quick testing 76 | ) 77 | 78 | # Check results 79 | assert result is not None 80 | assert result.model_name == "TestMLP" 81 | assert result.backend == "CPU" 82 | assert result.inference_time_ms > 0 83 | assert result.memory_used_mb >= 0 84 | 85 | # Check TFLOPS calculation 86 | assert result.tflops is not None 87 | assert result.tflops >= 0 88 | 89 | # Check throughput calculation 90 | assert result.throughput_gbps is not None 91 | assert result.throughput_gbps >= 0 92 | 93 | 94 | def test_generate_report(): 95 | """Test report generation""" 96 | # Create a simple model and get benchmark results 97 | model = SimpleMLP() 98 | benchmark = Benchmark() 99 | 100 | # Benchmark on CPU (quick run) 101 | benchmark.benchmark_model( 102 | model=model, 103 | model_name="TestMLP", 104 | input_shape=[1, 128], 105 | backend="CPU", 106 | num_runs=5 107 | ) 108 | 109 | # Generate report 110 | test_report_path = "test_report.html" 111 | report_html = benchmark.generate_report(test_report_path) 112 | 113 | # Check report was created 114 | assert os.path.exists(test_report_path) 115 | assert report_html is not None 116 | assert len(report_html) > 0 117 | 118 | # Clean up 119 | os.remove(test_report_path) 120 | 121 | 122 | if __name__ == "__main__": 123 | # Run tests manually 124 | test_benchmark_initialization() 125 | test_system_info() 126 | test_benchmark_model_cpu() 127 | test_generate_report() 128 | print("All tests passed!") -------------------------------------------------------------------------------- /create_python39_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Resolve Python 3.9 from Homebrew dynamically (handles keg-only 
installs) 4 | # Prioritize native ARM64 Homebrew for Apple Silicon 5 | if [ -x "/opt/homebrew/bin/brew" ]; then 6 | # Use native ARM64 Homebrew first 7 | BREW_PREFIX=$(/opt/homebrew/bin/brew --prefix python@3.9 2>/dev/null) 8 | if [ -n "$BREW_PREFIX" ] && [ -x "$BREW_PREFIX/bin/python3.9" ]; then 9 | PYTHON39_PATH="$BREW_PREFIX/bin/python3.9" 10 | echo "Using native ARM64 Homebrew Python 3.9" 11 | fi 12 | elif command -v brew >/dev/null 2>&1; then 13 | # Fallback to default brew (may be x86_64 under Rosetta) 14 | BREW_PREFIX=$(brew --prefix python@3.9 2>/dev/null) 15 | if [ -n "$BREW_PREFIX" ] && [ -x "$BREW_PREFIX/bin/python3.9" ]; then 16 | PYTHON39_PATH="$BREW_PREFIX/bin/python3.9" 17 | echo "Using default Homebrew Python 3.9 (may be x86_64)" 18 | fi 19 | fi 20 | 21 | # Fallbacks for common Homebrew prefixes (Apple Silicon and Intel) 22 | if [ -z "$PYTHON39_PATH" ]; then 23 | if [ -x "/opt/homebrew/opt/python@3.9/bin/python3.9" ]; then 24 | PYTHON39_PATH="/opt/homebrew/opt/python@3.9/bin/python3.9" 25 | elif [ -x "/usr/local/opt/python@3.9/bin/python3.9" ]; then 26 | PYTHON39_PATH="/usr/local/opt/python@3.9/bin/python3.9" 27 | fi 28 | fi 29 | 30 | # Final fallback: any python3.9 on PATH 31 | if [ -z "$PYTHON39_PATH" ] && command -v python3.9 >/dev/null 2>&1; then 32 | PYTHON39_PATH="$(command -v python3.9)" 33 | fi 34 | 35 | # Check if Python 3.9 is installed 36 | if [ -z "$PYTHON39_PATH" ] || [ ! -x "$PYTHON39_PATH" ]; then 37 | echo "Python 3.9 was not found." 38 | echo "" 39 | echo "For Apple Silicon Macs, install native ARM64 Python:" 40 | echo " /opt/homebrew/bin/brew install python@3.9" 41 | echo "" 42 | echo "For Intel Macs or if native Homebrew is not available:" 43 | echo " brew install python@3.9" 44 | echo "" 45 | echo "If already installed (keg-only), it will live at:" 46 | echo " ARM64: \"$(/opt/homebrew/bin/brew --prefix python@3.9 2>/dev/null)/bin/python3.9\"" 47 | echo " x86_64: \"$(brew --prefix python@3.9 2>/dev/null)/bin/python3.9\"" 48 | exit 1 49 | fi 50 | 51 | echo "Found Python 3.9 at $PYTHON39_PATH" 52 | 53 | # Check if the environment already exists and remove it if it does 54 | if [ -d "env-anemll-bench" ]; then 55 | echo "Found existing env-anemll-bench environment. Removing it..." 56 | rm -rf env-anemll-bench 57 | echo "Existing environment removed." 58 | fi 59 | 60 | # Create a virtual environment with Python 3.9 61 | echo "Creating a fresh virtual environment with Python 3.9..." 62 | "$PYTHON39_PATH" -m venv env-anemll-bench 63 | 64 | # Activate the virtual environment 65 | echo "Activating the virtual environment..." 66 | source env-anemll-bench/bin/activate 67 | 68 | # Verify Python version and architecture 69 | python_version=$(python --version) 70 | python_arch=$(python -c "import platform; print(platform.machine())") 71 | echo "Using $python_version" 72 | echo "Architecture: $python_arch" 73 | 74 | # Warn if using x86_64 on Apple Silicon 75 | if [[ "$python_arch" == "x86_64" ]] && [[ "$(uname -m)" == "arm64" ]]; then 76 | echo "" 77 | echo "⚠️ WARNING: You're using x86_64 Python on Apple Silicon!" 78 | echo " This will prevent access to the Apple Neural Engine (ANE)." 79 | echo " For proper ANE support, use native ARM64 Python:" 80 | echo " /opt/homebrew/bin/brew install python@3.9" 81 | echo "" 82 | fi 83 | 84 | # Copy the installation files to the new environment 85 | echo "Copying installation files to the new environment..." 
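# Copying these files into the venv directory lets you run ./install_dependencies.sh from inside env-anemll-bench/, matching the instructions printed at the end of this script.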
86 | cp install_dependencies.sh env-anemll-bench/ 87 | cp requirements.txt env-anemll-bench/ 88 | 89 | echo "" 90 | echo "Python 3.9 virtual environment created successfully!" 91 | echo "" 92 | echo "To activate the environment, run:" 93 | echo " source env-anemll-bench/bin/activate" 94 | echo "" 95 | echo "Then run the installation script:" 96 | echo " cd env-anemll-bench" 97 | echo " ./install_dependencies.sh" 98 | echo "" 99 | echo "After installation, you can run your scripts with Python 3.9" -------------------------------------------------------------------------------- /examples/test_lm_head_benchmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Simple script to benchmark LM head models with sequence_length=1 3 | import os 4 | import sys 5 | import time 6 | import logging 7 | import argparse 8 | 9 | from anemll_bench import Benchmark 10 | from anemll_bench.models.model_loader import get_macos_version, sync_platform_models, get_platform_specific_models 11 | 12 | # Configure logging 13 | logging.basicConfig( 14 | level=logging.INFO, 15 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 16 | ) 17 | logger = logging.getLogger(__name__) 18 | 19 | def main(): 20 | parser = argparse.ArgumentParser(description="Benchmark LM head models with sequence_length=1") 21 | parser.add_argument("--use-local", action="store_true", help="Use local models if they exist") 22 | parser.add_argument("--num-runs", type=int, default=50, help="Number of benchmark runs") 23 | parser.add_argument("--no-browser", action="store_true", help="Don't open browser with report") 24 | parser.add_argument("--model", type=str, help="Specific model to benchmark") 25 | args = parser.parse_args() 26 | 27 | logger.info(f"Running on macOS version category: {get_macos_version()}") 28 | 29 | # Initialize the benchmark tool 30 | benchmark = Benchmark() 31 | 32 | # Synchronize models first 33 | logger.info("Starting platform model synchronization...") 34 | sync_platform_models() 35 | 36 | # Get all available models 37 | all_models = get_platform_specific_models() 38 | 39 | # Filter for LM head models only 40 | lm_head_models = [model for model in all_models if "lm_head" in model.get("name", "").lower()] 41 | 42 | # If a specific model is specified, filter for just that model 43 | if args.model: 44 | lm_head_models = [model for model in lm_head_models if args.model.lower() in model.get("name", "").lower()] 45 | 46 | if not lm_head_models: 47 | logger.error("No LM head models found!") 48 | return 1 49 | 50 | logger.info(f"Found {len(lm_head_models)} LM head models to benchmark") 51 | 52 | results = [] 53 | 54 | # Benchmark each model with sequence_length=1 55 | for model_config in lm_head_models: 56 | model_name = model_config.get("name") 57 | if not model_name: 58 | continue 59 | 60 | logger.info(f"Benchmarking model: {model_name}") 61 | 62 | try: 63 | # Explicitly use sequence_length=1 for LM head models 64 | result = benchmark.benchmark_platform_model( 65 | model_name=model_name, 66 | num_runs=args.num_runs, 67 | batch_size=1, 68 | sequence_length=1, # Explicitly force sequence_length to 1 69 | check_online=False, 70 | force_redownload=False, 71 | use_local_if_exists=args.use_local 72 | ) 73 | 74 | results.append(result) 75 | 76 | logger.info(f"Benchmark successful for {model_name}") 77 | logger.info(f" - Latency: {result.latency_ms:.2f} ms") 78 | logger.info(f" - TFLOPs: {result.tflops:.2f}") 79 | logger.info(f" - Throughput: 
{result.throughput_gb_s:.2f} GB/s") 80 | 81 | except Exception as e: 82 | logger.error(f"Error benchmarking {model_name}: {e}") 83 | 84 | # Generate a report 85 | if results: 86 | timestamp = time.strftime("%Y%m%d_%H%M%S") 87 | report_path = f"lm_head_benchmark_report_{timestamp}.html" 88 | 89 | logger.info(f"Generating report: {report_path}") 90 | benchmark.generate_report( 91 | output_path=report_path, 92 | include_charts=True, 93 | auto_open=not args.no_browser 94 | ) 95 | 96 | logger.info(f"Report generated: {report_path}") 97 | else: 98 | logger.warning("No benchmark results were generated") 99 | 100 | return 0 101 | 102 | if __name__ == "__main__": 103 | sys.exit(main()) -------------------------------------------------------------------------------- /anemll_bench/reports/report_uploader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Report uploader for anemll-bench. 3 | """ 4 | 5 | import os 6 | import json 7 | import logging 8 | from typing import Optional, Dict, Any 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | class ReportUploader: 13 | """ 14 | Class for uploading benchmark reports to various services. 15 | """ 16 | 17 | def __init__(self, service: str = "jsonbin"): 18 | """ 19 | Initialize the report uploader. 20 | 21 | Args: 22 | service: The service to upload to (jsonbin, gist, pastebin) 23 | """ 24 | self.service = service 25 | self._check_credentials() 26 | 27 | def _check_credentials(self): 28 | """ 29 | Check if the necessary credentials are available for the selected service. 30 | """ 31 | if self.service == "jsonbin": 32 | if not os.environ.get("JSONBIN_API_KEY"): 33 | logger.warning("JSONBIN_API_KEY environment variable not set. Upload will fail.") 34 | elif self.service == "gist": 35 | if not os.environ.get("GITHUB_TOKEN"): 36 | logger.warning("GITHUB_TOKEN environment variable not set. Upload will fail.") 37 | elif self.service == "pastebin": 38 | if not os.environ.get("PASTEBIN_API_KEY"): 39 | logger.warning("PASTEBIN_API_KEY environment variable not set. Upload will fail.") 40 | 41 | def upload(self, report_path: str, metadata: Optional[Dict[str, Any]] = None) -> Optional[str]: 42 | """ 43 | Upload a report to the selected service. 44 | 45 | Args: 46 | report_path: Path to the report file 47 | metadata: Additional metadata to include with the upload 48 | 49 | Returns: 50 | URL to the uploaded report, or None if upload failed 51 | """ 52 | logger.info(f"Uploading report {report_path} to {self.service}") 53 | 54 | if not os.path.exists(report_path): 55 | logger.error(f"Report file {report_path} does not exist") 56 | return None 57 | 58 | # Read the report file 59 | with open(report_path, 'r') as f: 60 | content = f.read() 61 | 62 | # Upload based on the selected service 63 | if self.service == "jsonbin": 64 | return self._upload_to_jsonbin(content, metadata) 65 | elif self.service == "gist": 66 | return self._upload_to_gist(content, metadata, os.path.basename(report_path)) 67 | elif self.service == "pastebin": 68 | return self._upload_to_pastebin(content, metadata) 69 | else: 70 | logger.error(f"Unknown upload service: {self.service}") 71 | return None 72 | 73 | def _upload_to_jsonbin(self, content: str, metadata: Optional[Dict[str, Any]]) -> Optional[str]: 74 | """ 75 | Upload to JSONBin.io. 
76 | 77 | Args: 78 | content: The content to upload 79 | metadata: Additional metadata 80 | 81 | Returns: 82 | URL to the uploaded content, or None if upload failed 83 | """ 84 | logger.info("JSONBin upload not implemented yet") 85 | return None 86 | 87 | def _upload_to_gist(self, content: str, metadata: Optional[Dict[str, Any]], filename: str) -> Optional[str]: 88 | """ 89 | Upload to GitHub Gist. 90 | 91 | Args: 92 | content: The content to upload 93 | metadata: Additional metadata 94 | filename: The filename to use in the gist 95 | 96 | Returns: 97 | URL to the uploaded content, or None if upload failed 98 | """ 99 | logger.info("GitHub Gist upload not implemented yet") 100 | return None 101 | 102 | def _upload_to_pastebin(self, content: str, metadata: Optional[Dict[str, Any]]) -> Optional[str]: 103 | """ 104 | Upload to Pastebin. 105 | 106 | Args: 107 | content: The content to upload 108 | metadata: Additional metadata 109 | 110 | Returns: 111 | URL to the uploaded content, or None if upload failed 112 | """ 113 | logger.info("Pastebin upload not implemented yet") 114 | return None -------------------------------------------------------------------------------- /debug_ane.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | ANE Debugging Script 4 | 5 | This script helps diagnose why models might not be running on ANE. 6 | Run this before benchmarking to identify potential issues. 7 | """ 8 | 9 | import sys 10 | import os 11 | 12 | # Add the project root to the path 13 | sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) 14 | 15 | from anemll_bench.utils.ane_verification import run_ane_diagnostic, verify_model_ane_compatibility 16 | from anemll_bench.utils.system_info import get_system_info 17 | 18 | 19 | def main(): 20 | print("ANE Debugging Tool") 21 | print("=" * 60) 22 | 23 | # Run comprehensive diagnostic 24 | diagnostic = run_ane_diagnostic() 25 | 26 | print("\n" + "=" * 60) 27 | print("SYSTEM INFORMATION") 28 | print("=" * 60) 29 | 30 | # Get detailed system info 31 | system_info = get_system_info() 32 | 33 | print(f"Platform: {system_info['os']['name']} {system_info['os']['release']}") 34 | print(f"Architecture: {system_info['os']['name']}") 35 | print(f"Apple Silicon: {system_info['apple_silicon']}") 36 | 37 | if system_info['apple_silicon']: 38 | ane_info = system_info['neural_engine'] 39 | print(f"ANE Available: {ane_info['available']}") 40 | if 'chip_model' in ane_info: 41 | print(f"Chip Model: {ane_info['chip_model']}") 42 | if 'cores' in ane_info: 43 | print(f"ANE Cores: {ane_info['cores']}") 44 | if 'generation' in ane_info: 45 | print(f"Chip Generation: {ane_info['generation']}") 46 | if 'capabilities' in ane_info: 47 | print(f"ANE Capabilities: {', '.join(ane_info['capabilities'])}") 48 | 49 | print("\n" + "=" * 60) 50 | print("RECOMMENDATIONS") 51 | print("=" * 60) 52 | 53 | # Provide recommendations based on diagnostic 54 | if diagnostic['overall_status'] == 'ANE should be available': 55 | print("✓ ANE hardware and software support detected") 56 | print("✓ Your system should be able to run models on ANE") 57 | print("\nIf models are still not using ANE:") 58 | print("1. Ensure models are loaded with CPU_AND_NE compute units") 59 | print("2. Check that models are ML Programs (ANE-optimized)") 60 | print("3. Verify macOS version compatibility") 61 | print("4. 
Try running the benchmark with enhanced debugging") 62 | 63 | elif diagnostic['overall_status'] == 'Hardware OK, CoreML issues': 64 | print("⚠ ANE hardware detected but CoreML issues found") 65 | print("\nRecommendations:") 66 | print("1. Update CoreML Tools: pip install --upgrade coremltools") 67 | print("2. Check CoreML Tools version compatibility") 68 | print("3. Verify Python environment setup") 69 | 70 | elif diagnostic['overall_status'] == 'Hardware not available': 71 | print("✗ ANE hardware not available") 72 | print("\nThis system cannot use ANE acceleration") 73 | print("Models will run on CPU/GPU instead") 74 | 75 | else: 76 | print("? Unknown issues detected") 77 | print("Please check the diagnostic output above") 78 | 79 | print("\n" + "=" * 60) 80 | print("NEXT STEPS") 81 | print("=" * 60) 82 | 83 | print("1. Run your benchmark with enhanced debugging:") 84 | print(" python -m anemll_bench --model your_model --ane-only") 85 | print() 86 | print("2. Check model compatibility:") 87 | print(" python debug_ane.py --model /path/to/your/model.mlmodel") 88 | print() 89 | print("3. For platform models:") 90 | print(" python examples/benchmark_all_models.py --use-local") 91 | 92 | # Check if a model path was provided 93 | if len(sys.argv) > 1 and sys.argv[1] == '--model' and len(sys.argv) > 2: 94 | model_path = sys.argv[2] 95 | print(f"\n" + "=" * 60) 96 | print(f"MODEL COMPATIBILITY CHECK: {model_path}") 97 | print("=" * 60) 98 | 99 | model_info = verify_model_ane_compatibility(model_path) 100 | 101 | print(f"Model Exists: {model_info['model_exists']}") 102 | if model_info['model_format']: 103 | print(f"Model Format: {model_info['model_format']}") 104 | print(f"ANE Optimized: {model_info['ane_optimized']}") 105 | print(f"ML Program: {model_info['ml_program']}") 106 | 107 | if model_info['issues']: 108 | print(f"\nIssues:") 109 | for issue in model_info['issues']: 110 | print(f" - {issue}") 111 | else: 112 | print(f"\n✓ No compatibility issues detected") 113 | 114 | 115 | if __name__ == "__main__": 116 | main() 117 | -------------------------------------------------------------------------------- /examples/benchmark_local_lm_head.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Simple script to benchmark local LM head models with sequence_length=1 4 | without performing any synchronization or downloads. 
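Example invocation (model names come from the entries in the local meta.yalm, e.g. llama_lm_head):
    python examples/benchmark_local_lm_head.py --model llama_lm_head --no-browser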
5 | """ 6 | 7 | import os 8 | import sys 9 | import logging 10 | import argparse 11 | from typing import List, Dict 12 | 13 | from anemll_bench import Benchmark 14 | from anemll_bench.models.model_loader import read_meta_file, get_macos_version 15 | 16 | # Configure logging 17 | logging.basicConfig( 18 | level=logging.INFO, 19 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 20 | ) 21 | logger = logging.getLogger(__name__) 22 | 23 | def get_local_models() -> List[Dict]: 24 | """Get list of models available locally without any synchronization.""" 25 | meta_data = read_meta_file() 26 | if not meta_data or 'model_info' not in meta_data: 27 | logger.error("No model info found in meta file") 28 | return [] 29 | 30 | macos_version = get_macos_version() 31 | if not macos_version or macos_version not in meta_data['model_info']: 32 | logger.error(f"No models found for {macos_version}") 33 | return [] 34 | 35 | return meta_data['model_info'][macos_version] 36 | 37 | def main(): 38 | parser = argparse.ArgumentParser(description="Benchmark local LM head models with sequence_length=1") 39 | parser.add_argument("--model", type=str, help="Specific model to benchmark") 40 | parser.add_argument("--num-runs", type=int, default=50, help="Number of benchmark runs") 41 | parser.add_argument("--no-browser", action="store_true", help="Don't open browser with report") 42 | args = parser.parse_args() 43 | 44 | logger.info(f"Running on macOS version category: {get_macos_version()}") 45 | 46 | # Initialize the benchmark tool 47 | benchmark = Benchmark() 48 | 49 | # Get all available models without syncing 50 | logger.info("Getting local models without synchronization...") 51 | all_models = get_local_models() 52 | logger.info(f"Found {len(all_models)} models locally") 53 | 54 | # Filter for LM head models only 55 | lm_head_models = [model for model in all_models if "lm_head" in model.get("name", "").lower()] 56 | 57 | # If a specific model is specified, filter for just that model 58 | if args.model: 59 | lm_head_models = [model for model in lm_head_models if args.model.lower() in model.get("name", "").lower()] 60 | 61 | if not lm_head_models: 62 | logger.error("No LM head models found!") 63 | return 1 64 | 65 | logger.info(f"Found {len(lm_head_models)} LM head models to benchmark") 66 | for model in lm_head_models: 67 | logger.info(f" - {model.get('name')}") 68 | 69 | results = [] 70 | 71 | # Benchmark each model with sequence_length=1 72 | for model_config in lm_head_models: 73 | model_name = model_config.get("name") 74 | if not model_name: 75 | continue 76 | 77 | logger.info(f"Benchmarking model: {model_name}") 78 | 79 | try: 80 | # Explicitly use sequence_length=1 for LM head models 81 | # Set use_local_if_exists=True to avoid re-downloads 82 | # Set check_online=False to avoid checking for updates 83 | result = benchmark.benchmark_platform_model( 84 | model_name=model_name, 85 | num_runs=args.num_runs, 86 | batch_size=1, 87 | sequence_length=1, # Explicitly force sequence_length to 1 88 | check_online=False, # Don't check online 89 | force_redownload=False, # Don't force redownload 90 | use_local_if_exists=True # Use local models even if they might be corrupted 91 | ) 92 | 93 | results.append(result) 94 | 95 | logger.info(f"Benchmark successful for {model_name}") 96 | if hasattr(result, 'latency_ms'): 97 | logger.info(f" - Latency: {result.latency_ms:.2f} ms") 98 | logger.info(f" - TFLOPs: {result.tflops:.2f}") 99 | logger.info(f" - Throughput: {result.throughput_gb_s:.2f} GB/s") 100 | 101 | except 
Exception as e: 102 | logger.error(f"Error benchmarking {model_name}: {e}") 103 | 104 | # Generate a report 105 | if results: 106 | import time 107 | timestamp = time.strftime("%Y%m%d_%H%M%S") 108 | report_path = f"lm_head_local_benchmark_{timestamp}.html" 109 | 110 | logger.info(f"Generating report: {report_path}") 111 | benchmark.generate_report( 112 | output_path=report_path, 113 | include_charts=True, 114 | auto_open=not args.no_browser 115 | ) 116 | 117 | logger.info(f"Report generated: {report_path}") 118 | else: 119 | logger.warning("No benchmark results were generated") 120 | 121 | return 0 122 | 123 | if __name__ == "__main__": 124 | sys.exit(main()) -------------------------------------------------------------------------------- /examples/DUAL_MODEL_BENCHMARKING.md: -------------------------------------------------------------------------------- 1 | # Dual Model Benchmarking 2 | 3 | This feature allows you to benchmark two models simultaneously to measure how they perform when running in parallel on the Neural Engine. 4 | 5 | ## Overview 6 | 7 | When running multiple ML models concurrently, resource contention can affect performance. The dual model benchmarking feature helps you: 8 | 9 | - Measure individual performance of each model running alone 10 | - Measure performance when both models run simultaneously 11 | - Analyze efficiency and resource utilization 12 | - Compare throughput and latency in isolation vs. parallel execution 13 | 14 | ## Requirements 15 | 16 | - macOS with Apple Neural Engine (M1/M2/M3 series) 17 | - Python 3.8+ 18 | - anemll-bench package 19 | 20 | ## Updating from Previous Versions 21 | 22 | If you're updating from a previous version of the tool, follow these steps to ensure compatibility: 23 | 24 | 1. **Use the automated update script** (recommended): 25 | ```bash 26 | python examples/update_dual_benchmark.py 27 | ``` 28 | This script will: 29 | - Update the meta.yalm file with the latest model information 30 | - Check if required models are available 31 | - Download any missing models automatically 32 | 33 | If you want to force re-downloading all models: 34 | ```bash 35 | python examples/update_dual_benchmark.py --force-resync 36 | ``` 37 | 38 | 2. **Manual update**: If you prefer to update manually: 39 | ```bash 40 | # Update the meta.yalm file and download any missing/new models (recommended) 41 | python examples/sync_models.py --update 42 | 43 | # For faster downloads, use parallel mode 44 | python examples/sync_models.py --update --parallel 45 | 46 | # Or use these individual steps: 47 | # Update the meta.yalm file 48 | python examples/sync_models.py --force 49 | 50 | # Check which models are available for your platform 51 | python examples/list_platform_models.py 52 | 53 | # Download models 54 | python examples/sync_models.py 55 | ``` 56 | 57 | 3. **Check cached models**: If you encounter any errors, check your model cache: 58 | ```bash 59 | python examples/manage_cache.py --status 60 | ``` 61 | 62 | You may need to clear the cache if there are corrupted models: 63 | ```bash 64 | python examples/manage_cache.py --clear-models 65 | ``` 66 | 67 | ## Basic Usage 68 | 69 | Run a dual model benchmark with default settings: 70 | 71 | ```bash 72 | python examples/benchmark_dual_models.py 73 | ``` 74 | 75 | This will benchmark the default models (`llama_lm_head` and `DeepHermes_lm_head`) with 300 runs each. 
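If you prefer to drive the dual benchmark from Python instead of the CLI script, the sketch below mirrors what `examples/benchmark_dual_models.py` does internally. It is a minimal sketch, not a drop-in replacement: it assumes both `.mlpackage` bundles are already in the default cache directory (for example via `examples/sync_models.py`) and that the `[1, 1, 4096]` input shape matches the bundled LM head models — adjust the shape if your models differ.

```python
# Minimal sketch of the programmatic equivalent of examples/benchmark_dual_models.py.
# Assumes the models are already downloaded to the default cache and take a
# [1, 1, 4096] input; both assumptions should be verified against your setup.
import os
import coremltools as ct
from anemll_bench import Benchmark

cache_dir = os.path.expanduser("~/.cache/anemll-bench/models")
compute_unit = ct.ComputeUnit.CPU_AND_NE  # target the Neural Engine

# Load the two CoreML packages that ship with the benchmark
model1 = ct.models.MLModel(os.path.join(cache_dir, "llama_lm_head.mlpackage"),
                           compute_units=compute_unit)
model2 = ct.models.MLModel(os.path.join(cache_dir, "DeepHermes_lm_head.mlpackage"),
                           compute_units=compute_unit)

benchmark = Benchmark()
results = benchmark.benchmark_dual_models(
    model1=model1, model1_name="llama_lm_head", model1_input_shape=[1, 1, 4096],
    model2=model2, model2_name="DeepHermes_lm_head", model2_input_shape=[1, 1, 4096],
    backend="ANE", num_runs=300,
)

# The parallel results list holds one entry per model plus a combined entry
combined = results["parallel_results"][2]
print(f"Combined: {combined.inference_time_ms:.2f} ms, "
      f"{combined.throughput_gb_s:.2f} GB/s")
```

The same `Benchmark` instance can then write the HTML report with `benchmark.generate_report(...)`, exactly as the script does.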
76 | 77 | ## Advanced Options 78 | 79 | Customize the benchmark with these options: 80 | 81 | ```bash 82 | python examples/benchmark_dual_models.py --runs 100 83 | ``` 84 | 85 | ## Understanding the Results 86 | 87 | The benchmark will output several key metrics: 88 | 89 | 1. **Individual Performance**: 90 | - Inference time (ms) for each model running alone 91 | - Throughput (GB/s) for each model running alone 92 | 93 | 2. **Parallel Performance**: 94 | - Inference time (ms) for each model running simultaneously 95 | - Throughput (GB/s) for each model running simultaneously 96 | - Combined metrics showing overall system performance 97 | 98 | 3. **Combined Analysis**: 99 | - Total parallel execution time 100 | - Combined throughput 101 | - Sum of individual throughputs 102 | - Bandwidth utilization factor 103 | - Efficiency percentage 104 | 105 | 4. **HTML Report**: 106 | An interactive HTML report will be generated at `~/.cache/anemll-bench/reports/` 107 | 108 | ## Interpreting Efficiency 109 | 110 | The efficiency percentage indicates how well the models share resources: 111 | 112 | - **~100%**: Near-perfect resource sharing, minimal contention 113 | - **~50%**: Significant resource contention, models competing for bandwidth 114 | - **<50%**: Severe contention, consider running models sequentially 115 | 116 | ## Example Output 117 | 118 | ``` 119 | === Dual Model Benchmarking Results === 120 | 121 | Individual Performance: 122 | - llama_lm_head: 19.35 ms, 54.25 GB/s 123 | - DeepHermes_lm_head: 19.42 ms, 54.07 GB/s 124 | 125 | Parallel Performance: 126 | - llama_lm_head: 38.12 ms, 27.56 GB/s 127 | - DeepHermes_lm_head: 38.25 ms, 27.45 GB/s 128 | - Combined: 38.25 ms, 54.85 GB/s (total throughput) 129 | 130 | Combined Analysis: 131 | - Total Parallel Execution Time: 0.77 seconds 132 | - Combined Throughput: 54.85 GB/s 133 | - Sum of Individual Throughputs: 108.32 GB/s 134 | - Bandwidth Utilization Factor: 0.51x 135 | - Efficiency: 50.64% 136 | ``` 137 | 138 | ## Troubleshooting 139 | 140 | If you encounter issues: 141 | 142 | 1. **Models not found**: 143 | - Run `python examples/sync_models.py --force` to update the meta.yalm file and download models 144 | 145 | 2. **Performance issues**: 146 | - Ensure no other intensive applications are running 147 | - Try rebooting your system to clear memory 148 | 149 | 3. **Report generation fails**: 150 | - Check disk space in your home directory 151 | - Ensure you have write permissions to `~/.cache/anemll-bench/reports/` -------------------------------------------------------------------------------- /examples/benchmark_all_models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Benchmark all platform-specific models and generate a comprehensive report. 4 | This script automates the entire process: 5 | 1. Syncs (downloads if needed) all models for the current macOS version 6 | 2. Benchmarks each available model 7 | 3. 
Generates a consolidated report with comparison charts 8 | """ 9 | 10 | import logging 11 | import sys 12 | import os 13 | import argparse 14 | import time 15 | import webbrowser 16 | import subprocess 17 | from pathlib import Path 18 | from typing import Optional 19 | from datetime import datetime 20 | 21 | # Add parent directory to path to import anemll_bench 22 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 23 | 24 | from anemll_bench import Benchmark 25 | from anemll_bench.models import get_macos_version, list_available_platform_models, clear_cache 26 | 27 | def main(): 28 | # Parse arguments 29 | parser = argparse.ArgumentParser(description="Benchmark all platform-specific models") 30 | parser.add_argument("--no-sync", action="store_true", help="Skip model synchronization") 31 | parser.add_argument("--no-charts", action="store_true", help="Do not include charts in report") 32 | parser.add_argument("--force-redownload", action="store_true", help="Force re-download of models even if they exist") 33 | parser.add_argument("--use-local", action="store_true", help="Use local models if they exist, even if they might be corrupted") 34 | parser.add_argument("--no-browser", action="store_true", help="Do not automatically open the report in a browser") 35 | parser.add_argument("--runs", type=int, default=300, help="Number of benchmark runs per model (default: 300)") 36 | parser.add_argument("--batch-size", type=int, default=1, help="Batch size (default: 1)") 37 | parser.add_argument("--sequence-length", type=int, default=None, help="Sequence length (default: 1)") 38 | parser.add_argument("--output", type=str, default=None, help="Custom output path for the report") 39 | parser.add_argument("--model", type=str, default=None, help="Specific model to benchmark (benchmarks all if not specified)") 40 | parser.add_argument("--local-model-path", type=str, default=None, help="Path to a local model file to use instead of downloading") 41 | parser.add_argument("--verbose", action="store_true", help="Enable verbose logging (DEBUG level)") 42 | args = parser.parse_args() 43 | 44 | # Set up logging 45 | log_level = logging.DEBUG if args.verbose else logging.INFO 46 | logging.basicConfig(level=log_level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 47 | logger = logging.getLogger(__name__) 48 | 49 | # Check if running on macOS 50 | macos_version = get_macos_version() 51 | if not macos_version: 52 | logger.error("This script is intended to run on macOS systems only.") 53 | return 1 54 | 55 | logger.info(f"Running on macOS version category: {macos_version}") 56 | 57 | # Set output path 58 | if args.output is None: 59 | # Use the macOS version in the report name 60 | timestamp = time.strftime("%Y%m%d_%H%M%S") 61 | filename = f"benchmark_report_{macos_version}_{timestamp}.html" 62 | # Report will be saved in the cache directory 63 | cache_dir = os.path.expanduser("~/.cache/anemll-bench/reports") 64 | os.makedirs(cache_dir, exist_ok=True) 65 | args.output = os.path.join(cache_dir, filename) 66 | else: 67 | # If a custom path is provided, check if it's absolute 68 | if not os.path.isabs(args.output): 69 | # If it's relative, save it in the cache directory 70 | cache_dir = os.path.expanduser("~/.cache/anemll-bench/reports") 71 | os.makedirs(cache_dir, exist_ok=True) 72 | args.output = os.path.join(cache_dir, args.output) 73 | 74 | # Debug output path 75 | print(f"DEBUG: Output path: {args.output}") 76 | print(f"DEBUG: Absolute output path: 
{os.path.abspath(args.output)}") 77 | 78 | logger.info("Starting platform model benchmarking process...") 79 | 80 | # Initialize benchmark 81 | benchmark = Benchmark() 82 | 83 | # If a specific model is requested 84 | if args.model: 85 | logger.info(f"Benchmarking specific model: {args.model}") 86 | result = benchmark.benchmark_platform_model( 87 | model_name=args.model, 88 | num_runs=args.runs, 89 | batch_size=args.batch_size, 90 | sequence_length=args.sequence_length, 91 | check_online=not args.no_sync, 92 | force_redownload=args.force_redownload, 93 | use_local_if_exists=args.use_local 94 | ) 95 | 96 | # Generate report for single model 97 | benchmark.generate_report(args.output) 98 | else: 99 | # Run benchmarks for all models 100 | benchmark.benchmark_all_platform_models( 101 | num_runs=args.runs, 102 | batch_size=args.batch_size, 103 | sequence_length=args.sequence_length, 104 | sync_first=not args.no_sync, 105 | include_charts=not args.no_charts, 106 | output_path=args.output, 107 | force_redownload=args.force_redownload, 108 | auto_open=not args.no_browser, 109 | use_local_if_exists=args.use_local 110 | ) 111 | 112 | # Log completion 113 | logger.info(f"Report generated: {args.output}") 114 | 115 | return 0 116 | 117 | if __name__ == "__main__": 118 | sys.exit(main()) -------------------------------------------------------------------------------- /tests/test_report_uploader.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unit tests for report uploader module 3 | """ 4 | 5 | import pytest 6 | import os 7 | import json 8 | import tempfile 9 | from unittest.mock import patch, MagicMock 10 | 11 | # Add parent directory to path 12 | import sys 13 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 14 | 15 | from anemll_bench.reports.report_uploader import ReportUploader 16 | 17 | 18 | def test_uploader_initialization(): 19 | """Test uploader initialization""" 20 | uploader = ReportUploader() 21 | assert uploader is not None 22 | assert uploader.service == "gist" 23 | 24 | # Test with different service 25 | uploader = ReportUploader(service="jsonbin") 26 | assert uploader.service == "jsonbin" 27 | 28 | # Test with invalid service 29 | with pytest.raises(ValueError): 30 | uploader.upload({"test": "data"}) 31 | 32 | 33 | @patch('requests.post') 34 | def test_gist_upload(mock_post): 35 | """Test uploading to GitHub Gist""" 36 | # Mock environment variable 37 | with patch.dict(os.environ, {"GITHUB_TOKEN": "fake_token"}): 38 | # Create mock response 39 | mock_response = MagicMock() 40 | mock_response.status_code = 201 41 | mock_response.json.return_value = {"html_url": "https://gist.github.com/test"} 42 | mock_post.return_value = mock_response 43 | 44 | # Create uploader and test 45 | uploader = ReportUploader(service="gist") 46 | result = uploader.upload( 47 | report_data={"test": "data"}, 48 | title="Test Report", 49 | description="Test Description" 50 | ) 51 | 52 | # Verify result 53 | assert result == "https://gist.github.com/test" 54 | 55 | # Verify correct API call 56 | assert mock_post.called 57 | args, kwargs = mock_post.call_args 58 | assert args[0] == "https://api.github.com/gists" 59 | assert "json" in kwargs 60 | assert kwargs["json"]["description"] == "Test Description" 61 | assert kwargs["json"]["public"] is True 62 | 63 | # Test error handling 64 | with pytest.raises(ValueError): 65 | # Test with no token 66 | with patch.dict(os.environ, {"GITHUB_TOKEN": ""}, clear=True): 67 | 
uploader.upload({"test": "data"}) 68 | 69 | 70 | @patch('requests.post') 71 | def test_jsonbin_upload(mock_post): 72 | """Test uploading to JSONBin""" 73 | # Mock environment variable 74 | with patch.dict(os.environ, {"JSONBIN_API_KEY": "fake_key"}): 75 | # Create mock response 76 | mock_response = MagicMock() 77 | mock_response.status_code = 200 78 | mock_response.json.return_value = {"metadata": {"id": "test-bin-id"}} 79 | mock_post.return_value = mock_response 80 | 81 | # Create uploader and test 82 | uploader = ReportUploader(service="jsonbin") 83 | result = uploader.upload( 84 | report_data={"test": "data", "system_info": {"mac_model": "MacPro"}}, 85 | title="Test Report", 86 | description="Test Description" 87 | ) 88 | 89 | # Verify result 90 | assert result == "https://jsonbin.io/b/test-bin-id" 91 | 92 | # Verify correct API call 93 | assert mock_post.called 94 | args, kwargs = mock_post.call_args 95 | assert args[0] == "https://api.jsonbin.io/v3/b" 96 | assert "json" in kwargs 97 | assert "_metadata" in kwargs["json"] 98 | assert kwargs["json"]["_metadata"]["title"] == "Test Report" 99 | 100 | # Test error handling 101 | with pytest.raises(ValueError): 102 | # Test with no API key 103 | with patch.dict(os.environ, {"JSONBIN_API_KEY": ""}, clear=True): 104 | uploader.upload({"test": "data"}) 105 | 106 | 107 | @patch('requests.post') 108 | def test_pastebin_upload(mock_post): 109 | """Test uploading to Pastebin""" 110 | # Mock environment variable 111 | with patch.dict(os.environ, {"PASTEBIN_API_KEY": "fake_key"}): 112 | # Create mock response 113 | mock_response = MagicMock() 114 | mock_response.text = "https://pastebin.com/test" 115 | mock_post.return_value = mock_response 116 | 117 | # Create uploader and test 118 | uploader = ReportUploader(service="pastebin") 119 | result = uploader.upload( 120 | report_data={"test": "data"}, 121 | title="Test Report", 122 | description="Test Description" 123 | ) 124 | 125 | # Verify result 126 | assert result == "https://pastebin.com/test" 127 | 128 | # Verify correct API call 129 | assert mock_post.called 130 | args, kwargs = mock_post.call_args 131 | assert args[0] == "https://pastebin.com/api/api_post.php" 132 | assert "data" in kwargs 133 | assert kwargs["data"]["api_paste_name"] == "Test Report" 134 | 135 | # Test error handling 136 | with pytest.raises(ValueError): 137 | # Test with no API key 138 | with patch.dict(os.environ, {"PASTEBIN_API_KEY": ""}, clear=True): 139 | uploader.upload({"test": "data"}) 140 | 141 | # Test response error 142 | mock_response.text = "Bad API request" 143 | with pytest.raises(Exception): 144 | uploader.upload({"test": "data"}) 145 | 146 | 147 | if __name__ == "__main__": 148 | # Run tests manually 149 | test_uploader_initialization() 150 | print("All tests passed!") -------------------------------------------------------------------------------- /check_setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Setup Verification Script for ANEMLL-Bench 4 | 5 | This script checks if your environment is properly configured for Apple Neural Engine (ANE) access. 
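Typical usage: python check_setup.py (run it inside the environment you plan to benchmark with, then review the summary it prints).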
6 | """ 7 | 8 | import platform 9 | import sys 10 | import subprocess 11 | import os 12 | 13 | 14 | def check_python_architecture(): 15 | """Check if Python is running on the correct architecture""" 16 | arch = platform.machine() 17 | system_arch = os.uname().machine if hasattr(os, 'uname') else 'unknown' 18 | 19 | print(f"System Architecture: {system_arch}") 20 | print(f"Python Architecture: {arch}") 21 | 22 | if arch == 'arm64' and system_arch == 'arm64': 23 | print("✅ Python is running natively on Apple Silicon") 24 | return True 25 | elif arch == 'x86_64' and system_arch == 'arm64': 26 | print("❌ Python is running under Rosetta (x86_64 emulation)") 27 | print(" This will prevent ANE access!") 28 | return False 29 | elif arch == 'x86_64' and system_arch == 'x86_64': 30 | print("ℹ️ Running on Intel Mac (ANE not available)") 31 | return False 32 | else: 33 | print(f"⚠️ Unknown architecture combination: {arch} on {system_arch}") 34 | return False 35 | 36 | 37 | def check_homebrew(): 38 | """Check Homebrew installation and architecture""" 39 | print("\n=== Homebrew Check ===") 40 | 41 | # Check native ARM64 Homebrew 42 | native_brew = "/opt/homebrew/bin/brew" 43 | if os.path.exists(native_brew): 44 | try: 45 | result = subprocess.run([native_brew, "--version"], 46 | capture_output=True, text=True, timeout=5) 47 | if result.returncode == 0: 48 | print("✅ Native ARM64 Homebrew found: /opt/homebrew/bin/brew") 49 | return True 50 | except: 51 | pass 52 | 53 | # Check default Homebrew 54 | try: 55 | result = subprocess.run(["brew", "--version"], 56 | capture_output=True, text=True, timeout=5) 57 | if result.returncode == 0: 58 | brew_path = subprocess.run(["which", "brew"], 59 | capture_output=True, text=True).stdout.strip() 60 | print(f"⚠️ Default Homebrew found: {brew_path}") 61 | print(" This may be x86_64 under Rosetta") 62 | return False 63 | except: 64 | pass 65 | 66 | print("❌ No Homebrew found") 67 | return False 68 | 69 | 70 | def check_python_versions(): 71 | """Check available Python versions""" 72 | print("\n=== Python Versions Check ===") 73 | 74 | python_paths = [ 75 | ("System Python", "/usr/bin/python3"), 76 | ("ARM64 Homebrew Python", "/opt/homebrew/opt/python@3.9/bin/python3.9"), 77 | ("x86_64 Homebrew Python", "/usr/local/opt/python@3.9/bin/python3.9"), 78 | ] 79 | 80 | for name, path in python_paths: 81 | if os.path.exists(path): 82 | try: 83 | result = subprocess.run([path, "-c", "import platform; print(platform.machine())"], 84 | capture_output=True, text=True, timeout=5) 85 | if result.returncode == 0: 86 | arch = result.stdout.strip() 87 | print(f"✅ {name}: {path} (Architecture: {arch})") 88 | else: 89 | print(f"❌ {name}: {path} (Error)") 90 | except: 91 | print(f"❌ {name}: {path} (Error)") 92 | else: 93 | print(f"❌ {name}: Not found") 94 | 95 | 96 | def check_coreml(): 97 | """Check CoreML Tools installation""" 98 | print("\n=== CoreML Tools Check ===") 99 | 100 | try: 101 | import coremltools as ct 102 | print(f"✅ CoreML Tools: {ct.__version__}") 103 | 104 | # Check compute units 105 | compute_units = [str(cu) for cu in ct.ComputeUnit] 106 | ane_units = [cu for cu in compute_units if 'NE' in cu or 'ALL' in cu] 107 | if ane_units: 108 | print(f"✅ ANE compute units available: {ane_units}") 109 | else: 110 | print("❌ No ANE compute units found") 111 | 112 | return True 113 | except ImportError: 114 | print("❌ CoreML Tools not installed") 115 | return False 116 | 117 | 118 | def main(): 119 | print("ANEMLL-Bench Setup Verification") 120 | print("=" * 40) 121 | 122 | # Check 
Python architecture 123 | python_ok = check_python_architecture() 124 | 125 | # Check Homebrew 126 | homebrew_ok = check_homebrew() 127 | 128 | # Check Python versions 129 | check_python_versions() 130 | 131 | # Check CoreML 132 | coreml_ok = check_coreml() 133 | 134 | # Summary 135 | print("\n" + "=" * 40) 136 | print("SUMMARY") 137 | print("=" * 40) 138 | 139 | if python_ok and coreml_ok: 140 | print("✅ Your setup looks good for ANE access!") 141 | print(" You should be able to run models on the Apple Neural Engine.") 142 | elif not python_ok: 143 | print("❌ Python architecture issue detected.") 144 | print(" Rebuild your environment with native ARM64 Python:") 145 | print(" /usr/bin/python3 -m venv env-anemll-bench") 146 | print(" OR") 147 | print(" /opt/homebrew/bin/brew install python@3.9") 148 | print(" /opt/homebrew/opt/python@3.9/bin/python3.9 -m venv env-anemll-bench") 149 | elif not coreml_ok: 150 | print("❌ CoreML Tools not installed.") 151 | print(" Install with: pip install coremltools") 152 | else: 153 | print("⚠️ Mixed issues detected. Check the details above.") 154 | 155 | print("\nRun 'python debug_ane.py' for detailed ANE diagnostics.") 156 | 157 | 158 | if __name__ == "__main__": 159 | main() 160 | -------------------------------------------------------------------------------- /examples/manage_cache.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Utility script for managing the ANEMLL-Bench model cache 4 | """ 5 | 6 | import logging 7 | import sys 8 | import os 9 | import argparse 10 | import json 11 | 12 | # Add parent directory to path to import anemll_bench 13 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 14 | 15 | from anemll_bench.models import ( 16 | get_cache_info, 17 | clear_cache, 18 | check_and_update_platform_models, 19 | sync_platform_models, 20 | CACHE_DIR 21 | ) 22 | 23 | def main(): 24 | # Parse arguments 25 | parser = argparse.ArgumentParser(description="Manage ANEMLL-Bench model cache") 26 | subparsers = parser.add_subparsers(dest="command", help="Command to execute") 27 | 28 | # Info command 29 | info_parser = subparsers.add_parser("info", help="Display cache information") 30 | info_parser.add_argument("--json", action="store_true", help="Output in JSON format") 31 | 32 | # Clear command 33 | clear_parser = subparsers.add_parser("clear", help="Clear the cache") 34 | clear_parser.add_argument("--all", action="store_true", help="Clear all cache including meta file") 35 | clear_parser.add_argument("--model", type=str, help="Clear only the specified model") 36 | 37 | # Update command 38 | update_parser = subparsers.add_parser("update", help="Update model definitions from online source") 39 | 40 | # Sync command 41 | sync_parser = subparsers.add_parser("sync", help="Synchronize all platform models (download if not present)") 42 | sync_parser.add_argument("--force", action="store_true", help="Force update of meta.yalm before syncing") 43 | sync_parser.add_argument("--json", action="store_true", help="Output results in JSON format") 44 | 45 | args = parser.parse_args() 46 | 47 | # Set up logging 48 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 49 | logger = logging.getLogger(__name__) 50 | 51 | # Execute the requested command 52 | if args.command == "info": 53 | display_cache_info(json_output=args.json) 54 | elif args.command == "clear": 55 | clear_model_cache(include_meta=args.all, 
model_name=args.model) 56 | elif args.command == "update": 57 | update_model_definitions() 58 | elif args.command == "sync": 59 | sync_all_models(force_update=args.force, json_output=args.json) 60 | else: 61 | parser.print_help() 62 | 63 | def display_cache_info(json_output=False): 64 | """Display information about the cache""" 65 | cache_info = get_cache_info() 66 | 67 | if json_output: 68 | # Output in JSON format 69 | print(json.dumps(cache_info, indent=2)) 70 | else: 71 | # Output in human-readable format 72 | print(f"\nANEMLL-Bench Cache Information") 73 | print(f"==============================") 74 | print(f"Cache Directory: {cache_info['cache_dir']}") 75 | print(f"Models Directory: {cache_info['models_dir']}") 76 | print(f"Meta File: {cache_info['meta_file']} (Exists: {cache_info['meta_file_exists']})") 77 | print(f"Total Cache Size: {cache_info['total_size_mb']:.2f} MB") 78 | 79 | if cache_info['models']: 80 | print(f"\nCached Models:") 81 | print(f"-------------") 82 | for model in cache_info['models']: 83 | print(f" - {model['name']} ({model['type']})") 84 | print(f" Path: {model['path']}") 85 | print(f" Size: {model['size_mb']:.2f} MB") 86 | print() 87 | else: 88 | print("\nNo models in cache") 89 | 90 | def clear_model_cache(include_meta=False, model_name=None): 91 | """Clear the model cache""" 92 | logger = logging.getLogger(__name__) 93 | 94 | if model_name: 95 | logger.info(f"Clearing model: {model_name}") 96 | elif include_meta: 97 | logger.info("Clearing entire cache including meta file") 98 | else: 99 | logger.info("Clearing model cache") 100 | 101 | success = clear_cache(include_meta=include_meta, model_name=model_name) 102 | 103 | if success: 104 | logger.info("Cache cleared successfully") 105 | else: 106 | logger.error("Failed to clear cache") 107 | 108 | def update_model_definitions(): 109 | """Update model definitions from online source""" 110 | logger = logging.getLogger(__name__) 111 | 112 | logger.info("Checking for updated model definitions...") 113 | models = check_and_update_platform_models() 114 | 115 | if models: 116 | logger.info(f"Updated model definitions: {len(models)} models available") 117 | for model in models: 118 | name = model.get("name", "unknown") 119 | model_type = model.get("type", "unknown") 120 | logger.info(f" - {name} ({model_type})") 121 | else: 122 | logger.warning("No model definitions found online") 123 | 124 | def sync_all_models(force_update=False, json_output=False): 125 | """Synchronize all platform models""" 126 | logger = logging.getLogger(__name__) 127 | 128 | logger.info("Synchronizing all platform models...") 129 | results = sync_platform_models(force_update=force_update) 130 | 131 | if json_output: 132 | # Output in JSON format 133 | print(json.dumps(results, indent=2)) 134 | else: 135 | # Output in human-readable format 136 | print(f"\nPlatform Model Synchronization Results") 137 | print(f"=====================================") 138 | print(f"Meta file updated: {results['meta_updated']}") 139 | print(f"Models checked: {results['models_checked']}") 140 | print(f"Models downloaded: {results['models_downloaded']}") 141 | print(f"Models skipped (already exist): {results['models_skipped']}") 142 | print(f"Models failed: {results['models_failed']}") 143 | 144 | if results['models']: 145 | print(f"\nModel Details:") 146 | print(f"-------------") 147 | for model in results['models']: 148 | print(f" - {model['name']} ({model['type']})") 149 | print(f" Path: {model['path']}") 150 | print(f" Action: {model['action']}") 151 | if 'error' 
in model: 152 | print(f" Error: {model['error']}") 153 | print() 154 | 155 | if __name__ == "__main__": 156 | main() -------------------------------------------------------------------------------- /examples/benchmark_dual_models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Example script to benchmark two models simultaneously to measure bandwidth utilization. 4 | 5 | This script demonstrates how to: 6 | 1. Load two different CoreML models 7 | 2. Benchmark them individually as a baseline 8 | 3. Benchmark them running simultaneously in separate threads 9 | 4. Compare the results to identify potential bandwidth improvements 10 | """ 11 | 12 | import os 13 | import sys 14 | import argparse 15 | import time 16 | import subprocess 17 | import platform 18 | from pathlib import Path 19 | 20 | # Add parent directory to path to import anemll_bench 21 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) 22 | 23 | from anemll_bench import Benchmark 24 | import coremltools as ct 25 | 26 | 27 | def extract_input_shape_from_model(model): 28 | """Extract the required input shape from a CoreML model""" 29 | try: 30 | # Get model spec 31 | spec = model.get_spec().description.input 32 | if spec and len(spec) > 0: 33 | # Extract shape from first input 34 | input_tensor = spec[0] 35 | shape = [dim for dim in input_tensor.type.multiArrayType.shape] 36 | return shape 37 | except Exception as e: 38 | print(f"Error extracting input shape: {e}") 39 | 40 | # Return a default shape if we couldn't extract it 41 | return [1, 1, 4096] 42 | 43 | 44 | def main(): 45 | # Parse arguments 46 | parser = argparse.ArgumentParser(description="Benchmark two models simultaneously to measure bandwidth utilization") 47 | parser.add_argument("--runs", type=int, default=300, help="Number of benchmark runs per model (default: 300)") 48 | parser.add_argument("--backend", type=str, default="ANE", choices=["CPU", "GPU", "ANE", "ALL"], 49 | help="Backend to use for benchmarking (default: ANE)") 50 | parser.add_argument("--report", type=str, default=None, help="Generate HTML report with this filename") 51 | parser.add_argument("--no-browser", action="store_true", help="Don't open the report in a browser") 52 | 53 | args = parser.parse_args() 54 | 55 | # Create benchmark instance 56 | benchmark = Benchmark() 57 | 58 | # Specify the exact models to use 59 | model1_name = "llama_lm_head" 60 | model2_name = "DeepHermes_lm_head" 61 | 62 | print(f"Using models: {model1_name} and {model2_name}") 63 | 64 | # Load the models 65 | cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "anemll-bench", "models") 66 | 67 | model1_path = os.path.join(cache_dir, f"{model1_name}.mlpackage") 68 | model2_path = os.path.join(cache_dir, f"{model2_name}.mlpackage") 69 | 70 | # Map string backend to CoreML compute units 71 | compute_units_map = { 72 | "CPU": ct.ComputeUnit.CPU_ONLY, 73 | "GPU": ct.ComputeUnit.CPU_AND_GPU, 74 | "ANE": ct.ComputeUnit.CPU_AND_NE, 75 | "ALL": ct.ComputeUnit.ALL 76 | } 77 | compute_unit = compute_units_map.get(args.backend, ct.ComputeUnit.CPU_AND_NE) 78 | 79 | # Load models directly using CoreML 80 | print(f"\nLoading models...") 81 | 82 | try: 83 | # Load both models 84 | model1 = ct.models.model.MLModel(model1_path, compute_units=compute_unit) 85 | model2 = ct.models.model.MLModel(model2_path, compute_units=compute_unit) 86 | 87 | # Extract input shapes directly from the models 88 | model1_input_shape = 
extract_input_shape_from_model(model1) 89 | model2_input_shape = extract_input_shape_from_model(model2) 90 | 91 | print(f"Model 1 input shape: {model1_input_shape}") 92 | print(f"Model 2 input shape: {model2_input_shape}") 93 | 94 | # Run dual benchmark 95 | if model1 and model2: 96 | # Run dual benchmark 97 | dual_results = benchmark.benchmark_dual_models( 98 | model1=model1, 99 | model1_name=model1_name, 100 | model1_input_shape=model1_input_shape, 101 | model2=model2, 102 | model2_name=model2_name, 103 | model2_input_shape=model2_input_shape, 104 | backend=args.backend, 105 | num_runs=args.runs 106 | ) 107 | 108 | # Get the results to calculate averages 109 | if isinstance(dual_results, dict) and 'parallel_results' in dual_results: 110 | parallel_results = dual_results['parallel_results'] 111 | if len(parallel_results) >= 3: # We should have two individual model results plus the combined result 112 | # The third result should be the combined performance 113 | combined_result = parallel_results[2] 114 | print(f"Combined performance: {combined_result.inference_time_ms:.2f} ms, {combined_result.throughput_gb_s:.2f} GB/s") 115 | 116 | # Get the report directory 117 | reports_dir = os.path.join(os.path.expanduser("~"), ".cache", "anemll-bench", "reports") 118 | os.makedirs(reports_dir, exist_ok=True) 119 | 120 | # Generate report if requested 121 | timestamp = time.strftime("%Y%m%d_%H%M%S") 122 | report_filename = args.report or f"dual_benchmark_report_{timestamp}.html" 123 | report_path = os.path.join(reports_dir, report_filename) 124 | 125 | benchmark.generate_report(output_path=report_path, include_charts=True, auto_open=False) # Don't auto-open in coremltools 126 | 127 | print(f"\nReport saved to: {report_path}") 128 | 129 | # Open the report in the default web browser if not disabled 130 | if not args.no_browser: 131 | try: 132 | print(f"Opening report in web browser...") 133 | # Use system 'open' command for macOS, which works more reliably 134 | if platform.system() == 'Darwin': # macOS 135 | subprocess.call(['open', report_path]) 136 | else: # Try the webbrowser module for other platforms 137 | import webbrowser 138 | webbrowser.open(f"file://{report_path}") 139 | except Exception as e: 140 | print(f"Error opening report in browser: {e}") 141 | else: 142 | print("Error loading models.") 143 | return 1 144 | 145 | except Exception as e: 146 | print(f"Error loading or benchmarking models: {e}") 147 | return 1 148 | 149 | return 0 150 | 151 | 152 | if __name__ == "__main__": 153 | sys.exit(main()) -------------------------------------------------------------------------------- /examples/basic_benchmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Basic example demonstrating how to use ANEMLL-Bench to benchmark 4 | models on the Apple Neural Engine 5 | """ 6 | 7 | import os 8 | import sys 9 | import json 10 | import torch 11 | import argparse 12 | from transformers import AutoModelForCausalLM, AutoModel 13 | 14 | # Add parent directory to path to import anemll_bench 15 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 16 | 17 | from anemll_bench import Benchmark 18 | from anemll_bench.utils.system_info import get_system_info 19 | 20 | 21 | def parse_args(): 22 | """Parse command line arguments""" 23 | parser = argparse.ArgumentParser(description='ANEMLL-Bench: Apple Neural Engine Benchmarking Tool') 24 | parser.add_argument('--model', type=str, default="microsoft/phi-2", 25 | 
help='Hugging Face model ID to benchmark') 26 | parser.add_argument('--sequence-length', type=int, default=128, 27 | help='Sequence length for the input') 28 | parser.add_argument('--batch-size', type=int, default=1, 29 | help='Batch size for the input') 30 | parser.add_argument('--runs', type=int, default=50, 31 | help='Number of benchmark runs') 32 | parser.add_argument('--output', type=str, default="benchmark_report.html", 33 | help='Path to save the benchmark report') 34 | parser.add_argument('--config', type=str, default=None, 35 | help='Optional: Path to benchmark configuration JSON file') 36 | parser.add_argument('--upload', action='store_true', 37 | help='Upload the report to a sharing service') 38 | parser.add_argument('--upload-service', type=str, default='jsonbin', choices=['gist', 'pastebin', 'jsonbin'], 39 | help='Service to upload the report to') 40 | return parser.parse_args() 41 | 42 | 43 | def main(): 44 | """Run a basic benchmark example""" 45 | args = parse_args() 46 | 47 | # Print system information 48 | system_info = get_system_info() 49 | print("\n=== System Information ===") 50 | print(f"Mac Model: {system_info.get('mac_model', 'Unknown')}") 51 | print(f"OS: {system_info.get('os', {}).get('name', 'Unknown')} {system_info.get('os', {}).get('release', '')}") 52 | print(f"CPU: {system_info.get('cpu', {}).get('brand', 'Unknown')}") 53 | print(f"RAM: {system_info.get('ram', {}).get('total_gb', 'Unknown')} GB") 54 | print(f"Apple Silicon: {'Yes' if system_info.get('apple_silicon', False) else 'No'}") 55 | print(f"Python: {system_info.get('python_version', 'Unknown')}") 56 | print("===========================\n") 57 | 58 | # Initialize benchmark with optional config file 59 | benchmark = Benchmark(config_path=args.config) 60 | 61 | if args.config: 62 | print(f"Running benchmarks from config file: {args.config}") 63 | benchmark.run() 64 | else: 65 | print(f"Benchmarking model: {args.model}") 66 | try: 67 | # Create input shape 68 | # For text models: [batch_size, sequence_length] 69 | # For vision models or more complex shapes, this would need to be adjusted 70 | print(f"Loading model from Hugging Face: {args.model}") 71 | try: 72 | # Try to load as causal LM first 73 | model = AutoModelForCausalLM.from_pretrained( 74 | args.model, 75 | torch_dtype=torch.float16, 76 | device_map="cpu" # Ensure it's loaded on CPU first 77 | ) 78 | print("Model loaded as Causal LM") 79 | except Exception as e: 80 | print(f"Could not load as Causal LM, trying generic model: {e}") 81 | model = AutoModel.from_pretrained( 82 | args.model, 83 | torch_dtype=torch.float16, 84 | device_map="cpu" 85 | ) 86 | print("Model loaded as generic model") 87 | 88 | # Get model's hidden size - different models have different attributes 89 | hidden_size = 768 # Default fallback 90 | if hasattr(model.config, 'hidden_size'): 91 | hidden_size = model.config.hidden_size 92 | elif hasattr(model.config, 'd_model'): 93 | hidden_size = model.config.d_model 94 | 95 | # Standard input shape for transformer models 96 | input_shape = [args.batch_size, args.sequence_length, hidden_size] 97 | print(f"Using input shape: {input_shape}") 98 | 99 | # Benchmark on CPU first 100 | print("\n=== Running CPU Benchmark ===") 101 | cpu_result = benchmark.benchmark_model( 102 | model=model, 103 | model_name=args.model, 104 | input_shape=input_shape, 105 | backend="CPU", 106 | num_runs=args.runs 107 | ) 108 | 109 | # Benchmark on Apple Neural Engine 110 | print("\n=== Running ANE Benchmark ===") 111 | ane_result = 
benchmark.benchmark_model( 112 | model=model, 113 | model_name=args.model, 114 | input_shape=input_shape, 115 | backend="ANE", 116 | num_runs=args.runs 117 | ) 118 | 119 | # Calculate speedup 120 | speedup = cpu_result.inference_time_ms / ane_result.inference_time_ms if ane_result.inference_time_ms > 0 else 0 121 | 122 | print("\n=== Benchmark Summary ===") 123 | print(f"Model: {args.model}") 124 | print(f"CPU Inference: {cpu_result.inference_time_ms:.2f} ms, {cpu_result.tflops:.4f} TFLOPS") 125 | print(f"ANE Inference: {ane_result.inference_time_ms:.2f} ms, {ane_result.tflops:.4f} TFLOPS") 126 | print(f"ANE Throughput: {ane_result.throughput_gbps:.2f} GB/s") 127 | print(f"Speedup: {speedup:.2f}x") 128 | 129 | # Generate report 130 | print(f"\nGenerating report to {args.output}...") 131 | report_url = benchmark.generate_report( 132 | output_path=args.output, 133 | upload=args.upload, 134 | upload_service=args.upload_service 135 | ) 136 | 137 | if report_url: 138 | print(f"Report uploaded to: {report_url}") 139 | print("You can share this URL to let others view your benchmark results.") 140 | 141 | except Exception as e: 142 | print(f"Error benchmarking model: {e}") 143 | 144 | print("\nBenchmark complete! Check the reports directory for results.") 145 | 146 | 147 | if __name__ == "__main__": 148 | main() -------------------------------------------------------------------------------- /Results.MD: -------------------------------------------------------------------------------- 1 | # ANEMLL-Bench Results 2 | 3 | This document presents benchmark results for various machine learning models on different Apple Silicon chips, focusing on Neural Engine (ANE) performance. 4 | 5 | ## ⚠️ ATTENTION: MORE M3 RESULTS NEEDED! ⚠️ 6 | 7 | **We now have M3 Max data but still need more M3 series benchmarks!** If you have access to any other M3 chip variant (M3, M3 Pro, or M3 Ultra), please consider running the benchmarks and submitting your results. Your contribution will help complete our cross-generation performance analysis. 8 | 9 | *Submit results to: [realanemll@gmail.com](mailto:realanemll@gmail.com) or [open an issue](https://github.com/Anemll/anemll-bench/issues/new)* 10 | 11 | ## Overview 12 | 13 | ANEMLL-Bench measures two primary metrics: 14 | 1. **Memory Bandwidth (GB/s)**: How effectively each Apple Silicon generation utilizes memory bandwidth (higher is better) 15 | 2. **Inference Time (ms)**: How quickly the model produces results (lower is better) 16 | 17 | Higher memory bandwidth and lower inference time indicate better performance. 
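The two numbers are tied together: for a fixed model, the reported bandwidth is, to a first approximation, the bytes the Neural Engine moves per inference divided by the measured latency. The snippet below is an illustrative sketch of that relationship rather than the exact formula the tool uses; the ~0.46 GB per-run figure is back-calculated from the M1 row in the table further down.

```python
# Illustrative sketch only: relates the two reported metrics, assuming the GB/s
# figure is (bytes moved per inference) / (measured latency). Not the tool's code.
def bandwidth_gb_s(bytes_per_inference: float, latency_ms: float) -> float:
    return bytes_per_inference / (latency_ms / 1000.0) / 1e9

# ~0.46 GB moved per run at 7.52 ms latency gives roughly the M1's 61 GB/s.
print(f"{bandwidth_gb_s(0.458e9, 7.52):.1f} GB/s")  # ≈ 60.9
```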
18 | 19 | ## Apple Silicon Performance Comparison 20 | 21 | The chart below shows performance comparison across Apple Silicon generations for the `llama_lm_head` model: 22 | 23 |  24 | 25 | As shown in the visualization: 26 | - **M4 Series** chips demonstrate approximately 2.3x higher memory bandwidth compared to M1 series 27 | - **M3 Max** shows impressive memory bandwidth (2.2x over M1 series) and excellent inference performance (1.9x faster than M1) 28 | - **Base M2** has slightly better bandwidth than M1 series but slightly worse inference time 29 | - The M2 Max/Ultra and M4 base model show modest improvements, while M3 Max and high-end M4 variants represent significant leaps in performance 30 | - **M5** shows solid performance with good memory bandwidth improvement (1.3x) and faster inference times (1.2x), positioning it competitively between M2 and M3 Max performance levels 31 | 32 | ## Detailed Benchmark Results 33 | 34 | ### llama_lm_head Model (Standard) 35 | 36 | | Chip | Memory Bandwidth (GB/s) | Inference Time (ms) | Bandwidth Factor | Inference Factor | 37 | |------|------------------------|---------------------|------------------|------------------| 38 | | M1 | 60.87 | 7.52 | 1.1x | 1.0x | 39 | | M1 Pro | 54.90 | 7.45 | 1.0x | 1.0x | 40 | | M1 Max | 54.62 | 7.61 | 1.0x | 1.0x | 41 | | M1 Ultra | 54.72 | 7.58 | 1.0x | 1.0x | 42 | | M2 | 60.45 | 8.67 | 1.1x | 0.9x | 43 | | M2 Max | 62.01 | 6.64 | 1.1x | 1.1x | 44 | | M2 Ultra | 61.68 | 6.70 | 1.1x | 1.1x | 45 | | M3 | 63.10 | 6.95 | 1.2x | 1.1x | 46 | | M3 Max | 120.22 | 3.98 | 2.2x | 1.9x | 47 | | M4 | 64.18 | 6.45 | 1.2x | 1.2x | 48 | | M4 Pro 24GB Mini| 126.36 | 3.85 | 2.3x | 2.0x | 49 | | M4 Max | 118.88 | 3.87 | 2.2x | 2.0x | 50 | | M5 | 70.21 | 6.10 | 1.3x | 1.2x | 51 | 52 | ### Key Observations 53 | 54 | 1. **Neural Engine Scaling**: 55 | - All M1 variants (Pro, Max, Ultra) show very similar performance, suggesting limited Neural Engine scaling in first-generation Apple Silicon 56 | - Similar pattern with M2 Ultra vs M2 Max 57 | - The base M2 chip has slightly better bandwidth than M1 series, but surprisingly worse inference time 58 | - M3 Max shows excellent memory bandwidth utilization and very good inference times, competitive with M4 series 59 | - M4 series demonstrates slightly better performance across both metrics compared to M3 Max 60 | - The base M4 16GB shows modest improvements (~1.2x) over M1 series, significantly underperforming both M3 Max and higher-end M4 variants 61 | - **M5 shows solid performance**: It achieves 1.3x memory bandwidth improvement over M1 series (70.21 GB/s) and 1.2x faster inference times (6.10 ms), positioning it competitively between M2 and M3 Max performance levels 62 | 63 | 2. **Memory Bandwidth Efficiency**: 64 | - M3 Max and high-end M4 series show ~2.2-2.3x improvement in memory bandwidth utilization 65 | - Base M2 shows only a minor improvement (1.1x) over M1 series 66 | - This indicates that the substantial architectural improvements in the Neural Engine started with the M3 generation 67 | - Entry-level M4 16GB shows ~1.2x improvement, suggesting memory configuration or thermal constraints may be factors 68 | - M5 achieves 1.3x memory bandwidth improvement, falling between M2 and M3 Max performance levels 69 | 70 | 3. 
**Inference Time Improvements**: 71 | - Base M2 actually performs slightly worse (0.9x) than M1 series for inference time 72 | - M2 Max/Ultra models show modest improvements (1.1x) over M1 series 73 | - M3 Max shows impressive inference performance (3.98 ms), nearly as fast as the best M4 chips 74 | - High-end M4 chips are only marginally faster (3.85-3.87 ms) than the M3 Max 75 | - This suggests the major architectural leap for Neural Engine inference performance occurred with the M3 generation 76 | - M4 represents a more incremental improvement over M3 Max for inference workloads 77 | - **M5 shows solid inference performance** (6.10 ms), achieving 1.2x improvement over M1 series, which aligns well with its memory bandwidth improvements and positions it competitively in the performance landscape 78 | 79 | ## Running Your Own Benchmarks 80 | 81 | To reproduce these results or run benchmarks on your own device: 82 | 83 | ```bash 84 | # Install dependencies 85 | pip install -r requirements.txt 86 | pip install -e . 87 | 88 | # Download optimized models for your macOS version 89 | python examples/sync_models.py 90 | 91 | # Run benchmarks on all available models 92 | python examples/benchmark_all_models.py 93 | 94 | # Generate visualization of results 95 | python examples/plot_chip_comparison.py --save 96 | ``` 97 | 98 | ## Contributing Results 99 | 100 | We're building a comprehensive benchmark database across all Apple Silicon variants. Please consider submitting your benchmark results by: 101 | 102 | 1. Running the benchmarks using the instructions above 103 | 2. Opening an issue on our GitHub repository with your results 104 | 3. Or emailing your results to realanemll@gmail.com 105 | 106 | When submitting results, please include: 107 | - Your exact device model (e.g., "MacBook Pro 14" 2023, M3 Pro 12-core CPU, 18-core GPU") 108 | - macOS version 109 | - Any cooling modifications or environmental factors 110 | - The complete benchmark report 111 | 112 | ## Analyzing Your Results 113 | 114 | When analyzing your benchmark results, consider: 115 | 116 | 1. **Relative Performance**: How does your chip compare to others in the same family? 117 | 2. **Scaling Efficiency**: If you have a Pro/Max/Ultra variant, how efficiently does it scale? 118 | 3. **Model-Specific Performance**: Different model architectures may perform differently on the same hardware 119 | 120 | ## Future Work 121 | 122 | We plan to expand our benchmarks to include: 123 | - More diverse model architectures 124 | - Power efficiency measurements (performance per watt) 125 | - Sustained performance under thermal constraints 126 | - Newer versions of CoreML and PyTorch 127 | 128 | ## Acknowledgements 129 | 130 | Thanks to all contributors who have submitted benchmark results and helped improve ANEMLL-Bench. 131 | -------------------------------------------------------------------------------- /examples/test_model_loading.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Test script for manually loading and running CoreML models 4 | """ 5 | 6 | import os 7 | import sys 8 | import logging 9 | import numpy as np 10 | import argparse 11 | 12 | try: 13 | import coremltools as ct 14 | except ImportError: 15 | print("CoreML Tools not installed. 
Please install with: pip install coremltools") 16 | sys.exit(1) 17 | 18 | # Configure logging 19 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 20 | logger = logging.getLogger(__name__) 21 | 22 | def list_models_in_cache(): 23 | """List all models in the cache directory""" 24 | cache_dir = os.path.expanduser("~/.cache/anemll-bench/models") 25 | 26 | if not os.path.exists(cache_dir): 27 | logger.error(f"Cache directory not found: {cache_dir}") 28 | return [] 29 | 30 | models = os.listdir(cache_dir) 31 | return [os.path.join(cache_dir, model) for model in models] 32 | 33 | def load_model_with_various_methods(model_path): 34 | """Try loading a model with different approaches""" 35 | logger.info(f"Testing model loading for: {model_path}") 36 | 37 | # Print model directory contents for mlmodelc 38 | if os.path.isdir(model_path): 39 | logger.info(f"Model directory contents:") 40 | for root, dirs, files in os.walk(model_path): 41 | rel_path = os.path.relpath(root, model_path) 42 | prefix = "└── " if rel_path == "." else f"├── {rel_path}/" 43 | for file in files: 44 | logger.info(f"{prefix}{file}") 45 | 46 | methods_tried = 0 47 | errors = [] 48 | 49 | # Method 1: Standard loading 50 | try: 51 | logger.info("Method 1: Trying standard loading") 52 | model = ct.models.MLModel(model_path) 53 | logger.info("✅ SUCCESS: Standard loading worked") 54 | 55 | # Get model spec information 56 | logger.info(f"Model inputs: {model.get_spec().description.input}") 57 | logger.info(f"Model outputs: {model.get_spec().description.output}") 58 | 59 | return model 60 | except Exception as e: 61 | methods_tried += 1 62 | errors.append(f"Method 1 failed: {str(e)}") 63 | logger.warning(f"Method 1 failed: {str(e)}") 64 | 65 | # Method 2: Loading with skip_model_load=True 66 | try: 67 | logger.info("Method 2: Trying with skip_model_load=True") 68 | model = ct.models.MLModel(model_path, skip_model_load=True) 69 | logger.info("✅ SUCCESS: Loading with skip_model_load worked") 70 | 71 | # Get model spec information 72 | try: 73 | logger.info(f"Model inputs: {model.get_spec().description.input}") 74 | logger.info(f"Model outputs: {model.get_spec().description.output}") 75 | except: 76 | logger.warning("Couldn't get model spec information") 77 | 78 | return model 79 | except Exception as e: 80 | methods_tried += 1 81 | errors.append(f"Method 2 failed: {str(e)}") 82 | logger.warning(f"Method 2 failed: {str(e)}") 83 | 84 | # Method 3: Create a dummy spec and try to run prediction 85 | if os.path.isdir(model_path) and os.path.exists(os.path.join(model_path, "model.mil")): 86 | try: 87 | logger.info("Method 3: Creating a custom model") 88 | 89 | # Create a dummy model 90 | from coremltools.proto import Model_pb2 91 | 92 | # Create a minimal spec 93 | spec = Model_pb2.Model() 94 | spec.specificationVersion = 5 95 | 96 | # Add input description 97 | input_desc = spec.description.input.add() 98 | input_desc.name = "input_ids" 99 | input_desc.type.multiArrayType.shape.append(1) # Batch 100 | input_desc.type.multiArrayType.shape.append(512) # Sequence length 101 | input_desc.type.multiArrayType.dataType = Model_pb2.ArrayFeatureType.FLOAT32 102 | 103 | # Create dummy model 104 | dummy_model = ct.models.MLModel(spec) 105 | dummy_model.path = model_path 106 | 107 | logger.info("✅ SUCCESS: Created dummy model") 108 | return dummy_model 109 | except Exception as e: 110 | methods_tried += 1 111 | errors.append(f"Method 3 failed: {str(e)}") 112 | logger.warning(f"Method 3 failed: 
{str(e)}") 113 | 114 | # If all methods failed 115 | logger.error(f"All {methods_tried} loading methods failed.") 116 | for error in errors: 117 | logger.error(f" - {error}") 118 | 119 | return None 120 | 121 | def run_model_inference(model, model_path): 122 | """Try to run inference on the model""" 123 | if model is None: 124 | logger.error("No model to run inference on") 125 | return 126 | 127 | logger.info(f"Testing inference on model: {model_path}") 128 | 129 | try: 130 | # Create a dummy input 131 | dummy_input = {"input_ids": np.random.rand(1, 512).astype(np.float32)} 132 | 133 | # Try to run inference 134 | logger.info("Running inference with dummy input...") 135 | result = model.predict(dummy_input) 136 | 137 | logger.info("✅ SUCCESS: Model inference successful!") 138 | logger.info(f"Prediction result keys: {list(result.keys())}") 139 | 140 | # Print first few values of first output 141 | first_output_key = list(result.keys())[0] 142 | first_output = result[first_output_key] 143 | logger.info(f"First output ({first_output_key}) shape: {first_output.shape}") 144 | logger.info(f"First few values: {first_output.flatten()[:5]}") 145 | 146 | except Exception as e: 147 | logger.error(f"Inference failed: {str(e)}") 148 | 149 | def main(): 150 | parser = argparse.ArgumentParser(description="Test CoreML model loading") 151 | parser.add_argument("--model", type=str, help="Path to the model to test") 152 | parser.add_argument("--list-models", action="store_true", help="List all models in cache") 153 | parser.add_argument("--skip-inference", action="store_true", help="Skip inference test") 154 | args = parser.parse_args() 155 | 156 | if args.list_models: 157 | models = list_models_in_cache() 158 | logger.info(f"Found {len(models)} models in cache:") 159 | for model in models: 160 | logger.info(f" - {model}") 161 | return 162 | 163 | # Test loading one specific model or all models in cache 164 | if args.model: 165 | model_path = os.path.expanduser(args.model) 166 | model = load_model_with_various_methods(model_path) 167 | 168 | if model and not args.skip_inference: 169 | run_model_inference(model, model_path) 170 | else: 171 | # Test all models in cache 172 | models = list_models_in_cache() 173 | logger.info(f"Testing {len(models)} models in cache") 174 | 175 | for model_path in models: 176 | model = load_model_with_various_methods(model_path) 177 | 178 | if model and not args.skip_inference: 179 | run_model_inference(model, model_path) 180 | 181 | logger.info("-" * 80) 182 | 183 | if __name__ == "__main__": 184 | main() -------------------------------------------------------------------------------- /anemll_bench/utils/visualization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Visualization utilities for ANEMLL-Bench. 3 | 4 | This module provides functions to visualize benchmark results using 5 | matplotlib and other plotting libraries. 
6 | """ 7 | 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | from typing import List, Union, Dict, Optional, Tuple 11 | import os 12 | import logging 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | def plot_chip_comparison( 18 | chips: List[str], 19 | bandwidth: List[float], 20 | inference: List[float], 21 | bandwidth_factor: Optional[List[str]] = None, 22 | inference_factor: Optional[List[str]] = None, 23 | figsize: Tuple[int, int] = (10, 6), 24 | save_path: Optional[str] = None, 25 | show_plot: bool = True, 26 | title: str = 'Chip Performance Comparison', 27 | bandwidth_color: str = 'steelblue', 28 | inference_color: str = 'indianred', 29 | ) -> plt.Figure: 30 | """ 31 | Create a dual-axis bar chart comparing bandwidth and inference time across different chips. 32 | 33 | Args: 34 | chips: List of chip names to display on x-axis 35 | bandwidth: List of memory bandwidth values in GB/s 36 | inference: List of inference time values in ms 37 | bandwidth_factor: Optional list of factors relative to baseline (e.g., '1.0x', '2.3x') 38 | inference_factor: Optional list of factors relative to baseline (e.g., '1.0x', '2.0x') 39 | figsize: Tuple specifying figure dimensions (width, height) in inches 40 | save_path: Optional path to save the figure 41 | show_plot: Whether to display the plot (plt.show()) 42 | title: Plot title 43 | bandwidth_color: Color for bandwidth bars 44 | inference_color: Color for inference time bars 45 | 46 | Returns: 47 | Figure object containing the plot 48 | """ 49 | if len(chips) != len(bandwidth) or len(chips) != len(inference): 50 | raise ValueError("Length of chips, bandwidth, and inference lists must be equal") 51 | 52 | if bandwidth_factor and len(bandwidth_factor) != len(chips): 53 | raise ValueError("Length of bandwidth_factor must match chips") 54 | 55 | if inference_factor and len(inference_factor) != len(chips): 56 | raise ValueError("Length of inference_factor must match chips") 57 | 58 | # Setup positions and bar width 59 | x = np.arange(len(chips)) 60 | width = 0.35 61 | 62 | # Create the figure and twin axes 63 | fig, ax1 = plt.subplots(figsize=figsize) 64 | ax2 = ax1.twinx() 65 | 66 | # Plot the bandwidth bars on ax1 (left y-axis) 67 | bars1 = ax1.bar(x - width/2, bandwidth, width, label='Bandwidth (GB/s)', color=bandwidth_color) 68 | 69 | # Plot the inference time bars on ax2 (right y-axis) 70 | bars2 = ax2.bar(x + width/2, inference, width, label='Inference Time (ms)', color=inference_color) 71 | 72 | # Configure the x-axis 73 | ax1.set_xticks(x) 74 | ax1.set_xticklabels(chips) 75 | ax1.set_xlabel('Chip') 76 | 77 | # Set y-axis labels 78 | ax1.set_ylabel('Bandwidth (GB/s)\n(higher is better)', color=bandwidth_color) 79 | ax2.set_ylabel('Inference Time (ms)\n(lower is better)', color=inference_color) 80 | 81 | # Set title and legends 82 | plt.title(title) 83 | ax1.legend(loc='upper left') 84 | ax2.legend(loc='upper right') 85 | 86 | # Annotate the bandwidth factor labels if provided 87 | if bandwidth_factor: 88 | for i, bar in enumerate(bars1): 89 | x_center = bar.get_x() + bar.get_width() / 2 90 | ax1.text(x_center, -0.08, bandwidth_factor[i], 91 | ha='center', va='top', 92 | transform=ax1.get_xaxis_transform(), 93 | fontsize=8, color='black') 94 | 95 | # Annotate the inference factor labels if provided 96 | if inference_factor: 97 | for i, bar in enumerate(bars2): 98 | x_center = bar.get_x() + bar.get_width() / 2 99 | ax2.text(x_center, -0.08, inference_factor[i], 100 | ha='center', va='top', 101 | 
transform=ax2.get_xaxis_transform(), 102 | fontsize=8, color='black') 103 | 104 | # Check if any chip labels contain newlines and add more bottom padding if needed 105 | if any('\n' in chip for chip in chips): 106 | plt.subplots_adjust(bottom=0.18) # Increase bottom margin for multi-line labels 107 | 108 | plt.tight_layout() 109 | 110 | # Save the figure if a path is provided 111 | if save_path: 112 | try: 113 | plt.savefig(save_path, dpi=300, bbox_inches='tight') 114 | logger.info(f"Figure saved to {save_path}") 115 | except Exception as e: 116 | logger.error(f"Failed to save figure: {e}") 117 | 118 | # Show the plot if requested 119 | if show_plot: 120 | plt.show() 121 | 122 | return fig 123 | 124 | 125 | def plot_benchmark_results( 126 | benchmark_data: Dict, 127 | model_name: Optional[str] = None, 128 | plot_type: str = 'comparison', 129 | plot_title: Optional[str] = None, 130 | save_dir: Optional[str] = None, 131 | show_plot: bool = True, 132 | ) -> Optional[str]: 133 | """ 134 | Create visualizations from benchmark results. 135 | 136 | Args: 137 | benchmark_data: Dictionary containing benchmark results 138 | model_name: Optional name of the model for the title 139 | plot_type: Type of plot to generate ('comparison', 'timeline', etc.) 140 | plot_title: Optional custom plot title 141 | save_dir: Optional directory to save the generated figures 142 | show_plot: Whether to display the plots 143 | 144 | Returns: 145 | Path to the saved figure if save_dir is provided, otherwise None 146 | """ 147 | # This is a placeholder function for future expansion 148 | # Currently just delegates to plot_chip_comparison 149 | 150 | if plot_type == 'comparison' and 'chips' in benchmark_data: 151 | # Extract data from benchmark_data 152 | chips = benchmark_data.get('chips', []) 153 | bandwidth = benchmark_data.get('bandwidth', []) 154 | inference = benchmark_data.get('inference', []) 155 | bandwidth_factor = benchmark_data.get('bandwidth_factor', None) 156 | inference_factor = benchmark_data.get('inference_factor', None) 157 | 158 | # Create title 159 | if plot_title: 160 | title = plot_title 161 | else: 162 | title = f'ANEMLL-BENCH: Apple Neural Engine Performance Comparison' 163 | 164 | if model_name: 165 | title += f' - {model_name}' 166 | 167 | # Create save path if directory is provided 168 | save_path = None 169 | if save_dir: 170 | os.makedirs(save_dir, exist_ok=True) 171 | filename = f"chip_comparison{'_' + model_name if model_name else ''}.png" 172 | save_path = os.path.join(save_dir, filename) 173 | 174 | # Create the plot 175 | fig = plot_chip_comparison( 176 | chips=chips, 177 | bandwidth=bandwidth, 178 | inference=inference, 179 | bandwidth_factor=bandwidth_factor, 180 | inference_factor=inference_factor, 181 | title=title, 182 | save_path=save_path, 183 | show_plot=show_plot 184 | ) 185 | 186 | return save_path 187 | 188 | else: 189 | logger.warning(f"Unsupported plot type: {plot_type}") 190 | return None -------------------------------------------------------------------------------- /examples/generate_results_report.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Script to automate the process of generating benchmark reports, visualizations, 4 | and updating the Results.MD file with latest benchmark data. 
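Typical invocations (a sketch; the flags are defined in main() below):

    # Regenerate the charts and Results.MD from the bundled sample data
    python examples/generate_results_report.py --output-dir ./reports

    # Run the full benchmark suite first (300 runs), then refresh the report
    python examples/generate_results_report.py --run-benchmarks --runs 300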
5 | """ 6 | 7 | import os 8 | import sys 9 | import argparse 10 | import datetime 11 | import json 12 | from pathlib import Path 13 | 14 | # Add parent directory to path to allow running this script directly 15 | parent_dir = str(Path(__file__).resolve().parent.parent) 16 | if parent_dir not in sys.path: 17 | sys.path.insert(0, parent_dir) 18 | 19 | from anemll_bench.utils import plot_chip_comparison, plot_benchmark_results 20 | 21 | 22 | def run_benchmarks(args): 23 | """Run benchmarks if requested""" 24 | if args.run_benchmarks: 25 | print("Running benchmarks on all available models...") 26 | benchmark_cmd = f"python examples/benchmark_all_models.py --runs {args.runs}" 27 | if args.no_sync: 28 | benchmark_cmd += " --no-sync" 29 | if args.use_local: 30 | benchmark_cmd += " --use-local" 31 | result = os.system(benchmark_cmd) 32 | if result != 0: 33 | print("Error running benchmarks. Check logs for details.") 34 | return False 35 | return True 36 | 37 | 38 | def generate_visualizations(args): 39 | """Generate visualization charts""" 40 | print("Generating visualization charts...") 41 | 42 | # Create reports directory if it doesn't exist 43 | os.makedirs(args.output_dir, exist_ok=True) 44 | 45 | # Sample data from benchmark results 46 | # In a real implementation, this would come from parsing the benchmark results file 47 | chips = ['M1', 'M1 Pro', 'M1 Max', 'M1 Ultra', 'M2', 'M2 Max', 'M2 Ultra', 'M3 Max', 'M4', 'M4 Pro\n24GB Mini', 'M4 Max'] 48 | bandwidth = [60.87, 54.90, 54.62, 54.72, 60.45, 62.01, 61.68, 120.22, 64.18, 126.36, 118.88] # GB/s (llama_lm_head) 49 | inference = [7.52, 7.45, 7.61, 7.58, 8.67, 6.64, 6.70, 3.98, 6.45, 3.85, 3.87] # ms (llama_lm_head_lut6) 50 | bandwidth_factor = ['1.1x', '1.0x', '1.0x', '1.0x', '1.1x', '1.1x', '1.1x', '2.2x', '1.2x', '2.3x', '2.2x'] 51 | inference_factor = ['1.0x', '1.0x', '1.0x', '1.0x', '0.9x', '1.1x', '1.1x', '1.9x', '1.2x', '2.0x', '2.0x'] 52 | 53 | # Create benchmark data dictionary 54 | benchmark_data = { 55 | 'chips': chips, 56 | 'bandwidth': bandwidth, 57 | 'inference': inference, 58 | 'bandwidth_factor': bandwidth_factor, 59 | 'inference_factor': inference_factor, 60 | } 61 | 62 | # Generate visualization for llama_lm_head model 63 | output_path = plot_benchmark_results( 64 | benchmark_data=benchmark_data, 65 | model_name="llama_lm_head", 66 | plot_title="ANEMLL-BENCH: Apple Neural Engine Performance Comparison", 67 | save_dir=args.output_dir, 68 | show_plot=False 69 | ) 70 | 71 | print(f"Generated visualization: {output_path}") 72 | return benchmark_data 73 | 74 | 75 | def update_results_md(benchmark_data, args): 76 | """Update Results.MD with latest benchmark data""" 77 | print("Updating Results.MD with latest benchmark data...") 78 | 79 | results_md_path = os.path.join(parent_dir, "Results.MD") 80 | 81 | # Check if Results.MD exists 82 | if not os.path.exists(results_md_path): 83 | print("Results.MD not found. Creating new file...") 84 | 85 | # Generate Results.MD content 86 | content = f"""# ANEMLL-Bench Results 87 | 88 | This document presents benchmark results for various machine learning models on different Apple Silicon chips, focusing on Neural Engine (ANE) performance. 89 | 90 | ## Overview 91 | 92 | ANEMLL-Bench measures two primary metrics: 93 | 1. **Memory Bandwidth (GB/s)**: How Apple Chip Generation utilizes memory bandwidth 94 | 2. **Inference Time (ms)**: How quickly the model produces results 95 | 96 | Higher memory bandwidth and lower inference time indicate better performance. 
97 | 98 | ## Apple Silicon Performance Comparison 99 | 100 | The chart below shows a performance comparison across Apple Silicon generations for the `llama_lm_head` model: 101 | 102 |  103 | 104 | As shown in the visualization: 105 | - **M4 Series** chips demonstrate approximately 2.3x higher memory bandwidth compared to M1 series 106 | - **M4 Series** inference times are approximately 2.0x faster than M1 series 107 | - The improvements from M1 to M2 were modest (~1.1x), while M4 represents a significant leap 108 | 109 | ## Detailed Benchmark Results 110 | 111 | ### llama_lm_head Model (Standard) 112 | 113 | | Chip | Memory Bandwidth (GB/s) | Inference Time (ms) | Bandwidth Factor | Inference Factor | 114 | |------|------------------------|---------------------|------------------|------------------|""" 115 | 116 | # Add benchmark data to table 117 | for i in range(len(benchmark_data['chips'])): 118 | content += f""" 119 | | {benchmark_data['chips'][i]} | {benchmark_data['bandwidth'][i]} | {benchmark_data['inference'][i]} | {benchmark_data['bandwidth_factor'][i]} | {benchmark_data['inference_factor'][i]} |""" 120 | 121 | content += f""" 122 | 123 | ### Key Observations 124 | 125 | 1. **Neural Engine Scaling**: 126 | - The M1 Ultra shows minimal performance gains over M1 Max, suggesting limited Neural Engine scaling in first-generation Apple Silicon 127 | - Similar pattern with M2 Ultra vs M2 Max 128 | - M4 series demonstrates better scaling and significantly improved performance 129 | 130 | 2. **Memory Bandwidth Efficiency**: 131 | - M4 series shows a ~2.3x improvement in memory bandwidth utilization 132 | - This indicates substantial architectural improvements in the Neural Engine 133 | 134 | 3. **Inference Time Improvements**: 135 | - M4 chips process the same model in approximately half the time compared to M1 chips 136 | - This translates directly to improved user experience for AI applications 137 | 138 | ## Running Your Own Benchmarks 139 | 140 | To reproduce these results or run benchmarks on your own device: 141 | 142 | ```bash 143 | # Install dependencies 144 | pip install -r requirements.txt 145 | pip install -e . 146 | 147 | # Download optimized models for your macOS version 148 | python examples/sync_models.py 149 | 150 | # Run benchmarks on all available models 151 | python examples/benchmark_all_models.py 152 | 153 | # Generate visualization of results 154 | python examples/plot_chip_comparison.py --save 155 | ``` 156 | 157 | ## Contributing Results 158 | 159 | We're building a comprehensive benchmark database across all Apple Silicon variants. Please consider submitting your benchmark results by: 160 | 161 | 1. Running the benchmarks using the instructions above 162 | 2. Opening an issue on our GitHub repository with your results 163 | 3. Or emailing your results to realanemll@gmail.com 164 | 165 | When submitting results, please include: 166 | - Your exact device model (e.g., "MacBook Pro 14" 2023, M3 Pro 12-core CPU, 18-core GPU") 167 | - macOS version 168 | - Any cooling modifications or environmental factors 169 | - The complete benchmark report 170 | 171 | ## Analyzing Your Results 172 | 173 | When analyzing your benchmark results, consider: 174 | 175 | 1. **Relative Performance**: How does your chip compare to others in the same family? 176 | 2. **Scaling Efficiency**: If you have a Pro/Max/Ultra variant, how efficiently does it scale? 177 | 3.
**Model-Specific Performance**: Different model architectures may perform differently on the same hardware 178 | 179 | ## Future Work 180 | 181 | We plan to expand our benchmarks to include: 182 | - More diverse model architectures 183 | - Power efficiency measurements (performance per watt) 184 | - Sustained performance under thermal constraints 185 | - Newer versions of CoreML and PyTorch 186 | 187 | ## Acknowledgements 188 | 189 | Thanks to all contributors who have submitted benchmark results and helped improve ANEMLL-Bench. 190 | 191 | --- 192 | *Last updated: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}* 193 | """ 194 | 195 | # Write to Results.MD 196 | with open(results_md_path, 'w') as f: 197 | f.write(content) 198 | 199 | print(f"Updated {results_md_path}") 200 | return True 201 | 202 | 203 | def main(): 204 | parser = argparse.ArgumentParser(description='ANEMLL-Bench Results Generator') 205 | parser.add_argument('--run-benchmarks', action='store_true', help='Run benchmarks before generating results') 206 | parser.add_argument('--runs', type=int, default=300, help='Number of benchmark iterations (if running benchmarks)') 207 | parser.add_argument('--no-sync', action='store_true', help='Skip model synchronization (if running benchmarks)') 208 | parser.add_argument('--use-local', action='store_true', help='Use only local models (if running benchmarks)') 209 | parser.add_argument('--output-dir', type=str, default='./reports', help='Directory to save generated files') 210 | args = parser.parse_args() 211 | 212 | # Step 1: Run benchmarks if requested 213 | if not run_benchmarks(args): 214 | return 215 | 216 | # Step 2: Generate visualizations 217 | benchmark_data = generate_visualizations(args) 218 | 219 | # Step 3: Update Results.MD 220 | update_results_md(benchmark_data, args) 221 | 222 | print("\nResults generation complete! 
🎉") 223 | print(f"- Visualizations saved to: {args.output_dir}") 224 | print(f"- Results.MD updated with latest benchmark data") 225 | 226 | if args.run_benchmarks: 227 | print("\nReminder: Consider submitting your benchmark results to help build our database!") 228 | print("Email: realanemll@gmail.com or open an issue on GitHub") 229 | 230 | 231 | if __name__ == "__main__": 232 | main() -------------------------------------------------------------------------------- /anemll_bench/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main entry point for anemll_bench 3 | """ 4 | 5 | import sys 6 | import argparse 7 | from anemll_bench import Benchmark 8 | from anemll_bench.utils.system_info import get_system_info 9 | 10 | 11 | def parse_args(): 12 | """Parse command line arguments""" 13 | parser = argparse.ArgumentParser( 14 | description='ANEMLL-Bench: Apple Neural Engine Benchmarking Tool' 15 | ) 16 | 17 | # Model specification 18 | model_group = parser.add_argument_group('Model Options') 19 | model_group.add_argument('--model', type=str, 20 | help='Hugging Face model ID to benchmark (NOT recommended - use ANEMLL-optimized models instead)') 21 | model_group.add_argument('--config', type=str, 22 | help='Path to benchmark configuration JSON file') 23 | 24 | # Input parameters 25 | input_group = parser.add_argument_group('Input Parameters') 26 | input_group.add_argument('--sequence-length', type=int, default=128, 27 | help='Sequence length for transformer models') 28 | input_group.add_argument('--batch-size', type=int, default=1, 29 | help='Batch size for inference') 30 | input_group.add_argument('--hidden-size', type=int, default=768, 31 | help='Hidden size for transformer models') 32 | 33 | # Benchmark options 34 | bench_group = parser.add_argument_group('Benchmark Options') 35 | bench_group.add_argument('--runs', type=int, default=50, 36 | help='Number of benchmark runs') 37 | bench_group.add_argument('--cpu-only', action='store_true', 38 | help='Only benchmark on CPU (skip ANE)') 39 | bench_group.add_argument('--ane-only', action='store_true', 40 | help='Only benchmark on ANE (skip CPU)') 41 | 42 | # Report options 43 | report_group = parser.add_argument_group('Report Options') 44 | report_group.add_argument('--output', type=str, default="benchmark_report.html", 45 | help='Path to save the benchmark report') 46 | report_group.add_argument('--upload', action='store_true', 47 | help='Upload the report to a sharing service') 48 | report_group.add_argument('--upload-service', type=str, default='jsonbin', 49 | choices=['gist', 'pastebin', 'jsonbin'], 50 | help='Service to upload the report to') 51 | 52 | # System options 53 | system_group = parser.add_argument_group('System Options') 54 | system_group.add_argument('--system-info', action='store_true', 55 | help='Display system information and exit') 56 | system_group.add_argument('--skip-arm64-check', action='store_true', 57 | help='Skip ARM64 validation (for testing only)') 58 | 59 | return parser.parse_args() 60 | 61 | 62 | def display_system_info(): 63 | """Display system information and exit""" 64 | system_info = get_system_info() 65 | 66 | print("\n=== ANEMLL-Bench System Information ===") 67 | print(f"Mac Model: {system_info.get('mac_model', 'Unknown')}") 68 | print(f"OS: {system_info.get('os', {}).get('name', 'Unknown')} " 69 | f"{system_info.get('os', {}).get('release', '')} " 70 | f"{system_info.get('os', {}).get('version', '')}") 71 | print(f"CPU: {system_info.get('cpu', 
{}).get('brand', 'Unknown')}") 72 | print(f"CPU Cores: {system_info.get('cpu', {}).get('cores', 'Unknown')} physical, " 73 | f"{system_info.get('cpu', {}).get('threads', 'Unknown')} logical") 74 | print(f"RAM: {system_info.get('ram', {}).get('total_gb', 'Unknown')} GB total, " 75 | f"{system_info.get('ram', {}).get('available_gb', 'Unknown')} GB available") 76 | print(f"Apple Silicon: {'Yes' if system_info.get('apple_silicon', False) else 'No'}") 77 | 78 | if system_info.get('apple_silicon', False): 79 | print("\nNeural Engine Information:") 80 | print(f"ANE Available: {'Yes' if system_info.get('neural_engine', {}).get('available', False) else 'No'}") 81 | 82 | print(f"Python Version: {system_info.get('python_version', 'Unknown')}") 83 | print("===================================\n") 84 | 85 | 86 | def main(): 87 | """Main entry point""" 88 | args = parse_args() 89 | 90 | # Just display system info if requested 91 | if args.system_info: 92 | display_system_info() 93 | return 0 94 | 95 | # Check that we have either a model or config 96 | if not args.model and not args.config: 97 | print("Error: Either --model or --config must be specified") 98 | print("\nRecommended: Use ANEMLL-optimized models instead of Hugging Face models:") 99 | print(" python examples/benchmark_all_models.py --use-local --no-sync") 100 | return 1 101 | 102 | # Warn if using Hugging Face model 103 | if args.model: 104 | print("⚠️ WARNING: You're using a Hugging Face model, which is NOT optimized for ANE!") 105 | print(" For best ANE performance, use ANEMLL-optimized models:") 106 | print(" python examples/benchmark_all_models.py --use-local --no-sync") 107 | print("") 108 | print("Continue with Hugging Face model anyway? (y/N): ", end="") 109 | try: 110 | response = input().strip().lower() 111 | if response not in ['y', 'yes']: 112 | print("Benchmark cancelled. 
Use ANEMLL models for optimal ANE performance.") 113 | return 1 114 | except KeyboardInterrupt: 115 | print("\nBenchmark cancelled.") 116 | return 1 117 | print("Continuing with Hugging Face model (performance may be suboptimal)...") 118 | print("") 119 | 120 | # Initialize benchmark 121 | benchmark = Benchmark(config_path=args.config, skip_arm64_check=args.skip_arm64_check) 122 | 123 | # If we have a config file, run that benchmark 124 | if args.config: 125 | print(f"Running benchmarks from config file: {args.config}") 126 | benchmark.run() 127 | else: 128 | from transformers import AutoModelForCausalLM, AutoModel 129 | import torch 130 | 131 | # Create backends list 132 | backends = [] 133 | if not args.ane_only: 134 | backends.append("CPU") 135 | if not args.cpu_only: 136 | backends.append("ANE") 137 | 138 | try: 139 | # Load the model from HF 140 | print(f"Loading model from Hugging Face: {args.model}") 141 | try: 142 | # Try to load as causal LM first 143 | model = AutoModelForCausalLM.from_pretrained( 144 | args.model, 145 | torch_dtype=torch.float16, 146 | device_map="cpu" # Ensure it's loaded on CPU first 147 | ) 148 | print("Model loaded as Causal LM") 149 | except Exception as e: 150 | print(f"Could not load as Causal LM, trying generic model: {e}") 151 | model = AutoModel.from_pretrained( 152 | args.model, 153 | torch_dtype=torch.float16, 154 | device_map="cpu" 155 | ) 156 | print("Model loaded as generic model") 157 | 158 | # Get model's hidden size - different models have different attributes 159 | hidden_size = args.hidden_size # Default 160 | if hasattr(model.config, 'hidden_size'): 161 | hidden_size = model.config.hidden_size 162 | elif hasattr(model.config, 'd_model'): 163 | hidden_size = model.config.d_model 164 | 165 | # Standard input shape for transformer models 166 | input_shape = [args.batch_size, args.sequence_length, hidden_size] 167 | print(f"Using input shape: {input_shape}") 168 | 169 | # Benchmark on each backend 170 | for backend in backends: 171 | print(f"\n=== Running {backend} Benchmark ===") 172 | benchmark.benchmark_model( 173 | model=model, 174 | model_name=args.model, 175 | input_shape=input_shape, 176 | backend=backend, 177 | num_runs=args.runs 178 | ) 179 | 180 | # Generate report 181 | print(f"\nGenerating report to {args.output}...") 182 | report_url = benchmark.generate_report( 183 | output_path=args.output, 184 | upload=args.upload, 185 | upload_service=args.upload_service 186 | ) 187 | 188 | if report_url: 189 | print(f"Report uploaded to: {report_url}") 190 | print("You can share this URL to let others view your benchmark results.") 191 | 192 | # Calculate speedup if we have both CPU and ANE results 193 | if len(benchmark.results) >= 2: 194 | cpu_results = [r for r in benchmark.results if r.backend == "CPU"] 195 | ane_results = [r for r in benchmark.results if r.backend == "ANE"] 196 | 197 | if cpu_results and ane_results: 198 | cpu_time = cpu_results[0].inference_time_ms 199 | ane_time = ane_results[0].inference_time_ms 200 | speedup = cpu_time / ane_time if ane_time > 0 else 0 201 | 202 | print("\n=== Benchmark Summary ===") 203 | print(f"Model: {args.model}") 204 | print(f"CPU Inference: {cpu_time:.2f} ms") 205 | print(f"ANE Inference: {ane_time:.2f} ms") 206 | print(f"Speedup: {speedup:.2f}x") 207 | 208 | except Exception as e: 209 | print(f"Error benchmarking model: {e}") 210 | return 1 211 | 212 | print("\nBenchmark complete!") 213 | return 0 214 | 215 | 216 | if __name__ == "__main__": 217 | sys.exit(main()) 
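# Example invocations (a sketch based on the argument parser above; the config path
# and model id are placeholders):
#   python -m anemll_bench --system-info
#   python -m anemll_bench --config path/to/benchmark_config.json --output benchmark_report.html
#   python -m anemll_bench --model <hf-model-id> --runs 50 --upload --upload-service jsonbin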
-------------------------------------------------------------------------------- /anemll_bench/utils/ane_verification.py: -------------------------------------------------------------------------------- 1 | """ 2 | Apple Neural Engine (ANE) Verification Utility 3 | 4 | This utility helps diagnose ANE availability and model execution issues. 5 | """ 6 | 7 | import platform 8 | import subprocess 9 | import sys 10 | from typing import Dict, Any, Optional 11 | 12 | 13 | def check_ane_hardware() -> Dict[str, Any]: 14 | """ 15 | Check if ANE hardware is available on the system. 16 | 17 | Returns: 18 | Dictionary with ANE hardware information 19 | """ 20 | ane_info = { 21 | 'hardware_available': False, 22 | 'platform': platform.system(), 23 | 'architecture': platform.machine(), 24 | 'macos_version': None, 25 | 'chip_model': None, 26 | 'ane_cores': None, 27 | 'issues': [] 28 | } 29 | 30 | # Check if we're on macOS 31 | if platform.system() != 'Darwin': 32 | ane_info['issues'].append('Not running on macOS') 33 | return ane_info 34 | 35 | # Check if we're on Apple Silicon 36 | if platform.machine() != 'arm64': 37 | ane_info['issues'].append('Not running on Apple Silicon (arm64)') 38 | return ane_info 39 | 40 | ane_info['hardware_available'] = True 41 | 42 | # Get macOS version 43 | try: 44 | macos_version = platform.mac_ver()[0] 45 | ane_info['macos_version'] = macos_version 46 | 47 | # Check macOS version compatibility 48 | major_version = int(macos_version.split('.')[0]) 49 | if major_version < 14: 50 | ane_info['issues'].append(f'macOS {major_version} has limited ANE support') 51 | elif major_version >= 15: 52 | ane_info['issues'].append(f'macOS {major_version} has enhanced ANE support') 53 | except Exception as e: 54 | ane_info['issues'].append(f'Could not determine macOS version: {e}') 55 | 56 | # Try to get chip model 57 | try: 58 | result = subprocess.run(['sysctl', '-n', 'machdep.cpu.brand_string'], 59 | capture_output=True, text=True, timeout=5) 60 | if result.returncode == 0: 61 | chip_model = result.stdout.strip() 62 | ane_info['chip_model'] = chip_model 63 | 64 | # Map chip models to ANE cores 65 | if 'M1' in chip_model: 66 | if 'Ultra' in chip_model: 67 | ane_info['ane_cores'] = 64 68 | elif 'Max' in chip_model: 69 | ane_info['ane_cores'] = 32 70 | else: 71 | ane_info['ane_cores'] = 16 72 | elif 'M2' in chip_model: 73 | if 'Ultra' in chip_model: 74 | ane_info['ane_cores'] = 64 75 | elif 'Max' in chip_model: 76 | ane_info['ane_cores'] = 32 77 | else: 78 | ane_info['ane_cores'] = 16 79 | elif 'M3' in chip_model: 80 | if 'Ultra' in chip_model: 81 | ane_info['ane_cores'] = 64 82 | elif 'Max' in chip_model: 83 | ane_info['ane_cores'] = 32 84 | else: 85 | ane_info['ane_cores'] = 16 86 | elif 'M4' in chip_model: 87 | if 'Max' in chip_model: 88 | ane_info['ane_cores'] = 32 89 | else: 90 | ane_info['ane_cores'] = 16 91 | else: 92 | ane_info['issues'].append('Could not determine chip model') 93 | except Exception as e: 94 | ane_info['issues'].append(f'Error getting chip model: {e}') 95 | 96 | return ane_info 97 | 98 | 99 | def check_coreml_ane_support() -> Dict[str, Any]: 100 | """ 101 | Check CoreML ANE support and configuration. 
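    Example (sketch):
        info = check_coreml_ane_support()
        if not info['ane_support']:
            print(info['issues'])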
102 | 103 | Returns: 104 | Dictionary with CoreML ANE information 105 | """ 106 | coreml_info = { 107 | 'coreml_available': False, 108 | 'version': None, 109 | 'ane_support': False, 110 | 'compute_units': [], 111 | 'issues': [] 112 | } 113 | 114 | try: 115 | import coremltools as ct 116 | coreml_info['coreml_available'] = True 117 | coreml_info['version'] = ct.__version__ 118 | 119 | # Check available compute units 120 | try: 121 | compute_units = [str(cu) for cu in ct.ComputeUnit] 122 | coreml_info['compute_units'] = compute_units 123 | 124 | # Check if ANE compute units are available 125 | ane_units = [cu for cu in compute_units if 'NE' in cu or 'ALL' in cu] 126 | if ane_units: 127 | coreml_info['ane_support'] = True 128 | else: 129 | coreml_info['issues'].append('No ANE compute units found') 130 | 131 | except Exception as e: 132 | coreml_info['issues'].append(f'Error checking compute units: {e}') 133 | 134 | except ImportError: 135 | coreml_info['issues'].append('CoreML Tools not installed') 136 | except Exception as e: 137 | coreml_info['issues'].append(f'Error importing CoreML Tools: {e}') 138 | 139 | return coreml_info 140 | 141 | 142 | def verify_model_ane_compatibility(model_path: str) -> Dict[str, Any]: 143 | """ 144 | Verify if a model is compatible with ANE execution. 145 | 146 | Args: 147 | model_path: Path to the CoreML model 148 | 149 | Returns: 150 | Dictionary with model ANE compatibility information 151 | """ 152 | model_info = { 153 | 'model_exists': False, 154 | 'model_format': None, 155 | 'ane_optimized': False, 156 | 'ml_program': False, 157 | 'issues': [] 158 | } 159 | 160 | import os 161 | 162 | # Check if model exists 163 | if not os.path.exists(model_path): 164 | model_info['issues'].append(f'Model file not found: {model_path}') 165 | return model_info 166 | 167 | model_info['model_exists'] = True 168 | 169 | # Determine model format 170 | if model_path.endswith('.mlmodelc'): 171 | model_info['model_format'] = 'Compiled (.mlmodelc)' 172 | elif model_path.endswith('.mlmodel'): 173 | model_info['model_format'] = 'Uncompiled (.mlmodel)' 174 | elif model_path.endswith('.mlpackage'): 175 | model_info['model_format'] = 'Package (.mlpackage)' 176 | else: 177 | model_info['issues'].append(f'Unknown model format: {model_path}') 178 | 179 | # Try to load and analyze the model 180 | try: 181 | import coremltools as ct 182 | 183 | # Load model 184 | if model_path.endswith('.mlmodelc'): 185 | model = ct.models.CompiledMLModel(model_path) 186 | else: 187 | model = ct.models.MLModel(model_path) 188 | 189 | # Get model spec 190 | spec = model.get_spec() 191 | 192 | # Check if it's an ML Program (ANE-optimized) 193 | if hasattr(spec, 'mlProgram') and spec.mlProgram: 194 | model_info['ml_program'] = True 195 | model_info['ane_optimized'] = True 196 | else: 197 | model_info['issues'].append('Model is not an ML Program (may not be ANE-optimized)') 198 | 199 | # Check spec version 200 | spec_version = spec.specificationVersion 201 | if spec_version < 5: 202 | model_info['issues'].append(f'Low spec version ({spec_version}), may not support ANE') 203 | 204 | except Exception as e: 205 | model_info['issues'].append(f'Error analyzing model: {e}') 206 | 207 | return model_info 208 | 209 | 210 | def run_ane_diagnostic() -> Dict[str, Any]: 211 | """ 212 | Run a comprehensive ANE diagnostic. 
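    This module can also be run directly (see the __main__ block at the end of the
    file), e.g. `python -m anemll_bench.utils.ane_verification`; it exits with
    status 0 when ANE appears usable and 1 otherwise.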
213 | 214 | Returns: 215 | Dictionary with complete ANE diagnostic information 216 | """ 217 | print("Running ANE Diagnostic...") 218 | print("=" * 50) 219 | 220 | diagnostic = { 221 | 'hardware': check_ane_hardware(), 222 | 'coreml': check_coreml_ane_support(), 223 | 'overall_status': 'Unknown' 224 | } 225 | 226 | # Determine overall status 227 | hardware_ok = diagnostic['hardware']['hardware_available'] 228 | coreml_ok = diagnostic['coreml']['coreml_available'] and diagnostic['coreml']['ane_support'] 229 | 230 | if hardware_ok and coreml_ok: 231 | diagnostic['overall_status'] = 'ANE should be available' 232 | elif hardware_ok and not coreml_ok: 233 | diagnostic['overall_status'] = 'Hardware OK, CoreML issues' 234 | elif not hardware_ok: 235 | diagnostic['overall_status'] = 'Hardware not available' 236 | else: 237 | diagnostic['overall_status'] = 'Unknown issues' 238 | 239 | # Print results 240 | print(f"\nHardware Check:") 241 | print(f" - ANE Hardware Available: {hardware_ok}") 242 | if diagnostic['hardware']['chip_model']: 243 | print(f" - Chip Model: {diagnostic['hardware']['chip_model']}") 244 | if diagnostic['hardware']['ane_cores']: 245 | print(f" - ANE Cores: {diagnostic['hardware']['ane_cores']}") 246 | if diagnostic['hardware']['macos_version']: 247 | print(f" - macOS Version: {diagnostic['hardware']['macos_version']}") 248 | 249 | print(f"\nCoreML Check:") 250 | print(f" - CoreML Available: {coreml_ok}") 251 | if diagnostic['coreml']['version']: 252 | print(f" - CoreML Version: {diagnostic['coreml']['version']}") 253 | print(f" - ANE Support: {diagnostic['coreml']['ane_support']}") 254 | 255 | print(f"\nOverall Status: {diagnostic['overall_status']}") 256 | 257 | # Print issues 258 | all_issues = [] 259 | all_issues.extend(diagnostic['hardware']['issues']) 260 | all_issues.extend(diagnostic['coreml']['issues']) 261 | 262 | if all_issues: 263 | print(f"\nIssues Found:") 264 | for issue in all_issues: 265 | print(f" - {issue}") 266 | 267 | return diagnostic 268 | 269 | 270 | if __name__ == "__main__": 271 | # Run diagnostic when executed directly 272 | diagnostic = run_ane_diagnostic() 273 | 274 | # Exit with appropriate code 275 | if diagnostic['overall_status'] == 'ANE should be available': 276 | sys.exit(0) 277 | else: 278 | sys.exit(1) 279 | -------------------------------------------------------------------------------- /examples/profile_coreml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Profile script for CoreML models using ANEMLL-Bench 4 | Similar to profile_split.py but using the ANEMLL-Bench package 5 | """ 6 | 7 | import os 8 | import sys 9 | import argparse 10 | import json 11 | import time 12 | import coremltools as ct 13 | import numpy as np 14 | import webbrowser 15 | from pathlib import Path 16 | 17 | # Add parent directory to path for development imports 18 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 19 | 20 | from anemll_bench import Benchmark 21 | from anemll_bench.utils.system_info import get_system_info 22 | from anemll_bench.models.coreml_adapter import ( 23 | load_coreml_model, 24 | profile_coreml_model, 25 | prepare_inputs, 26 | get_model_size, 27 | benchmark_coreml_model_file, 28 | get_model_metadata 29 | ) 30 | 31 | 32 | def parse_args(): 33 | """Parse command line arguments""" 34 | parser = argparse.ArgumentParser(description='Profile CoreML models on Apple Neural Engine') 35 | 36 | # Model options 37 | parser.add_argument('--model', 
type=str, required=True, 38 | help='Path to CoreML model (.mlmodel or .mlmodelc)') 39 | parser.add_argument('--batch-size', type=int, default=1, 40 | help='Batch size for profiling') 41 | parser.add_argument('--sequence-length', type=int, default=512, 42 | help='Sequence length for text models') 43 | parser.add_argument('--hidden-size', type=int, default=4096, 44 | help='Hidden size for text models') 45 | 46 | # Benchmark options 47 | parser.add_argument('--iterations', type=int, default=1000, 48 | help='Number of iterations for profiling') 49 | parser.add_argument('--compute-units', type=str, default='CPU_AND_NE', 50 | choices=['CPU_AND_NE', 'CPU_ONLY', 'ALL'], 51 | help='Compute units to use for inference') 52 | 53 | # Output options 54 | parser.add_argument('--output', type=str, default=None, 55 | help='Path to save benchmark results (JSON)') 56 | parser.add_argument('--report', type=str, default='profile_report.html', 57 | help='Path to save HTML report') 58 | parser.add_argument('--include-charts', action='store_true', 59 | help='Include performance charts in HTML report (disabled by default)') 60 | parser.add_argument('--upload', action='store_true', 61 | help='Upload report to sharing service') 62 | parser.add_argument('--upload-service', type=str, default='jsonbin', 63 | choices=['gist', 'pastebin', 'jsonbin'], 64 | help='Service to upload report to') 65 | 66 | # Extra options 67 | parser.add_argument('--compare-cpu', action='store_true', 68 | help='Compare with CPU-only performance (disabled by default)') 69 | parser.add_argument('--tflops', type=float, default=None, 70 | help='Specify the total number of trillion floating point operations (TFLOPs) per iteration (not TFLOPS rate)') 71 | 72 | args = parser.parse_args() 73 | 74 | return args 75 | 76 | 77 | def print_model_info(model_path): 78 | """Print basic information about the model""" 79 | size_bytes = get_model_size(model_path) 80 | size_mb = size_bytes / (1024 * 1024) 81 | 82 | # Get weights-only size 83 | weights_bytes = get_model_size(model_path, weights_only=True) 84 | weights_mb = weights_bytes / (1024 * 1024) 85 | 86 | print(f"\n=== Model Information ===") 87 | print(f"Path: {model_path}") 88 | print(f"Total Size: {size_mb:.2f} MB ({size_bytes:,} bytes)") 89 | print(f"Weights Size: {weights_mb:.2f} MB ({weights_bytes:,} bytes)") 90 | print(f"Weights Percentage: {(weights_bytes/size_bytes)*100:.1f}% of total size") 91 | 92 | # Try to load the model to get more info 93 | try: 94 | model = load_coreml_model(model_path, compute_units="CPU_ONLY") # Use CPU for quick loading 95 | 96 | # Get metadata for detailed model information 97 | metadata = get_model_metadata(model) 98 | if "type" in metadata: 99 | print(f"Model Type: {metadata['type']}") 100 | if "hidden_size" in metadata: 101 | print(f"Hidden Size: {metadata['hidden_size']}") 102 | if "vocab_size" in metadata: 103 | print(f"Vocabulary Size: {metadata['vocab_size']}") 104 | 105 | # Print input information 106 | print("\nInputs:") 107 | for input_info in metadata.get("inputs", []): 108 | name = input_info.get("name", "unknown") 109 | shape = input_info.get("shape", "unknown") 110 | data_type = input_info.get("data_type", "unknown") 111 | print(f" - {name}: shape={shape}, type={data_type}") 112 | 113 | # Print output information 114 | print("\nOutputs:") 115 | for output_info in metadata.get("outputs", []): 116 | name = output_info.get("name", "unknown") 117 | shape = output_info.get("shape", "unknown") 118 | data_type = output_info.get("data_type", "unknown") 119 | 
print(f" - {name}: shape={shape}, type={data_type}") 120 | 121 | except Exception as e: 122 | print(f"Error getting model details: {e}") 123 | 124 | print("===========================\n") 125 | 126 | 127 | def main(): 128 | """Main entry point""" 129 | args = parse_args() 130 | 131 | # Display system info 132 | system_info = get_system_info() 133 | print("\n=== System Information ===") 134 | print(f"Mac Model: {system_info.get('mac_model', 'Unknown')}") 135 | 136 | # Use the user-friendly macOS version if available, otherwise fall back to the old format 137 | if 'macos_version' in system_info: 138 | print(f"OS: {system_info['macos_version']}") 139 | else: 140 | print(f"OS: {system_info.get('os', {}).get('name', 'Unknown')} " 141 | f"{system_info.get('os', {}).get('release', '')}") 142 | 143 | print(f"CPU: {system_info.get('cpu', {}).get('brand', 'Unknown')}") 144 | print(f"RAM: {system_info.get('ram', {}).get('total_gb', 'Unknown')} GB") 145 | print(f"Apple Silicon: {'Yes' if system_info.get('apple_silicon', False) else 'No'}") 146 | print("===========================\n") 147 | 148 | # Print model info 149 | print_model_info(args.model) 150 | 151 | # Initialize benchmark 152 | benchmark = Benchmark() 153 | 154 | # Benchmark with the requested compute units 155 | print(f"Profiling with compute units: {args.compute_units}") 156 | print(f"Using specified hidden size: {args.hidden_size}") 157 | if args.tflops is not None: 158 | print(f"Using provided TFLOPS value: {args.tflops}") 159 | 160 | result = benchmark.benchmark_coreml_file( 161 | model_path=args.model, 162 | num_runs=args.iterations, 163 | batch_size=args.batch_size, 164 | sequence_length=args.sequence_length, 165 | hidden_size=args.hidden_size, 166 | compute_units=args.compute_units, 167 | known_tflops=args.tflops 168 | ) 169 | 170 | # Also benchmark with CPU only if requested 171 | if args.compare_cpu and args.compute_units != "CPU_ONLY": 172 | print("\nComparing with CPU-only performance...") 173 | cpu_result = benchmark.benchmark_coreml_file( 174 | model_path=args.model, 175 | num_runs=args.iterations, 176 | batch_size=args.batch_size, 177 | sequence_length=args.sequence_length, 178 | hidden_size=args.hidden_size, 179 | compute_units="CPU_ONLY", 180 | known_tflops=args.tflops 181 | ) 182 | 183 | # Calculate speedup 184 | ane_time = result.inference_time_ms 185 | cpu_time = cpu_result.inference_time_ms 186 | ane_gbps = result.throughput_gb_s 187 | cpu_gbps = cpu_result.throughput_gb_s 188 | ane_tflops = result.tflops 189 | cpu_tflops = cpu_result.tflops 190 | 191 | speedup = cpu_time / ane_time if ane_time > 0 else 0 192 | gbps_ratio = ane_gbps / cpu_gbps if cpu_gbps > 0 else 0 193 | 194 | print("\n=== Performance Comparison ===") 195 | print(f"ANE Inference: {ane_time:.2f} ms") 196 | print(f"CPU Inference: {cpu_time:.2f} ms") 197 | print(f"Time Speedup: {speedup:.2f}x") 198 | print(f"ANE Throughput: {ane_gbps:.2f} GB/s") 199 | print(f"CPU Throughput: {cpu_gbps:.2f} GB/s") 200 | print(f"Throughput Ratio: {gbps_ratio:.2f}x") 201 | 202 | # Only print TFLOPS information if TFLOPS values are available 203 | if ane_tflops is not None and cpu_tflops is not None: 204 | tflops_ratio = ane_tflops / cpu_tflops if cpu_tflops > 0 else 0 205 | print(f"ANE TFLOPS: {ane_tflops:.4f}") 206 | print(f"CPU TFLOPS: {cpu_tflops:.4f}") 207 | print(f"TFLOPS Ratio: {tflops_ratio:.2f}x") 208 | 209 | print("=============================\n") 210 | 211 | # Generate report 212 | report_url = benchmark.generate_report( 213 | output_path=args.report, 214 | 
upload=args.upload, 215 | upload_service=args.upload_service, 216 | include_charts=args.include_charts 217 | ) 218 | 219 | if report_url: 220 | print(f"Report uploaded to: {report_url}") 221 | 222 | # Open the HTML report in the default web browser 223 | report_path = os.path.abspath(args.report) 224 | print(f"Opening report: {report_path}") 225 | #webbrowser.open(f"file://{report_path}", new=2) 226 | 227 | # Save JSON results if requested 228 | if args.output: 229 | # Create results dictionary 230 | results = { 231 | "model_path": args.model, 232 | "model_size_bytes": get_model_size(args.model), 233 | "model_size_mb": get_model_size(args.model) / (1024 * 1024), 234 | "batch_size": args.batch_size, 235 | "sequence_length": args.sequence_length, 236 | "hidden_size": args.hidden_size, 237 | "iterations": args.iterations, 238 | "compute_units": args.compute_units, 239 | "inference_time_ms": result.inference_time_ms, 240 | "throughput_gbps": result.throughput_gbps, 241 | "tflops": result.tflops, 242 | "system_info": system_info, 243 | "timestamp": time.strftime("%Y-%m-%d %H:%M:%S") 244 | } 245 | 246 | # Save to file 247 | with open(args.output, 'w') as f: 248 | json.dump(results, f, indent=2) 249 | 250 | print(f"Results saved to: {args.output}") 251 | 252 | print("\nProfile complete!") 253 | return 0 254 | 255 | 256 | if __name__ == "__main__": 257 | sys.exit(main()) -------------------------------------------------------------------------------- /anemll_bench/utils/system_info.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions to collect system information for benchmarking reports 3 | """ 4 | 5 | import platform 6 | import os 7 | import subprocess 8 | import json 9 | import psutil 10 | 11 | 12 | def get_mac_model_identifier(): 13 | """Get the Mac model identifier, e.g., 'MacBookPro18,3'""" 14 | try: 15 | result = subprocess.run(['sysctl', '-n', 'hw.model'], 16 | capture_output=True, text=True, check=True) 17 | return result.stdout.strip() 18 | except: 19 | return "Unknown Mac Model" 20 | 21 | 22 | def get_macos_version(): 23 | """Get a more user-friendly macOS version string""" 24 | if platform.system() != "Darwin": 25 | return None 26 | 27 | try: 28 | # Try using sw_vers command for more accurate information 29 | product_name = subprocess.run(['sw_vers', '-productName'], 30 | capture_output=True, text=True, check=True).stdout.strip() 31 | product_version = subprocess.run(['sw_vers', '-productVersion'], 32 | capture_output=True, text=True, check=True).stdout.strip() 33 | build_version = subprocess.run(['sw_vers', '-buildVersion'], 34 | capture_output=True, text=True, check=True).stdout.strip() 35 | 36 | # Map of macOS version numbers to marketing names (as backup if productName doesn't work) 37 | version_names = { 38 | "10.15": "Catalina", 39 | "11": "Big Sur", 40 | "12": "Monterey", 41 | "13": "Ventura", 42 | "14": "Sonoma", 43 | "15": "Sequoia" 44 | } 45 | 46 | # If product_name is not "macOS", use a more descriptive name 47 | if product_name and product_name != "macOS": 48 | return f"{product_name} {product_version} ({build_version})" 49 | 50 | # Otherwise determine name from version number 51 | major_version = product_version.split('.')[0] 52 | if major_version == "10": 53 | # For older versions, we need the second digit too 54 | major_minor = '.'.join(product_version.split('.')[:2]) 55 | version_name = version_names.get(major_minor, "") 56 | else: 57 | version_name = version_names.get(major_version, "") 58 | 59 | # Format 
the full version string 60 | if version_name: 61 | return f"macOS {version_name} {product_version} ({build_version})" 62 | else: 63 | return f"macOS {product_version} ({build_version})" 64 | except: 65 | # Fall back to platform.mac_ver() if the commands fail 66 | try: 67 | mac_ver = platform.mac_ver() 68 | return f"macOS {mac_ver[0]}" 69 | except: 70 | # Ultimate fallback to platform.version() 71 | return f"macOS {platform.version()}" 72 | 73 | 74 | def get_cpu_info(): 75 | """Get detailed CPU information""" 76 | info = { 77 | 'brand': platform.processor(), 78 | 'architecture': platform.machine(), 79 | 'cores': psutil.cpu_count(logical=False), 80 | 'threads': psutil.cpu_count(logical=True), 81 | } 82 | 83 | # Get more detailed Apple Silicon info if available 84 | if platform.machine() == "arm64": 85 | try: 86 | result = subprocess.run(['sysctl', '-n', 'machdep.cpu.brand_string'], 87 | capture_output=True, text=True, check=True) 88 | info['brand'] = result.stdout.strip() 89 | except: 90 | pass 91 | 92 | return info 93 | 94 | 95 | def get_ram_info(): 96 | """Get RAM information""" 97 | mem = psutil.virtual_memory() 98 | return { 99 | 'total_gb': round(mem.total / (1024**3), 2), 100 | 'available_gb': round(mem.available / (1024**3), 2), 101 | } 102 | 103 | 104 | def get_ane_info(): 105 | """ 106 | Get detailed Apple Neural Engine (ANE) information for Apple Silicon devices. 107 | 108 | Returns: 109 | Dictionary with ANE capabilities and specifications 110 | """ 111 | ane_info = { 112 | 'available': True, 113 | 'cores': 'Unknown', 114 | 'tflops': 'Unknown', 115 | 'chip_model': 'Unknown', 116 | 'ane_generation': 'Unknown', 117 | 'capabilities': [] 118 | } 119 | 120 | try: 121 | # Get Mac model identifier 122 | mac_model = get_mac_model_identifier() 123 | if mac_model: 124 | ane_info['chip_model'] = mac_model 125 | 126 | # Map Mac models to ANE specifications 127 | ane_specs = { 128 | # M1 Series 129 | 'MacBookAir10,1': {'cores': 16, 'generation': 'M1', 'tflops': '11'}, 130 | 'MacBookPro17,1': {'cores': 16, 'generation': 'M1', 'tflops': '11'}, 131 | 'Macmini9,1': {'cores': 16, 'generation': 'M1', 'tflops': '11'}, 132 | 'iMac21,1': {'cores': 16, 'generation': 'M1', 'tflops': '11'}, 133 | 'iMac21,2': {'cores': 16, 'generation': 'M1', 'tflops': '11'}, 134 | 135 | # M1 Pro 136 | 'MacBookPro18,1': {'cores': 16, 'generation': 'M1 Pro', 'tflops': '11'}, 137 | 'MacBookPro18,2': {'cores': 16, 'generation': 'M1 Pro', 'tflops': '11'}, 138 | 'MacBookPro18,3': {'cores': 16, 'generation': 'M1 Pro', 'tflops': '11'}, 139 | 'MacBookPro18,4': {'cores': 16, 'generation': 'M1 Pro', 'tflops': '11'}, 140 | 141 | # M1 Max 142 | 'MacBookPro18,1': {'cores': 32, 'generation': 'M1 Max', 'tflops': '22'}, 143 | 'MacBookPro18,2': {'cores': 32, 'generation': 'M1 Max', 'tflops': '22'}, 144 | 'MacBookPro18,3': {'cores': 32, 'generation': 'M1 Max', 'tflops': '22'}, 145 | 'MacBookPro18,4': {'cores': 32, 'generation': 'M1 Max', 'tflops': '22'}, 146 | 147 | # M1 Ultra 148 | 'MacStudio1,1': {'cores': 64, 'generation': 'M1 Ultra', 'tflops': '44'}, 149 | 150 | # M2 Series 151 | 'MacBookAir13,2': {'cores': 16, 'generation': 'M2', 'tflops': '15.8'}, 152 | 'MacBookPro18,1': {'cores': 16, 'generation': 'M2', 'tflops': '15.8'}, 153 | 'Macmini9,1': {'cores': 16, 'generation': 'M2', 'tflops': '15.8'}, 154 | 155 | # M2 Pro 156 | 'MacBookPro19,1': {'cores': 16, 'generation': 'M2 Pro', 'tflops': '15.8'}, 157 | 'MacBookPro19,2': {'cores': 16, 'generation': 'M2 Pro', 'tflops': '15.8'}, 158 | 159 | # M2 Max 160 | 'MacBookPro19,1': {'cores': 32, 
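            # NOTE: several identifiers in this mapping repeat across generations
            # (e.g. 'MacBookPro18,1'); in a dict literal the last duplicate key
            # wins, so the earlier entries are silently overridden.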
'generation': 'M2 Max', 'tflops': '31.6'}, 161 | 'MacBookPro19,2': {'cores': 32, 'generation': 'M2 Max', 'tflops': '31.6'}, 162 | 163 | # M2 Ultra 164 | 'MacPro7,1': {'cores': 64, 'generation': 'M2 Ultra', 'tflops': '63.2'}, 165 | 166 | # M3 Series 167 | 'MacBookAir15,1': {'cores': 16, 'generation': 'M3', 'tflops': '18'}, 168 | 'MacBookPro18,1': {'cores': 16, 'generation': 'M3', 'tflops': '18'}, 169 | 'iMac24,1': {'cores': 16, 'generation': 'M3', 'tflops': '18'}, 170 | 171 | # M3 Pro 172 | 'MacBookPro18,1': {'cores': 16, 'generation': 'M3 Pro', 'tflops': '18'}, 173 | 'MacBookPro18,2': {'cores': 16, 'generation': 'M3 Pro', 'tflops': '18'}, 174 | 175 | # M3 Max 176 | 'MacBookPro18,1': {'cores': 32, 'generation': 'M3 Max', 'tflops': '36'}, 177 | 'MacBookPro18,2': {'cores': 32, 'generation': 'M3 Max', 'tflops': '36'}, 178 | 179 | # M3 Ultra 180 | 'MacStudio1,1': {'cores': 64, 'generation': 'M3 Ultra', 'tflops': '72'}, 181 | 182 | # M4 Series 183 | 'MacBookAir15,1': {'cores': 16, 'generation': 'M4', 'tflops': '38'}, 184 | 'MacBookPro18,1': {'cores': 16, 'generation': 'M4', 'tflops': '38'}, 185 | 186 | # M4 Pro 187 | 'MacBookPro18,1': {'cores': 16, 'generation': 'M4 Pro', 'tflops': '38'}, 188 | 'MacBookPro18,2': {'cores': 16, 'generation': 'M4 Pro', 'tflops': '38'}, 189 | 190 | # M4 Max 191 | 'MacBookPro18,1': {'cores': 32, 'generation': 'M4 Max', 'tflops': '76'}, 192 | 'MacBookPro18,2': {'cores': 32, 'generation': 'M4 Max', 'tflops': '76'}, 193 | } 194 | 195 | if mac_model in ane_specs: 196 | specs = ane_specs[mac_model] 197 | ane_info.update(specs) 198 | else: 199 | # Try to infer from model name patterns 200 | if 'MacBookAir' in mac_model: 201 | ane_info.update({'cores': 16, 'generation': 'Unknown M-series', 'tflops': 'Unknown'}) 202 | elif 'MacBookPro' in mac_model: 203 | ane_info.update({'cores': 16, 'generation': 'Unknown M-series Pro/Max', 'tflops': 'Unknown'}) 204 | elif 'MacStudio' in mac_model: 205 | ane_info.update({'cores': 64, 'generation': 'Unknown M-series Ultra', 'tflops': 'Unknown'}) 206 | elif 'iMac' in mac_model: 207 | ane_info.update({'cores': 16, 'generation': 'Unknown M-series', 'tflops': 'Unknown'}) 208 | elif 'Macmini' in mac_model: 209 | ane_info.update({'cores': 16, 'generation': 'Unknown M-series', 'tflops': 'Unknown'}) 210 | 211 | # Add capabilities based on macOS version 212 | macos_version = get_macos_version() 213 | if macos_version: 214 | if macos_version >= '15.0': 215 | ane_info['capabilities'].extend([ 216 | 'Enhanced ANE support', 217 | 'Improved CoreML integration', 218 | 'Better memory management' 219 | ]) 220 | elif macos_version >= '14.0': 221 | ane_info['capabilities'].extend([ 222 | 'Standard ANE support', 223 | 'CoreML integration', 224 | 'Basic memory management' 225 | ]) 226 | else: 227 | ane_info['capabilities'].extend([ 228 | 'Limited ANE support', 229 | 'Legacy CoreML integration' 230 | ]) 231 | 232 | # Add general capabilities 233 | ane_info['capabilities'].extend([ 234 | 'Neural network acceleration', 235 | 'Machine learning inference', 236 | 'CoreML model execution' 237 | ]) 238 | 239 | except Exception as e: 240 | print(f"Warning: Could not determine detailed ANE info: {e}") 241 | ane_info['error'] = str(e) 242 | 243 | return ane_info 244 | 245 | 246 | def get_system_info(): 247 | """Collect comprehensive system information""" 248 | system_info = { 249 | 'os': { 250 | 'name': platform.system(), 251 | 'version': platform.version(), 252 | 'release': platform.release(), 253 | }, 254 | 'cpu': get_cpu_info(), 255 | 'ram': get_ram_info(), 256 | 
'python_version': platform.python_version(), 257 | 'mac_model': get_mac_model_identifier() if platform.system() == "Darwin" else None, 258 | } 259 | 260 | # Get a more user-friendly macOS version if on macOS 261 | if platform.system() == "Darwin": 262 | system_info['macos_version'] = get_macos_version() 263 | 264 | # Check if we're running on Apple Silicon 265 | system_info['apple_silicon'] = platform.machine() == "arm64" and platform.system() == "Darwin" 266 | 267 | # Get detailed ANE information if on Apple Silicon 268 | if system_info['apple_silicon']: 269 | ane_info = get_ane_info() 270 | system_info['neural_engine'] = ane_info 271 | else: 272 | system_info['neural_engine'] = { 273 | 'available': False, 274 | 'reason': 'Not running on Apple Silicon' 275 | } 276 | 277 | return system_info 278 | 279 | 280 | if __name__ == "__main__": 281 | # Print system info when run directly 282 | info = get_system_info() 283 | print(json.dumps(info, indent=2)) -------------------------------------------------------------------------------- /anemll_bench/models/model_syncer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for synchronizing and managing model files. 3 | """ 4 | 5 | import os 6 | import logging 7 | import pathlib 8 | import yaml 9 | import requests 10 | import zipfile 11 | import shutil 12 | from typing import Dict, Any, List, Optional 13 | 14 | # Set up logging 15 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 16 | logger = logging.getLogger(__name__) 17 | 18 | # Constants for cache paths 19 | # Use ~/.cache/anemll-bench/ for cache storage 20 | HOME_DIR = str(pathlib.Path.home()) 21 | CACHE_DIR = os.path.join(HOME_DIR, ".cache", "anemll-bench") 22 | META_FILE_PATH = os.path.join(CACHE_DIR, "meta.yalm") 23 | MODELS_CACHE_DIR = os.path.join(CACHE_DIR, "models") 24 | 25 | # Ensure cache directories exist 26 | os.makedirs(CACHE_DIR, exist_ok=True) 27 | os.makedirs(MODELS_CACHE_DIR, exist_ok=True) 28 | 29 | class ModelSyncer: 30 | """ 31 | Class responsible for synchronizing and managing model files. 32 | """ 33 | 34 | def __init__(self): 35 | """ 36 | Initialize the ModelSyncer. 37 | """ 38 | # Make sure cache directories exist 39 | os.makedirs(CACHE_DIR, exist_ok=True) 40 | os.makedirs(MODELS_CACHE_DIR, exist_ok=True) 41 | 42 | def get_model_dir(self) -> str: 43 | """ 44 | Get the path to the model directory. 45 | 46 | Returns: 47 | Path to the model directory 48 | """ 49 | return MODELS_CACHE_DIR 50 | 51 | def read_meta_file(self) -> Dict: 52 | """ 53 | Read the meta.yalm file containing model information. 54 | 55 | Returns: 56 | Dictionary containing model information 57 | """ 58 | if not os.path.exists(META_FILE_PATH): 59 | logger.warning(f"Meta file not found at {META_FILE_PATH}") 60 | return {} 61 | 62 | try: 63 | with open(META_FILE_PATH, 'r') as file: 64 | meta_data = yaml.safe_load(file) 65 | return meta_data 66 | except Exception as e: 67 | logger.error(f"Error reading meta file: {e}") 68 | return {} 69 | 70 | def download_meta_file(self, force_update: bool = False) -> Dict: 71 | """ 72 | Download the meta.yalm file from Hugging Face and store it locally. 
73 | 74 | Args: 75 | force_update: If True, force download even if local file exists 76 | 77 | Returns: 78 | Dictionary containing model information 79 | """ 80 | # URL to the meta.yalm file on Hugging Face 81 | meta_url = "https://huggingface.co/anemll/anemll-bench/raw/main/meta.yalm" 82 | 83 | # Check if we should download 84 | if not force_update and os.path.exists(META_FILE_PATH): 85 | logger.info(f"Using existing meta file at {META_FILE_PATH}") 86 | return self.read_meta_file() 87 | 88 | # Download the meta file 89 | logger.info(f"Downloading meta file from {meta_url}") 90 | try: 91 | response = requests.get(meta_url) 92 | response.raise_for_status() 93 | 94 | # Save the file 95 | os.makedirs(os.path.dirname(META_FILE_PATH), exist_ok=True) 96 | with open(META_FILE_PATH, 'w') as file: 97 | file.write(response.text) 98 | 99 | logger.info(f"Meta file downloaded and saved to {META_FILE_PATH}") 100 | return yaml.safe_load(response.text) 101 | except Exception as e: 102 | logger.error(f"Error downloading meta file: {e}") 103 | 104 | # If we fail to download but have a local version, use that 105 | if os.path.exists(META_FILE_PATH): 106 | logger.warning("Using existing local meta file as fallback") 107 | return self.read_meta_file() 108 | 109 | return {} 110 | 111 | def download_model(self, url: str, model_name: str, model_type: str, force_redownload: bool = False, allow_redownload: bool = True) -> Optional[str]: 112 | """ 113 | Download a model from a URL. 114 | 115 | Args: 116 | url: URL to download the model from 117 | model_name: Name of the model 118 | model_type: Type of the model (mlmodelc or mlpackage) 119 | force_redownload: Whether to force re-download even if the model exists 120 | allow_redownload: Whether to allow redownloading if the zip file is corrupted 121 | 122 | Returns: 123 | Path to the downloaded model, or None if download failed 124 | """ 125 | # Define the model directory path 126 | expected_model_dir = os.path.join(MODELS_CACHE_DIR, f"{model_name}.{model_type}") 127 | 128 | # Check if the model already exists 129 | if os.path.exists(expected_model_dir) and not force_redownload: 130 | logger.info(f"Model already exists at {expected_model_dir}") 131 | return expected_model_dir 132 | 133 | # Create a temporary directory for downloads 134 | download_dir = os.path.join(CACHE_DIR, "downloads") 135 | os.makedirs(download_dir, exist_ok=True) 136 | 137 | # Define zip file path 138 | zip_path = os.path.join(download_dir, f"{model_name}.zip") 139 | 140 | # If forcing redownload, remove the existing zip file and model directory 141 | if force_redownload: 142 | if os.path.exists(zip_path): 143 | os.remove(zip_path) 144 | logger.info(f"Removed existing zip file: {zip_path}") 145 | if os.path.exists(expected_model_dir): 146 | shutil.rmtree(expected_model_dir, ignore_errors=True) 147 | logger.info(f"Removed existing model directory: {expected_model_dir}") 148 | 149 | # Check if the zip file already exists 150 | need_download = True 151 | if os.path.exists(zip_path) and not force_redownload: 152 | # Verify it's a valid zip file 153 | try: 154 | with zipfile.ZipFile(zip_path) as test_zip: 155 | test_zip.testzip() # This will check the integrity of the zip file 156 | logger.info(f"Zip file already exists at {zip_path}, using cached version") 157 | need_download = False 158 | except zipfile.BadZipFile: 159 | if allow_redownload: 160 | logger.warning(f"Existing zip file is corrupted: {zip_path}, re-downloading") 161 | os.remove(zip_path) 162 | need_download = True 163 | else: 164 | 
logger.error(f"Existing zip file is corrupted: {zip_path}, but redownload is disabled") 165 | return None 166 | except Exception as e: 167 | if allow_redownload: 168 | logger.warning(f"Error checking zip file {zip_path}: {e}, re-downloading") 169 | os.remove(zip_path) 170 | need_download = True 171 | else: 172 | logger.error(f"Error checking zip file {zip_path}: {e}, but redownload is disabled") 173 | return None 174 | 175 | # Download if needed 176 | if need_download: 177 | # Download the model 178 | logger.info(f"Downloading model from {url}") 179 | try: 180 | response = requests.get(url, stream=True) 181 | response.raise_for_status() 182 | 183 | # Save zip file 184 | with open(zip_path, 'wb') as f: 185 | for chunk in response.iter_content(chunk_size=8192): 186 | f.write(chunk) 187 | 188 | logger.info(f"Downloaded zip file to {zip_path}") 189 | except Exception as e: 190 | logger.error(f"Error downloading model: {e}") 191 | return None 192 | 193 | # Extract the model 194 | try: 195 | # Remove existing model directory if it exists 196 | if os.path.exists(expected_model_dir): 197 | logger.info(f"Removing existing model directory: {expected_model_dir}") 198 | shutil.rmtree(expected_model_dir, ignore_errors=True) 199 | 200 | # Verify it's a valid zip file 201 | try: 202 | with zipfile.ZipFile(zip_path) as test_zip: 203 | test_zip.testzip() 204 | except zipfile.BadZipFile: 205 | logger.error(f"Downloaded zip file is corrupted: {zip_path}") 206 | if not allow_redownload: 207 | logger.error("Redownload is disabled. Cannot recover from corrupted zip file.") 208 | return None 209 | elif force_redownload: 210 | logger.error("Already tried force redownload but zip is still corrupted. Giving up.") 211 | return None 212 | else: 213 | logger.info("Attempting to re-download with force_redownload=True") 214 | os.remove(zip_path) 215 | return self.download_model(url, model_name, model_type, force_redownload=True, allow_redownload=allow_redownload) 216 | 217 | # Extract the model 218 | with zipfile.ZipFile(zip_path) as zip_ref: 219 | # Get information about the zip contents 220 | file_list = zip_ref.namelist() 221 | logger.info(f"ZIP file contains {len(file_list)} files/directories") 222 | 223 | # Print the top-level directories/files 224 | top_level = set() 225 | for file_path in file_list: 226 | top_dir = file_path.split('/')[0] if '/' in file_path else file_path 227 | top_level.add(top_dir) 228 | 229 | logger.info(f"Top-level entries in ZIP: {', '.join(top_level)}") 230 | 231 | # Extract the model 232 | logger.info(f"Extracting ZIP file to {MODELS_CACHE_DIR}") 233 | zip_ref.extractall(MODELS_CACHE_DIR) 234 | 235 | # Remove any __MACOSX directory 236 | macosx_dir = os.path.join(MODELS_CACHE_DIR, "__MACOSX") 237 | if os.path.exists(macosx_dir): 238 | logger.info(f"Removing __MACOSX directory: {macosx_dir}") 239 | shutil.rmtree(macosx_dir, ignore_errors=True) 240 | 241 | # Check if the model directory exists 242 | if os.path.exists(expected_model_dir): 243 | logger.info(f"Model extracted to {expected_model_dir}") 244 | return expected_model_dir 245 | else: 246 | logger.error(f"Model directory not found after extraction: {expected_model_dir}") 247 | return None 248 | 249 | except Exception as e: 250 | logger.error(f"Error extracting model: {e}") 251 | return None 252 | 253 | def get_model_path(self, model_name: str, check_online: bool = True, force_redownload: bool = False) -> str: 254 | """ 255 | Get the path to a model, downloading it if necessary. 
256 | 257 | Args: 258 | model_name: Name of the model 259 | check_online: Whether to check online for models not found locally 260 | force_redownload: Whether to force re-download the model even if it exists 261 | 262 | Returns: 263 | Path to the model 264 | """ 265 | from .model_loader import get_platform_specific_models 266 | 267 | # Try to find the model in platform-specific models 268 | platform_models = get_platform_specific_models(check_online=False) 269 | 270 | # Look for the model in local models 271 | model_config = None 272 | for config in platform_models: 273 | if config.get("name") == model_name: 274 | model_config = config 275 | break 276 | 277 | # If not found locally and check_online is True, check online 278 | if model_config is None and check_online: 279 | logger.info(f"Model '{model_name}' not found locally. Checking online...") 280 | platform_models = get_platform_specific_models(check_online=True) 281 | 282 | for config in platform_models: 283 | if config.get("name") == model_name: 284 | model_config = config 285 | break 286 | 287 | # If still not found, raise error 288 | if model_config is None: 289 | available_models = [m.get("name") for m in platform_models] 290 | msg = f"No model found with name: {model_name}. Available models: {available_models}" 291 | logger.error(msg) 292 | raise ValueError(msg) 293 | 294 | # Extract model info 295 | url = model_config.get("url") 296 | model_type = model_config.get("type") 297 | 298 | if not url or not model_type: 299 | raise ValueError(f"Invalid model configuration for {model_name}: missing url or type") 300 | 301 | # Download or get the model path 302 | model_path = self.download_model(url, model_name, model_type, force_redownload=force_redownload) 303 | 304 | if not model_path or not os.path.exists(model_path): 305 | raise ValueError(f"Failed to get model path for {model_name}") 306 | 307 | return model_path -------------------------------------------------------------------------------- /examples/batch_profile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Batch profile script for multiple CoreML models using ANEMLL-Bench 4 | """ 5 | 6 | import os 7 | import sys 8 | import argparse 9 | import json 10 | import time 11 | import glob 12 | import webbrowser 13 | from pathlib import Path 14 | 15 | # Add parent directory to path for development imports 16 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 17 | 18 | from anemll_bench import Benchmark 19 | from anemll_bench.utils.system_info import get_system_info 20 | from anemll_bench.models.coreml_adapter import get_model_size 21 | 22 | 23 | def parse_args(): 24 | """Parse command line arguments""" 25 | parser = argparse.ArgumentParser(description='Batch profile multiple CoreML models') 26 | 27 | # Model options 28 | parser.add_argument('--models-dir', type=str, required=True, 29 | help='Directory containing CoreML models') 30 | parser.add_argument('--pattern', type=str, default='*.mlmodel*', 31 | help='Glob pattern to match model files (default: *.mlmodel*)') 32 | 33 | # Benchmark options 34 | parser.add_argument('--batch-size', type=int, default=1, 35 | help='Batch size for profiling') 36 | parser.add_argument('--sequence-length', type=int, default=512, 37 | help='Sequence length for text models') 38 | parser.add_argument('--hidden-size', type=int, default=4096, 39 | help='Hidden size for text models') 40 | parser.add_argument('--iterations', type=int, default=100, 41 | 
help='Number of iterations for profiling') 42 | parser.add_argument('--compute-units', type=str, default='CPU_AND_NE', 43 | choices=['CPU_AND_NE', 'CPU_ONLY', 'ALL'], 44 | help='Compute units to use for inference') 45 | 46 | # Output options 47 | parser.add_argument('--output-dir', type=str, default='./reports', 48 | help='Directory to save benchmark results and reports') 49 | parser.add_argument('--include-charts', action='store_true', 50 | help='Include performance charts in HTML report (disabled by default)') 51 | parser.add_argument('--upload', action='store_true', 52 | help='Upload reports to sharing service') 53 | parser.add_argument('--upload-service', type=str, default='jsonbin', 54 | choices=['gist', 'pastebin', 'jsonbin'], 55 | help='Service to upload reports to') 56 | 57 | # Extra options 58 | parser.add_argument('--compare-cpu', action='store_true', 59 | help='Compare with CPU-only performance (disabled by default)') 60 | parser.add_argument('--tflops', type=float, default=None, 61 | help='Specify the total number of trillion floating point operations (TFLOPs) per iteration (not TFLOPS rate)') 62 | 63 | args = parser.parse_args() 64 | 65 | return args 66 | 67 | 68 | def find_models(models_dir, pattern): 69 | """Find all models matching the pattern in the directory""" 70 | search_path = os.path.join(models_dir, pattern) 71 | models = glob.glob(search_path) 72 | 73 | # Also look for compiled models in subdirectories 74 | if '*.mlmodel*' in pattern: 75 | compiled_models = glob.glob(os.path.join(models_dir, '*.mlmodelc')) 76 | models.extend(compiled_models) 77 | 78 | return sorted(models) 79 | 80 | 81 | def main(): 82 | """Main entry point""" 83 | args = parse_args() 84 | 85 | # Find all models 86 | models = find_models(args.models_dir, args.pattern) 87 | 88 | if not models: 89 | print(f"No models found in {args.models_dir} matching pattern {args.pattern}") 90 | return 1 91 | 92 | print(f"Found {len(models)} models to benchmark:") 93 | for i, model in enumerate(models): 94 | print(f" {i+1}. 
{os.path.basename(model)}") 95 | 96 | # Make sure output directory exists 97 | os.makedirs(args.output_dir, exist_ok=True) 98 | 99 | # Get system info (same for all benchmarks) 100 | system_info = get_system_info() 101 | print("\n=== System Information ===") 102 | print(f"Mac Model: {system_info.get('mac_model', 'Unknown')}") 103 | 104 | # Use the user-friendly macOS version if available, otherwise fall back to the old format 105 | if 'macos_version' in system_info: 106 | print(f"OS: {system_info['macos_version']}") 107 | else: 108 | print(f"OS: {system_info.get('os', {}).get('name', 'Unknown')} " 109 | f"{system_info.get('os', {}).get('release', '')}") 110 | 111 | print(f"CPU: {system_info.get('cpu', {}).get('brand', 'Unknown')}") 112 | print(f"RAM: {system_info.get('ram', {}).get('total_gb', 'Unknown')} GB") 113 | print(f"Apple Silicon: {'Yes' if system_info.get('apple_silicon', False) else 'No'}") 114 | print("===========================\n") 115 | 116 | # Create a summary of all results 117 | summary = { 118 | "system_info": system_info, 119 | "models": [], 120 | "batch_size": args.batch_size, 121 | "sequence_length": args.sequence_length, 122 | "hidden_size": args.hidden_size, 123 | "iterations": args.iterations, 124 | "compute_units": args.compute_units, 125 | "timestamp": time.strftime("%Y-%m-%d %H:%M:%S") 126 | } 127 | 128 | # Process each model 129 | for i, model_path in enumerate(models): 130 | model_name = os.path.basename(model_path) 131 | print(f"\n[{i+1}/{len(models)}] Benchmarking {model_name}...") 132 | 133 | # Create a new benchmark instance for each model to keep results separate 134 | benchmark = Benchmark() 135 | 136 | try: 137 | # Benchmark with the requested compute units 138 | if args.tflops is not None: 139 | print(f"Using provided TFLOPS value: {args.tflops}") 140 | 141 | result = benchmark.benchmark_coreml_file( 142 | model_path=model_path, 143 | model_name=model_name, 144 | num_runs=args.iterations, 145 | batch_size=args.batch_size, 146 | sequence_length=args.sequence_length, 147 | hidden_size=args.hidden_size, 148 | compute_units=args.compute_units, 149 | known_tflops=args.tflops 150 | ) 151 | 152 | # Also benchmark with CPU only if requested 153 | cpu_result = None 154 | if args.compare_cpu and args.compute_units != "CPU_ONLY": 155 | print("\nComparing with CPU-only performance...") 156 | cpu_result = benchmark.benchmark_coreml_file( 157 | model_path=model_path, 158 | model_name=f"{model_name} (CPU)", 159 | num_runs=args.iterations, 160 | batch_size=args.batch_size, 161 | sequence_length=args.sequence_length, 162 | hidden_size=args.hidden_size, 163 | compute_units="CPU_ONLY", 164 | known_tflops=args.tflops 165 | ) 166 | 167 | # Calculate speedup 168 | ane_time = result.inference_time_ms 169 | cpu_time = cpu_result.inference_time_ms 170 | speedup = cpu_time / ane_time if ane_time > 0 else 0 171 | 172 | print("\n=== Performance Comparison ===") 173 | print(f"ANE Inference: {ane_time:.2f} ms") 174 | print(f"CPU Inference: {cpu_time:.2f} ms") 175 | print(f"Speedup: {speedup:.2f}x") 176 | print("=============================\n") 177 | 178 | # Generate individual report 179 | report_filename = f"{i+1:02d}_{model_name.replace('.', '_')}_report.html" 180 | report_path = os.path.join(args.output_dir, report_filename) 181 | 182 | report_url = benchmark.generate_report( 183 | output_path=report_path, 184 | upload=args.upload, 185 | upload_service=args.upload_service, 186 | include_charts=args.include_charts 187 | ) 188 | 189 | if report_url: 190 | print(f"Report uploaded 
to: {report_url}") 191 | 192 | # Add to summary 193 | model_result = { 194 | "name": model_name, 195 | "path": model_path, 196 | "size_bytes": get_model_size(model_path), 197 | "size_mb": get_model_size(model_path) / (1024 * 1024), 198 | "inference_time_ms": result.inference_time_ms, 199 | "throughput_gbps": result.throughput_gbps, 200 | "tflops": result.tflops, 201 | "report_path": report_path, 202 | "report_url": report_url 203 | } 204 | 205 | if cpu_result: 206 | model_result["cpu_inference_time_ms"] = cpu_result.inference_time_ms 207 | model_result["speedup"] = cpu_result.inference_time_ms / result.inference_time_ms 208 | 209 | summary["models"].append(model_result) 210 | 211 | except Exception as e: 212 | print(f"Error benchmarking {model_name}: {e}") 213 | # Add failed model to summary 214 | summary["models"].append({ 215 | "name": model_name, 216 | "path": model_path, 217 | "error": str(e) 218 | }) 219 | 220 | # Save summary 221 | summary_path = os.path.join(args.output_dir, "batch_profile_summary.json") 222 | with open(summary_path, 'w') as f: 223 | json.dump(summary, f, indent=2) 224 | 225 | print(f"\nBatch profile complete! Summary saved to {summary_path}") 226 | 227 | # Generate summary HTML 228 | html_summary = generate_html_summary(summary) 229 | summary_html_path = os.path.join(args.output_dir, "batch_profile_summary.html") 230 | with open(summary_html_path, 'w') as f: 231 | f.write(html_summary) 232 | 233 | print(f"HTML summary saved to {summary_html_path}") 234 | 235 | # Open the summary HTML report 236 | #webbrowser.open(f"file://{os.path.abspath(summary_html_path)}") 237 | 238 | return 0 239 | 240 | 241 | def generate_html_summary(summary): 242 | """Generate an HTML summary of all benchmarks""" 243 | system_info = summary["system_info"] 244 | models = summary["models"] 245 | 246 | # Start building HTML 247 | html = f""" 248 | 249 | 250 |
<html>
<head>
    <title>ANEMLL-Bench Batch Profile Summary</title>
</head>
<body>
    <h1>ANEMLL-Bench Batch Profile Summary</h1>
    <p>Generated on: {summary["timestamp"]}</p>

    <h2>System Information</h2>
    <p>Mac Model: {system_info.get('mac_model', 'Unknown')}</p>
    <p>CPU: {system_info.get('cpu', {}).get('brand', 'Unknown')}</p>
    <p>CPU Cores: {system_info.get('cpu', {}).get('cores', 'Unknown')}</p>
    <p>RAM: {system_info.get('ram', {}).get('total_gb', 'Unknown')} GB</p>
    <p>Apple Silicon: {'Yes' if system_info.get('apple_silicon', False) else 'No'}</p>
    <p>OS: {system_info.get('os', {}).get('name', 'Unknown')} {system_info.get('os', {}).get('release', '')}</p>
    <p>OS Version: {system_info.get('os', {}).get('version', 'Unknown')}</p>
    <p>Python Version: {system_info.get('python_version', 'Unknown')}</p>

    <h2>Benchmark Configuration</h2>
    <p>Compute Units: {summary["compute_units"]}</p>
    <p>Batch Size: {summary["batch_size"]}</p>
    <p>Sequence Length: {summary["sequence_length"]}</p>
    <p>Hidden Size: {summary["hidden_size"]}</p>
    <p>Iterations: {summary["iterations"]}</p>

    <h2>Results</h2>
    <table border="1">
        <tr>
            <th>#</th>
            <th>Model</th>
            <th>Size (MB)</th>
            <th>Inference Time (ms)</th>
            <th>Throughput (GB/s)</th>
            <th>TFLOPS</th>
            <th>CPU Speedup</th>
            <th>Report</th>
        </tr>
"""

    # Add one table row per model; failed benchmarks get an error row instead of metrics
    for i, model in enumerate(models):
        if "error" in model:
            html += f"""
        <tr>
            <td>{i+1}</td>
            <td>{model["name"]}</td>
            <td>Error: {model["error"]}</td>
            <td></td><td></td><td></td><td></td><td></td>
        </tr>
"""
        else:
            # Format optional fields, falling back to "N/A" when a value is unavailable
            tflops_html = f"{model['tflops']:.2f}" if model.get("tflops") else "N/A"
            speedup = f"{model['speedup']:.2f}x" if "speedup" in model else "N/A"

            # Prefer the uploaded report URL, otherwise link to the local report file
            if model.get("report_url"):
                report_link = f'<a href="{model["report_url"]}">View</a>'
            else:
                report_link = f'<a href="{os.path.basename(model["report_path"])}">View</a>'

            html += f"""
        <tr>
            <td>{i+1}</td>
            <td>{model["name"]}</td>
            <td>{model["size_mb"]:.2f}</td>
            <td>{model["inference_time_ms"]:.2f}</td>
            <td>{model["throughput_gbps"]:.2f}</td>
            <td>{tflops_html}</td>
            <td>{speedup}</td>
            <td>{report_link}</td>
        </tr>
"""

    # Close the table and the document
    html += """
    </table>

    <p>This summary report compares the performance of multiple CoreML models on the Apple Neural Engine.</p>
</body>
</html>
"""

    return html
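--------------------------------------------------------------------------------
For quick reference, the snippet below is a minimal sketch of profiling a single CoreML file with the same calls batch_profile.py uses above (Benchmark.benchmark_coreml_file and Benchmark.generate_report). The model path, model name, and output path are hypothetical placeholders, and the keyword arguments mirror the ones shown in the script rather than an exhaustive API reference.

"""Minimal single-model profiling sketch based on the calls used in batch_profile.py."""

from anemll_bench import Benchmark

# Hypothetical paths -- substitute a real .mlpackage/.mlmodelc and output location.
MODEL_PATH = "/path/to/model.mlpackage"
REPORT_PATH = "./reports/single_model_report.html"

benchmark = Benchmark()

# Same keyword arguments as the batch script: iteration count, tensor shape, compute units.
result = benchmark.benchmark_coreml_file(
    model_path=MODEL_PATH,
    model_name="my_model",
    num_runs=100,
    batch_size=1,
    sequence_length=512,
    hidden_size=4096,
    compute_units="CPU_AND_NE",
)

print(f"Inference time: {result.inference_time_ms:.2f} ms")
print(f"Throughput: {result.throughput_gbps:.2f} GB/s")

# Write an HTML report locally without uploading it to a sharing service.
benchmark.generate_report(
    output_path=REPORT_PATH,
    upload=False,
    include_charts=True,
)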