├── .python-version
├── optimum_benchmark
    ├── backends
    │   ├── ipex
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   ├── config.py
    │   │   └── backend.py
    │   ├── vllm
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── llama_cpp
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   └── backend.py
    │   ├── openvino
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── config.py
    │   ├── py_txi
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── pytorch
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── onnxruntime
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── config.py
    │   ├── tensorrt_llm
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── config.py
    │   ├── peft_utils.py
    │   ├── __init__.py
    │   ├── diffusers_utils.py
    │   └── timm_utils.py
    ├── benchmark
    │   ├── __init__.py
    │   ├── config.py
    │   └── base.py
    ├── generators
    │   ├── __init__.py
    │   ├── input_generator.py
    │   ├── dataset_generator.py
    │   └── base.py
    ├── profilers
    │   ├── __init__.py
    │   ├── fx_profiler.py
    │   └── ort_profiler.py
    ├── launchers
    │   ├── inline
    │   │   ├── __init__.py
    │   │   ├── launcher.py
    │   │   └── config.py
    │   ├── process
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── torchrun
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── __init__.py
    │   ├── config.py
    │   └── base.py
    ├── scenarios
    │   ├── training
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── energy_star
    │   │   └── __init__.py
    │   ├── inference
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── __init__.py
    │   ├── config.py
    │   └── base.py
    ├── process_utils.py
    ├── version.py
    ├── trackers
    │   └── __init__.py
    ├── __init__.py
    ├── logging_utils.py
    └── cli.py
├── tests
    ├── configs
    │   ├── _cpu_.yaml
    │   ├── _export_.yaml
    │   ├── _gpt2_.yaml
    │   ├── _bloom_.yaml
    │   ├── _cuda_.yaml
    │   ├── _dp_.yaml
    │   ├── _bert_.yaml
    │   ├── _device_isolation_.yaml
    │   ├── _device_map_.yaml
    │   ├── _st_bert_.yaml
    │   ├── _torch_compile_.yaml
    │   ├── _timm_.yaml
    │   ├── _no_weights_.yaml
    │   ├── _serving_mode_.yaml
    │   ├── _training_.yaml
    │   ├── _vllm_.yaml
    │   ├── _awq_.yaml
    │   ├── _tensorrt_llm_pp_.yaml
    │   ├── _tensorrt_llm_tp_.yaml
    │   ├── _ort_quant_.yaml
    │   ├── _peft_.yaml
    │   ├── _bnb_.yaml
    │   ├── _gptq_.yaml
    │   ├── _text_encoders_decoders_.yaml
    │   ├── _diffusers_.yaml
    │   ├── _text_decoders_.yaml
    │   ├── _gguf_.yaml
    │   ├── _text_encoders_.yaml
    │   ├── _ddp_.yaml
    │   ├── _tp_.yaml
    │   ├── _vllm_pp_.yaml
    │   ├── _vllm_tp_.yaml
    │   ├── _image_text_to_text_.yaml
    │   ├── cpu_inference_pytorch_timm.yaml
    │   ├── _inference_.yaml
    │   ├── cpu_inference_llama_cpp_gguf.yaml
    │   ├── cpu_inference_openvino_diffusers.yaml
    │   ├── cpu_inference_pytorch_diffusers.yaml
    │   ├── cuda_inference_tensorrt_llm.yaml
    │   ├── cuda_inference_tensorrt_llm_pp.yaml
    │   ├── cuda_inference_tensorrt_llm_tp.yaml
    │   ├── _deepspeed_inference_.yaml
    │   ├── cpu_inference_py_txi_gpt2.yaml
    │   ├── cuda_inference_py_txi_gpt2.yaml
    │   ├── cpu_inference_onnxruntime_timm.yaml
    │   ├── cpu_inference_py_txi_st_bert.yaml
    │   ├── cuda_inference_py_txi_st_bert.yaml
    │   ├── cuda_inference_pytorch_timm.yaml
    │   ├── cpu_inference_ipex_text_decoders.yaml
    │   ├── cpu_inference_onnxruntime_diffusers.yaml
    │   ├── cpu_training_pytorch_text_decoders.yaml
    │   ├── cpu_training_pytorch_text_encoders.yaml
    │   ├── cpu_inference_ipex_text_encoders.yaml
    │   ├── cpu_inference_pytorch_text_decoders.yaml
    │   ├── cpu_inference_pytorch_text_encoders.yaml
    │   ├── cpu_inference_pytorch_timm_torch_compile.yaml
    │   ├── cuda_inference_pytorch_diffusers.yaml
    │   ├── cpu_inference_openvino_text_decoders.yaml
    │   ├── cpu_inference_openvino_text_encoders.yaml
    │   ├── cpu_inference_pytorch_diffusers_torch_compile.yaml
    │   ├── cpu_inference_pytorch_image_text_to_text.yaml
    │   ├── cuda_inference_vllm_bloom_pp.yaml
    │   ├── cuda_inference_vllm_bloom_tp.yaml
    │   ├── cuda_training_pytorch_ddp.yaml
    │   ├── cpu_inference_pytorch_text_encoders_decoders.yaml
    │   ├── cuda_inference_pytorch_bnb.yaml
    │   ├── cuda_inference_pytorch_gptq.yaml
    │   ├── cuda_training_pytorch_dp.yaml
    │   ├── cuda_training_pytorch_peft.yaml
    │   ├── cuda_inference_pytorch_tp.yaml
    │   ├── cuda_training_pytorch_device_map.yaml
    │   ├── cpu_inference_onnxruntime_ort_quant.yaml
    │   ├── cpu_inference_onnxruntime_text_decoders.yaml
    │   ├── cpu_inference_onnxruntime_text_encoders.yaml
    │   ├── cuda_inference_pytorch_device_map.yaml
    │   ├── cuda_inference_pytorch_timm_torch_compile.yaml
    │   ├── cuda_inference_vllm_bloom.yaml
    │   ├── cuda_training_pytorch_text_decoders.yaml
    │   ├── cuda_training_pytorch_text_encoders.yaml
    │   ├── cuda_inference_pytorch_text_decoders.yaml
    │   ├── cuda_inference_pytorch_text_encoders.yaml
    │   ├── cuda_inference_pytorch_diffusers_torch_compile.yaml
    │   ├── cpu_inference_onnxruntime_text_encoders_decoders.yaml
    │   ├── cuda_inference_pytorch_deepspeed_inference.yaml
    │   ├── cuda_inference_onnxruntime_text_decoders.yaml
    │   ├── cuda_inference_onnxruntime_text_encoders.yaml
    │   └── _base_.yaml
    ├── conftest.py
    ├── test_examples.py
    └── test_energy_star.py
├── logo.png
├── uv.toml
├── examples
    ├── mps_pytorch_bert.yaml
    ├── cpu_llama_cpp_text_generation.yaml
    ├── cuda_pytorch_bert.yaml
    ├── cpu_llama_cpp_embedding.yaml
    ├── cpu_openvino_diffusion.yaml
    ├── cpu_openvino_8bit_bert.yaml
    ├── cpu_onnxruntime_static_quant_vit.yaml
    ├── _base_.yaml
    ├── cuda_pytorch_llama.yaml
    ├── cuda_tgi_llama.yaml
    ├── cuda_trt_llama.yaml
    ├── cpu_ipex_bert.yaml
    ├── cuda_pytorch_llama_compile_model.yaml
    ├── cuda_pytorch_llama_compile_regions.yaml
    ├── cpu_ipex_llama.yaml
    ├── cuda_vllm_llama.yaml
    ├── cuda_pytorch_vlm.yaml
    ├── cuda_pytorch_bert.py
    └── cuda_pytorch_llama_quants.py
├── docker
    ├── unroot
    │   └── Dockerfile
    ├── cpu
    │   └── Dockerfile
    ├── cuda
    │   └── Dockerfile
    └── rocm
    │   └── Dockerfile
├── energy_star
    ├── _base_.yaml
    ├── object_detection.yaml
    ├── image_to_text.yaml
    ├── image_classification.yaml
    ├── automatic_speech_recognition.yaml
    ├── text_to_image.yaml
    ├── text_classification.yaml
    ├── question_answering.yaml
    ├── summarization.yaml
    ├── sentence_similarity.yaml
    ├── t5_question_answering.yaml
    ├── t5_summarization.yaml
    ├── t5_text_generation.yaml
    ├── t5_text_classification.yaml
    └── text_generation.yaml
├── .github
    └── workflows
    │   ├── quality.yaml
    │   ├── security.yml
    │   ├── test_energy_star.yaml
    │   ├── test_api_rocm.yaml
    │   ├── test_cli_cpu_ipex.yaml
    │   ├── test_cli_cpu_pytorch.yaml
    │   ├── test_cli_cpu_openvino.yaml
    │   ├── test_cli_cpu_llama_cpp.yaml
    │   ├── test_cli_cpu_onnxruntime.yaml
    │   ├── test_cli_cuda_py_txi.yaml
    │   ├── test_cli_cuda_onnxruntime.yaml
    │   ├── test_api_cpu.yaml
    │   ├── test_api_cuda.yaml
    │   ├── test_api_misc.yaml
    │   ├── test_cli_misc.yaml
    │   ├── test_cli_cpu_py_txi.yaml
    │   ├── test_cli_rocm_pytorch.yaml
    │   ├── test_cli_cuda_vllm.yaml
    │   ├── test_cli_cuda_pytorch.yaml
    │   ├── test_cli_cuda_tensorrt_llm.yaml
    │   └── images.yaml
├── scripts
    ├── total_tests_runs.py
    └── update_ci_badges.py
├── CONTRIBUTING.md
└── .gitignore

/.python-version:
--------------------------------------------------------------------------------
1 | 3.10
2 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/ipex/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/vllm/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/generators/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/profilers/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/llama_cpp/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/openvino/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/py_txi/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/pytorch/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/launchers/inline/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/launchers/process/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/launchers/torchrun/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/scenarios/training/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/onnxruntime/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/tensorrt_llm/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/scenarios/energy_star/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/scenarios/inference/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/configs/_cpu_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   device: cpu
3 | 
--------------------------------------------------------------------------------
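Note: the underscore-prefixed files under tests/configs, like _cpu_ above, are Hydra fragments that the full test configs compose through their defaults lists. The same composition can be built directly with the library's Python API; the sketch below is illustrative only and is not a file from this repository. It assumes the top-level package re-exports Benchmark, BenchmarkConfig and PyTorchConfig alongside the ProcessConfig and InferenceConfig exports visible later in this dump (as the project README suggests); treat the exact names as assumptions.

# Hypothetical usage sketch; not part of the repository dump.
from optimum_benchmark import (
    Benchmark,
    BenchmarkConfig,
    InferenceConfig,
    ProcessConfig,
    PyTorchConfig,
)
from optimum_benchmark.logging_utils import setup_logging

setup_logging(level="INFO")

if __name__ == "__main__":
    # Roughly the composition of the _base_ + _cpu_ + _inference_ + _bert_ fragments:
    # a process launcher, a CPU PyTorch backend and a short latency/memory scenario.
    config = BenchmarkConfig(
        name="cpu_inference_pytorch_bert",
        launcher=ProcessConfig(),
        backend=PyTorchConfig(
            model="google-bert/bert-base-uncased",
            device="cpu",
            no_weights=True,
        ),
        scenario=InferenceConfig(
            latency=True,
            memory=True,
            input_shapes={"batch_size": 1, "sequence_length": 16},
        ),
    )
    report = Benchmark.launch(config)

The CLI reaches the same composition from YAML, along the lines of "optimum-benchmark --config-dir tests/configs --config-name <config>", per the project README.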
/tests/configs/_export_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   export: true
3 | 
--------------------------------------------------------------------------------
/tests/configs/_gpt2_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: openai-community/gpt2
3 | 
--------------------------------------------------------------------------------
/tests/configs/_bloom_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: bigscience/bloom-560m
3 | 
--------------------------------------------------------------------------------
/tests/configs/_cuda_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   device: cuda
3 |   device_ids: 0
4 | 
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huggingface/optimum-benchmark/HEAD/logo.png
--------------------------------------------------------------------------------
/tests/configs/_dp_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   device_ids: 0,1
3 |   model: openai-community/gpt2
4 | 
--------------------------------------------------------------------------------
/tests/configs/_bert_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: google-bert/bert-base-uncased
3 |   task: feature-extraction
4 | 
--------------------------------------------------------------------------------
/tests/configs/_device_isolation_.yaml:
--------------------------------------------------------------------------------
1 | launcher:
2 |   device_isolation: true
3 |   device_isolation_action: warn
4 | 
--------------------------------------------------------------------------------
/tests/configs/_device_map_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   device_ids: 0,1
3 |   device_map: auto
4 |   model: openai-community/gpt2
5 | 
--------------------------------------------------------------------------------
/tests/configs/_st_bert_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: sentence-transformers/all-MiniLM-L6-v2
3 |   task: feature-extraction
4 | 
--------------------------------------------------------------------------------
/tests/configs/_torch_compile_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   torch_compile: true
3 |   torch_compile_config:
4 |     backend: eager
5 | 
--------------------------------------------------------------------------------
/tests/configs/_timm_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   library: timm
3 |   task: image-classification
4 |   model: timm/tiny_vit_21m_224.in1k
5 | 
--------------------------------------------------------------------------------
/tests/configs/_no_weights_.yaml:
--------------------------------------------------------------------------------
1 | hydra:
2 |   mode: MULTIRUN
3 |   sweeper:
4 |     params:
5 |       backend.no_weights: true,false
6 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/tensorrt_llm/utils.py:
--------------------------------------------------------------------------------
1 | MODEL_TYPE_TO_TRTLLMMODELS = {"llama": "optimum.nvidia.models.llama.LlamaForCausalLM"}
2 | 
--------------------------------------------------------------------------------
/tests/configs/_serving_mode_.yaml:
--------------------------------------------------------------------------------
1 | hydra:
2 |   mode: MULTIRUN
3 |   sweeper:
4 |     params:
5 |       backend.serving_mode: online,offline
6 | 
--------------------------------------------------------------------------------
/tests/configs/_training_.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - override scenario: training
3 | 
4 | scenario:
5 |   max_steps: 5
6 |   warmup_steps: 2
7 | 
--------------------------------------------------------------------------------
/tests/configs/_vllm_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   serving_mode: offline
3 |   engine_args:
4 |     max_model_len: 512
5 |     enforce_eager: true
6 | 
--------------------------------------------------------------------------------
/tests/configs/_awq_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: TheBloke/TinyLlama-1.1B-Chat-v1.0-AWQ
3 |   quantization_scheme: awq
4 |   quantization_config:
5 |     version: exllama
6 | 
--------------------------------------------------------------------------------
/tests/configs/_tensorrt_llm_pp_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
3 |   gpus_per_node: 2
4 |   device_ids: 0,1
5 |   world_size: 2
6 |   pp: 2
7 | 
--------------------------------------------------------------------------------
/tests/configs/_tensorrt_llm_tp_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
3 |   gpus_per_node: 2
4 |   device_ids: 0,1
5 |   world_size: 2
6 |   tp: 2
7 | 
--------------------------------------------------------------------------------
/tests/configs/_ort_quant_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: google-bert/bert-base-uncased
3 |   quantization: true
4 |   quantization_config:
5 |     is_static: true
6 |     calibration: true
7 | 
--------------------------------------------------------------------------------
/tests/configs/_peft_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   peft_type: LORA
3 | 
4 | hydra:
5 |   mode: MULTIRUN
6 |   sweeper:
7 |     params:
8 |       backend.model: openai-community/gpt2,google-bert/bert-base-uncased
--------------------------------------------------------------------------------
/tests/configs/_bnb_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
3 |   quantization_scheme: bnb
4 |   quantization_config:
5 |     load_in_4bit: true
6 |     bnb_4bit_compute_dtype: float16
7 | 
--------------------------------------------------------------------------------
/tests/configs/_gptq_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ
3 |   quantization_scheme: gptq
4 |   quantization_config:
5 |     exllama_config:
6 |       version: 2
7 |       max_input_len: 512
8 | 
-------------------------------------------------------------------------------- /tests/configs/_text_encoders_decoders_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.task: text2text-generation 6 | backend.model: hf-internal-testing/tiny-random-T5ForConditionalGeneration 7 | -------------------------------------------------------------------------------- /tests/configs/_diffusers_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.library: diffusers 6 | backend.task: text-to-image 7 | backend.model: hf-internal-testing/tiny-stable-diffusion-torch 8 | -------------------------------------------------------------------------------- /tests/configs/_text_decoders_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.task: text-generation 6 | backend.model: hf-internal-testing/tiny-random-GPT2LMHeadModel,hf-internal-testing/tiny-random-LlamaForCausalLM 7 | -------------------------------------------------------------------------------- /tests/configs/_gguf_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.task: text-generation,feature-extraction 6 | backend.filename: DistilGPT2-TinyStories.Q4_K_S.gguf 7 | backend.model: mradermacher/DistilGPT2-TinyStories-GGUF 8 | -------------------------------------------------------------------------------- /tests/configs/_text_encoders_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.task: fill-mask,text-classification,token-classification,question-answering 6 | backend.model: hf-internal-testing/tiny-random-BertModel,hf-internal-testing/tiny-random-RobertaModel 7 | -------------------------------------------------------------------------------- /tests/configs/_ddp_.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override launcher: torchrun 3 | 4 | launcher: 5 | nproc_per_node: 2 6 | 7 | backend: 8 | device_ids: 0,1 9 | model: hf-internal-testing/tiny-random-LlamaForCausalLM 10 | 11 | hydra: 12 | job: 13 | env_set: 14 | LOG_ALL_RANKS: 1 15 | -------------------------------------------------------------------------------- /tests/configs/_tp_.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override launcher: torchrun 3 | 4 | launcher: 5 | nproc_per_node: 2 6 | 7 | backend: 8 | tp_plan: auto 9 | device_ids: 0,1 10 | model: hf-internal-testing/tiny-random-LlamaForCausalLM 11 | 12 | hydra: 13 | job: 14 | env_set: 15 | LOG_ALL_RANKS: 1 -------------------------------------------------------------------------------- /tests/configs/_vllm_pp_.yaml: -------------------------------------------------------------------------------- 1 | backend: 2 | device_ids: 0,1 3 | serving_mode: online 4 | engine_args: 5 | max_model_len: 512 6 | enforce_eager: true 7 | pipeline_parallel_size: 2 8 | distributed_executor_backend: mp 9 | 10 | hydra: 11 | job: 12 | env_set: 13 | VLLM_WORKER_MULTIPROC_METHOD: spawn 14 | -------------------------------------------------------------------------------- /tests/configs/_vllm_tp_.yaml: 
-------------------------------------------------------------------------------- 1 | backend: 2 | device_ids: 0,1 3 | serving_mode: offline 4 | engine_args: 5 | max_model_len: 512 6 | enforce_eager: true 7 | tensor_parallel_size: 2 8 | distributed_executor_backend: mp 9 | 10 | hydra: 11 | job: 12 | env_set: 13 | VLLM_WORKER_MULTIPROC_METHOD: spawn 14 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import LauncherConfig # noqa: F401 2 | from .inline.config import InlineConfig # noqa: F401 3 | from .process.config import ProcessConfig # noqa: F401 4 | from .torchrun.config import TorchrunConfig # noqa: F401 5 | 6 | __all__ = [ 7 | "InlineConfig", 8 | "ProcessConfig", 9 | "TorchrunConfig", 10 | "LauncherConfig", 11 | ] 12 | -------------------------------------------------------------------------------- /uv.toml: -------------------------------------------------------------------------------- 1 | # UV package manager configuration for optimum-benchmark 2 | 3 | # Use the latest stable Python 4 | python-preference = "only-managed" 5 | 6 | # Resolution strategy 7 | resolution = "highest" 8 | 9 | # Enable preview features 10 | preview = true 11 | 12 | # Extra build dependencies for specific packages 13 | [extra-build-dependencies] 14 | flash-attn = ["torch"] 15 | gptqmodel = ["torch"] 16 | -------------------------------------------------------------------------------- /optimum_benchmark/scenarios/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import ScenarioConfig # noqa: F401 2 | from .energy_star.config import EnergyStarConfig # noqa: F401 3 | from .inference.config import InferenceConfig # noqa: F401 4 | from .training.config import TrainingConfig # noqa: F401 5 | 6 | __all__ = [ 7 | "EnergyStarConfig", 8 | "InferenceConfig", 9 | "TrainingConfig", 10 | "ScenarioConfig", 11 | ] 12 | -------------------------------------------------------------------------------- /tests/configs/_image_text_to_text_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.task: image-text-to-text 6 | backend.model: 7 | hf-internal-testing/tiny-random-BlipForConditionalGeneration, 8 | hf-internal-testing/tiny-random-IdeficsForVisionText2Text, 9 | hf-internal-testing/tiny-random-GitForCausalLM 10 | +scenario.input_shapes.num_images: 2 11 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_timm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _timm_ # inherits from timm config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: pytorch 9 | 10 | name: cpu_inference_pytorch_timm 11 | -------------------------------------------------------------------------------- /tests/configs/_inference_.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override scenario: inference 3 | 4 | scenario: 5 | memory: true 6 | latency: true 7 | 8 | duration: 1 9 | iterations: 1 10 | warmup_runs: 1 11 | 12 | input_shapes: 13 | 
batch_size: 1 14 | sequence_length: 16 15 | 16 | generate_kwargs: 17 | max_new_tokens: 16 18 | min_new_tokens: 16 19 | 20 | call_kwargs: 21 | num_inference_steps: 4 22 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_llama_cpp_gguf.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _gguf_ # inherits from llama_cpp config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: llama_cpp 9 | 10 | name: cpu_inference_llama_cpp_gpt2_gguf 11 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_openvino_diffusers.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _diffusers_ # inherits from diffusers config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: openvino 9 | 10 | name: cpu_inference_openvino_diffusers 11 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_diffusers.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _diffusers_ # inherits from diffusers config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: pytorch 9 | 10 | name: cpu_inference_pytorch_diffusers 11 | -------------------------------------------------------------------------------- /optimum_benchmark/scenarios/config.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from dataclasses import dataclass 3 | from logging import getLogger 4 | from typing import TypeVar 5 | 6 | LOGGER = getLogger("benchmark") 7 | 8 | 9 | @dataclass 10 | class ScenarioConfig(ABC): 11 | name: str 12 | _target_: str 13 | 14 | def __post_init__(self): 15 | pass 16 | 17 | 18 | ScenarioConfigT = TypeVar("ScenarioConfigT", bound=ScenarioConfig) 19 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_tensorrt_llm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _self_ # hydra 1.1 compatibility 7 | - override backend: tensorrt-llm 8 | 9 | name: cuda_inference_tensorrt_llm 10 | 11 | backend: 12 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_tensorrt_llm_pp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits 
from inference config 6 | - _tensorrt_llm_pp_ # inherits from tensorrt_llm_pp config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: tensorrt-llm 9 | 10 | name: cuda_inference_tensorrt_llm_pp 11 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_tensorrt_llm_tp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _tensorrt_llm_tp_ # inherits from tensorrt_llm_tp config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: tensorrt-llm 9 | 10 | name: cuda_inference_tensorrt_llm_tp 11 | -------------------------------------------------------------------------------- /optimum_benchmark/process_utils.py: -------------------------------------------------------------------------------- 1 | from multiprocessing.connection import Connection 2 | from typing import TypeVar 3 | 4 | DeserializedType = TypeVar("DeserializedType") 5 | 6 | 7 | def sync_with_parent(child_connection: Connection) -> None: 8 | child_connection.recv() 9 | child_connection.send(0) 10 | 11 | 12 | def sync_with_child(parent_connection: Connection) -> None: 13 | parent_connection.send(0) 14 | parent_connection.recv() 15 | -------------------------------------------------------------------------------- /tests/configs/_deepspeed_inference_.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override launcher: torchrun 3 | 4 | launcher: 5 | nproc_per_node: 2 6 | 7 | backend: 8 | device_ids: 0,1 9 | model: google-bert/bert-base-uncased 10 | deepspeed_inference: true 11 | deepspeed_inference_config: 12 | tensor_parallel: 13 | tp_size: 2 14 | 15 | scenario: 16 | input_shapes: 17 | batch_size: 2 18 | 19 | hydra: 20 | job: 21 | env_set: 22 | LOG_ALL_RANKS: 1 23 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_py_txi_gpt2.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _gpt2_ # inherits from gpt2 config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: py-txi 10 | 11 | name: cpu_inference_py_txi_gpt2 12 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_py_txi_gpt2.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _gpt2_ # inherits from gpt2 config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: py-txi 10 | 11 | name: cuda_inference_py_txi_gpt2 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_timm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one 
overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _export_ # inherits from export config 7 | - _timm_ # inherits from timm config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: onnxruntime 10 | 11 | name: cpu_inference_onnxruntime_timm 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_py_txi_st_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _st_bert_ # inherits from bert config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: py-txi 10 | 11 | name: cpu_inference_py_txi_st_bert 12 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_py_txi_st_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _st_bert_ # inherits from bert config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: py-txi 10 | 11 | name: cuda_inference_py_txi_st_bert 12 | -------------------------------------------------------------------------------- /examples/mps_pytorch_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: inline # mps fails with python multi-processing for some reason 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: mps_pytorch_bert 10 | 11 | backend: 12 | device: mps 13 | no_weights: true 14 | model: bert-base-uncased 15 | 16 | scenario: 17 | memory: true 18 | latency: true 19 | input_shapes: 20 | batch_size: 1 21 | sequence_length: 128 22 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_timm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _timm_ # inherits from timm config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cuda_inference_pytorch_timm 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_ipex_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: ipex 10 | 11 | name: 
cpu_inference_ipex_text_decoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_diffusers.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _diffusers_ # inherits from diffusers config 7 | - _export_ # inherits from export config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: onnxruntime 10 | 11 | name: cpu_inference_onnxruntime_diffusers 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_training_pytorch_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _training_ # inherits from training config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_training_pytorch_text_decoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_training_pytorch_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _training_ # inherits from training config 6 | - _text_encoders_ # inherits from text encoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_training_pytorch_text_encoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_ipex_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders sweep config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: ipex 10 | 11 | name: cpu_inference_ipex_text_encoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_text_decoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_text_encoders.yaml: 
-------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_text_encoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_timm_torch_compile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _torch_compile_ # inherits from torch compile config 6 | - _inference_ # inherits from inference config 7 | - _timm_ # inherits from timm config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_timm_torch_compile 12 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_diffusers.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _diffusers_ # inherits from diffusers config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cuda_inference_pytorch_diffusers 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_openvino_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: openvino 10 | 11 | name: cpu_inference_openvino_text_decoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_openvino_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders sweep config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: openvino 10 | 11 | name: cpu_inference_openvino_text_encoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_diffusers_torch_compile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ 
# inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _torch_compile_ # inherits from torch compile config 7 | - _diffusers_ # inherits from diffusers config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_diffusers_torch_compile 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_image_text_to_text.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _image_text_to_text_ # inherits from image text to text config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_image_text_to_text 12 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_vllm_bloom_pp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _vllm_pp_ # inherits from vllm pp config 8 | - _bloom_ # inherits from bloom config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: vllm 11 | 12 | name: cuda_inference_vllm_bloom_pp 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_vllm_bloom_tp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _vllm_tp_ # inherits from vllm tp config 8 | - _bloom_ # inherits from bloom config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: vllm 11 | 12 | name: cuda_inference_vllm_bloom_tp 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_ddp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _ddp_ # inherits from ddp config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_ddp 13 | -------------------------------------------------------------------------------- /examples/cpu_llama_cpp_text_generation.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - backend: llama_cpp 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_llama_cpp_text_generation 10 | 11 | backend: 12 | device: cpu 13 | task: text-generation 14 | model: 
TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF 15 | filename: tinyllama-1.1b-chat-v1.0.Q4_0.gguf 16 | 17 | scenario: 18 | memory: true 19 | latency: true 20 | 21 | input_shapes: 22 | batch_size: 1 23 | sequence_length: 128 24 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_text_encoders_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_decoders_ # inherits from text encoders decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_text_encoders_decoders 12 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_bnb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _bnb_ # inherits from bnb config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_bnb 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_gptq.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _gptq_ # inherits from gptq config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_gptq 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_dp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _dp_ # inherits from data parallel config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_dp 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_peft.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - 
_peft_ # inherits from peft config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_peft 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_tp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _tp_ # inherits from tensor parallel config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_tp 13 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_pytorch_bert 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: google-bert/bert-base-uncased 20 | 21 | scenario: 22 | memory: true 23 | latency: true 24 | 25 | input_shapes: 26 | batch_size: 1 27 | sequence_length: 128 28 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_device_map.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _device_map_ # inherits from device map config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_device_map 13 | -------------------------------------------------------------------------------- /examples/cpu_llama_cpp_embedding.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - backend: llama_cpp 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_llama_cpp_embedding 10 | 11 | backend: 12 | device: cpu 13 | task: feature-extraction 14 | model: nomic-ai/nomic-embed-text-v1.5-GGUF 15 | filename: nomic-embed-text-v1.5.Q4_0.gguf 16 | 17 | scenario: 18 | input_shapes: 19 | batch_size: 1 20 | sequence_length: 64 21 | 22 | generate_kwargs: 23 | max_new_tokens: 32 24 | min_new_tokens: 32 25 | -------------------------------------------------------------------------------- /examples/cpu_openvino_diffusion.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - backend: openvino 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: openvino_diffusion 10 | 11 | backend: 12 | device: cpu 13 | export: true 14 | task: text-to-image 15 | model: stabilityai/stable-diffusion-2-1 16 | half: false # enable half-precision on compatible Intel CPU machines 17 | 18 | scenario: 19 | 
input_shapes: 20 | batch_size: 1 21 | sequence_length: 16 22 | 23 | call_kwargs: 24 | num_inference_steps: 4 25 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_ort_quant.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _ort_quant_ # inherits from ort static quant config 7 | - _no_weights_ # inherits from no weights sweep config 8 | - _export_ # inherits from export config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: onnxruntime 11 | 12 | name: cpu_inference_onnxruntime_ort_quant 13 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _export_ # inherits from export config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: onnxruntime 11 | 12 | name: cpu_inference_onnxruntime_text_decoders 13 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _export_ # inherits from export config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: onnxruntime 11 | 12 | name: cpu_inference_onnxruntime_text_encoders 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_device_map.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _device_map_ # inherits from device map config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_device_map 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_timm_torch_compile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _torch_compile_ # inherits from torch compile 
config 8 | - _timm_ # inherits from timm config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_timm_torch_compile 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_vllm_bloom.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _serving_mode_ # inherits from serving_mode config 7 | - _no_weights_ # inherits from no weights config 8 | - _bloom_ # inherits from bloom config 9 | - _vllm_ # inherits from vllm config 10 | - _self_ # hydra 1.1 compatibility 11 | - override backend: vllm 12 | 13 | name: cuda_inference_vllm_bloom 14 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_text_decoders 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _text_encoders_ # inherits from text encoders config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_text_encoders 13 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Pytest configuration for optimum-benchmark tests.""" 2 | 3 | import pytest 4 | 5 | 6 | def pytest_sessionfinish(session, exitstatus): 7 | """ 8 | Hook to modify the exit status when no tests are collected. 9 | 10 | This prevents pytest from returning a non-zero exit code when no tests 11 | are found, which is useful for CI/CD pipelines where some test runs 12 | might legitimately have no tests to run. 
13 | """ 14 | if exitstatus == pytest.ExitCode.NO_TESTS_COLLECTED: 15 | session.exitstatus = pytest.ExitCode.OK 16 | -------------------------------------------------------------------------------- /examples/cpu_openvino_8bit_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - backend: openvino 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_openvino_8bit_bert 10 | 11 | backend: 12 | device: cpu 13 | reshape: true 14 | no_weights: true 15 | load_in_8bit: true 16 | model: google-bert/bert-base-uncased 17 | reshape_kwargs: 18 | batch_size: 1 19 | sequence_length: 128 20 | 21 | scenario: 22 | memory: true 23 | latency: true 24 | 25 | input_shapes: 26 | batch_size: 1 27 | sequence_length: 128 28 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_text_decoders 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_text_encoders 13 | -------------------------------------------------------------------------------- /examples/cpu_onnxruntime_static_quant_vit.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: onnxruntime 4 | - scenario: inference 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_onnxruntime_static_quant_vit 10 | 11 | backend: 12 | device: cpu 13 | export: true 14 | no_weights: true 15 | model: google/vit-base-patch16-224 16 | quantization: true 17 | quantization_config: 18 | is_static: true 19 | per_channel: false 20 | calibration: true 21 | 22 | scenario: 23 | memory: true 24 | latency: true 25 | input_shapes: 26 | batch_size: 2 27 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_diffusers_torch_compile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _torch_compile_ # inherits 
from torch compile config 8 | - _diffusers_ # inherits from diffusers config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_diffusers_torch_compile 13 | -------------------------------------------------------------------------------- /docker/unroot/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG IMAGE="optimum-benchmark:latest" 2 | 3 | FROM $IMAGE 4 | 5 | # Create a non-root user 6 | ARG USER_ID 7 | ARG GROUP_ID 8 | ENV PATH="/home/user/.local/bin:${PATH}" 9 | 10 | RUN addgroup --gid $GROUP_ID group 11 | RUN adduser --disabled-password --gecos '' --uid $USER_ID --gid $GROUP_ID user 12 | 13 | # For ROCm, the user needs to be in the video and render groups, check with /opt/rocm/ 14 | RUN if [ -d /opt/rocm/ ]; then usermod -a -G video user; fi 15 | RUN if [ -d /opt/rocm/ ]; then usermod -a -G render user; fi 16 | 17 | USER user 18 | WORKDIR /home/user 19 | 20 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/process/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from ..config import LauncherConfig 4 | 5 | 6 | @dataclass 7 | class ProcessConfig(LauncherConfig): 8 | name: str = "process" 9 | _target_: str = "optimum_benchmark.launchers.process.launcher.ProcessLauncher" 10 | 11 | start_method: str = "spawn" 12 | 13 | def __post_init__(self): 14 | super().__post_init__() 15 | 16 | if self.start_method not in ["spawn", "fork"]: 17 | raise ValueError(f"start_method must be one of ['spawn', 'fork'], got {self.start_method}") 18 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_text_encoders_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_decoders_ # inherits from text encoders decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _export_ # inherits from export config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: onnxruntime 11 | 12 | name: cpu_inference_onnxruntime_text_encoders_decoders 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_deepspeed_inference.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _deepspeed_inference_ # inherits from deepspeed inference config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_deepspeed_inference 13 | -------------------------------------------------------------------------------- /examples/_base_.yaml: -------------------------------------------------------------------------------- 1 | log_report: true 2 | print_report: true 3 | 4 | # hydra/cli specific settings 5 | hydra: 6 | run: 7 | # define run directory 8 | dir: 
runs/${name} 9 | sweep: 10 | # define sweep directory 11 | dir: sweeps/${name} 12 | subdir: ${hydra.job.override_dirname} 13 | job: 14 | # change working directory to the job directory 15 | # so that artifacts are stored there 16 | chdir: true 17 | env_set: 18 | # set environment variable OVERRIDE_BENCHMARKS to 1 19 | # to not skip benchmarks that have been run before 20 | OVERRIDE_BENCHMARKS: 1 21 | -------------------------------------------------------------------------------- /energy_star/_base_.yaml: -------------------------------------------------------------------------------- 1 | log_report: true 2 | print_report: true 3 | 4 | # hydra/cli specific settings 5 | hydra: 6 | run: 7 | # define run directory 8 | dir: runs/${name} 9 | sweep: 10 | # define sweep directory 11 | dir: sweeps/${name} 12 | subdir: ${hydra.job.override_dirname} 13 | job: 14 | # change working directory to the job directory 15 | # so that artifacts are stored there 16 | chdir: true 17 | env_set: 18 | # set environment variable OVERRIDE_BENCHMARKS to 1 19 | # to not skip benchmarks that have been run before 20 | OVERRIDE_BENCHMARKS: 1 21 | -------------------------------------------------------------------------------- /energy_star/object_detection.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: object_detection 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: object-detection 20 | model: facebook/detr-resnet-50 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/object_detection 24 | image_column_name: image 25 | num_samples: 1000 26 | 27 | input_shapes: 28 | batch_size: 1 29 | -------------------------------------------------------------------------------- /energy_star/image_to_text.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: image_to_text 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: image-to-text 20 | model: sashakunitsyn/vlrm-blip2-opt-2.7b 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/image_captioning 24 | image_column_name: image 25 | num_samples: 1000 26 | 27 | input_shapes: 28 | batch_size: 1 29 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_llama.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_pytorch_llama 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | torch_dtype: float16 20 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 21 | 22 | scenario: 23 | input_shapes: 24 | batch_size: 4 25 | sequence_length: 64 26 | 27 | generate_kwargs: 28 | max_new_tokens: 32 29 | min_new_tokens: 32 30 | -------------------------------------------------------------------------------- /energy_star/image_classification.yaml: 
-------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: image_classification 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: image-classification 20 | model: google/vit-base-patch16-224 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/image_classification 24 | image_column_name: image 25 | num_samples: 1000 26 | 27 | input_shapes: 28 | batch_size: 1 29 | -------------------------------------------------------------------------------- /energy_star/automatic_speech_recognition.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: automatic_speech_recognition 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: openai/whisper-large-v3 20 | task: automatic-speech-recognition 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/ASR 24 | audio_column_name: audio 25 | num_samples: 1000 26 | 27 | input_shapes: 28 | batch_size: 1 29 | -------------------------------------------------------------------------------- /energy_star/text_to_image.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: image_generation_tiny 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: false 19 | model: segmind/tiny-sd 20 | 21 | scenario: 22 | dataset_name: EnergyStarAI/image_generation 23 | text_column_name: prompt 24 | num_samples: 1000 25 | 26 | input_shapes: 27 | batch_size: 1 28 | 29 | call_kwargs: 30 | num_images_per_prompt: 1 31 | -------------------------------------------------------------------------------- /energy_star/text_classification.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: text_classification 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: text-classification 20 | model: lvwerra/distilbert-imdb 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/text_classification 24 | text_column_name: text 25 | num_samples: 1000 26 | truncation: True 27 | 28 | input_shapes: 29 | batch_size: 1 30 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/ipex/utils.py: -------------------------------------------------------------------------------- 1 | TASKS_TO_IPEXMODELS = { 2 | "fill-mask": "optimum.intel.IPEXModelForMaskedLM", 3 | "text-generation": "optimum.intel.IPEXModelForCausalLM", 4 | "feature-extraction": "optimum.intel.IPEXModel", 5 | "text-classification": "optimum.intel.IPEXModelForSequenceClassification", 6 | "token-classification": "optimum.intel.IPEXModelForTokenClassification", 7 
| "question-answering": "optimum.intel.IPEXModelForQuestionAnswering", 8 | "image-classification": "optimum.intel.IPEXModelForImageClassification", 9 | "audio-classification": "optimum.intel.IPEXModelForAudioClassification", 10 | } 11 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_onnxruntime_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders sweep config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _export_ # inherits from export config 10 | - _self_ # hydra 1.1 compatibility 11 | - override backend: onnxruntime 12 | 13 | name: cuda_inference_onnxruntime_text_decoders_no_weights 14 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_onnxruntime_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders sweep config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _export_ # inherits from export config 10 | - _self_ # hydra 1.1 compatibility 11 | - override backend: onnxruntime 12 | 13 | name: cuda_inference_onnxruntime_text_encoders_no_weights 14 | -------------------------------------------------------------------------------- /energy_star/question_answering.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: question_answering 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: question-answering 20 | model: deepset/electra-base-squad2 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/extractive_qa 24 | question_column_name: question 25 | context_column_name: context 26 | num_samples: 1000 27 | 28 | input_shapes: 29 | batch_size: 1 30 | -------------------------------------------------------------------------------- /examples/cuda_tgi_llama.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: py-txi 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_tgi_llama 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | cuda_graphs: 0 # remove for better perf but bigger memory footprint 19 | no_weights: true 20 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 21 | 22 | scenario: 23 | input_shapes: 24 | batch_size: 1 25 | sequence_length: 64 26 | 27 | generate_kwargs: 28 | max_new_tokens: 32 29 | min_new_tokens: 32 30 | -------------------------------------------------------------------------------- 
/examples/cuda_trt_llama.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: tensorrt-llm 4 | - scenario: inference 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_trt_llama 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | max_batch_size: 4 20 | max_new_tokens: 32 21 | max_prompt_length: 64 22 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 23 | 24 | scenario: 25 | input_shapes: 26 | batch_size: 1 27 | sequence_length: 64 28 | 29 | generate_kwargs: 30 | max_new_tokens: 32 31 | min_new_tokens: 32 32 | -------------------------------------------------------------------------------- /examples/cpu_ipex_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: ipex 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_ipex_bert 10 | 11 | launcher: 12 | numactl: true 13 | numactl_kwargs: 14 | cpunodebind: 0 15 | membind: 0 16 | 17 | backend: 18 | device: cpu 19 | no_weights: false # on multi-node machines, inline weights initialization harms performance 20 | torch_dtype: float32 # use bfloat16 on compatible Intel CPUs 21 | model: google-bert/bert-base-uncased 22 | 23 | scenario: 24 | memory: true 25 | latency: true 26 | 27 | input_shapes: 28 | batch_size: 1 29 | sequence_length: 128 30 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/peft_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from transformers import PreTrainedModel 4 | 5 | from ..import_utils import is_peft_available 6 | 7 | if is_peft_available(): 8 | from peft import PEFT_TYPE_TO_CONFIG_MAPPING, get_peft_model # type: ignore 9 | 10 | 11 | def apply_peft(model: "PreTrainedModel", peft_type: str, peft_config: Dict[str, Any]) -> "PreTrainedModel": 12 | if not is_peft_available(): 13 | raise ImportError("peft is not available. 
Please, pip install peft.") 14 | 15 | peft_config = PEFT_TYPE_TO_CONFIG_MAPPING[peft_type](**peft_config) 16 | 17 | return get_peft_model(model=model, peft_config=peft_config) 18 | -------------------------------------------------------------------------------- /.github/workflows/quality.yaml: -------------------------------------------------------------------------------- 1 | name: Quality Checks 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | concurrency: 12 | cancel-in-progress: true 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | 15 | env: 16 | UV_TORCH_BACKEND: cpu 17 | 18 | jobs: 19 | quality: 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - name: Checkout 24 | uses: actions/checkout@v4 25 | 26 | - name: Install uv 27 | uses: astral-sh/setup-uv@v6 28 | with: 29 | enable-cache: true 30 | 31 | - name: Run quality checks 32 | run: | 33 | make quality 34 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import BackendConfig 2 | from .ipex.config import IPEXConfig 3 | from .llama_cpp.config import LlamaCppConfig 4 | from .onnxruntime.config import ONNXRuntimeConfig 5 | from .openvino.config import OpenVINOConfig 6 | from .py_txi.config import PyTXIConfig 7 | from .pytorch.config import PyTorchConfig 8 | from .tensorrt_llm.config import TRTLLMConfig 9 | from .vllm.config import VLLMConfig 10 | 11 | __all__ = [ 12 | "PyTorchConfig", 13 | "ONNXRuntimeConfig", 14 | "IPEXConfig", 15 | "OpenVINOConfig", 16 | "TRTLLMConfig", 17 | "PyTXIConfig", 18 | "BackendConfig", 19 | "VLLMConfig", 20 | "LlamaCppConfig", 21 | ] 22 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/inline/launcher.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, List 2 | 3 | from ...benchmark.report import BenchmarkReport 4 | from ..base import Launcher 5 | from .config import InlineConfig 6 | 7 | 8 | class InlineLauncher(Launcher[InlineConfig]): 9 | NAME = "inline" 10 | 11 | def __init__(self, config: InlineConfig): 12 | super().__init__(config) 13 | 14 | def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) -> BenchmarkReport: 15 | self.logger.warning("The inline launcher is only recommended for debugging purposes and not for benchmarking") 16 | 17 | report = worker(*worker_args) 18 | 19 | return report 20 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_llama_compile_model.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_pytorch_llama_compile_model 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | torch_compile: true 20 | torch_dtype: bfloat16 21 | task: feature-extraction 22 | torch_compile_target: model 23 | model: NousResearch/Llama-2-13b-hf 24 | 25 | scenario: 26 | input_shapes: 27 | batch_size: 4 28 | sequence_length: 256 29 | 30 | forward_kwargs: 31 | use_cache: false 32 | -------------------------------------------------------------------------------- 
/optimum_benchmark/scenarios/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from logging import getLogger 3 | from typing import ClassVar, Generic 4 | 5 | from ..backends.base import Backend 6 | from ..benchmark.report import BenchmarkReport 7 | from .config import ScenarioConfigT 8 | 9 | 10 | class Scenario(Generic[ScenarioConfigT], ABC): 11 | NAME: ClassVar[str] 12 | 13 | def __init__(self, config: ScenarioConfigT) -> None: 14 | self.config = config 15 | self.logger = getLogger(self.NAME) 16 | self.logger.info(f"Allocating {self.NAME} scenario") 17 | 18 | def run(self, backend: Backend) -> BenchmarkReport: 19 | raise NotImplementedError("Scenario must implement run method") 20 | -------------------------------------------------------------------------------- /energy_star/summarization.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: summarization 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: summarization 20 | model: sshleifer/distilbart-cnn-12-6 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/summarization 24 | text_column_name: text 25 | num_samples: 1000 26 | truncation: True 27 | 28 | input_shapes: 29 | batch_size: 1 30 | 31 | generate_kwargs: 32 | max_length: 10 33 | min_new_tokens: 10 34 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_llama_compile_regions.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_pytorch_llama_compile_regions 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | torch_compile: true 20 | torch_dtype: bfloat16 21 | task: feature-extraction 22 | torch_compile_target: regions 23 | model: NousResearch/Llama-2-13b-hf 24 | 25 | scenario: 26 | input_shapes: 27 | batch_size: 4 28 | sequence_length: 256 29 | 30 | forward_kwargs: 31 | use_cache: false 32 | -------------------------------------------------------------------------------- /optimum_benchmark/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | __version__ = "0.7.0.dev0" 16 | -------------------------------------------------------------------------------- /energy_star/sentence_similarity.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: sentence_similarity_udever-bloom-7b1 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | library: transformers 20 | task: sentence-similarity 21 | model: sentence-transformers/all-MiniLM-L6-v2 22 | 23 | scenario: 24 | dataset_name: EnergyStarAI/sentence_similarity 25 | sentence1_column_name: sentence1 26 | sentence2_column_name: sentence2 27 | num_samples: 1000 28 | 29 | input_shapes: 30 | batch_size: 1 31 | -------------------------------------------------------------------------------- /energy_star/t5_question_answering.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: question_answering_t5 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: google-t5/t5-large 20 | task: text2text-generation 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/extractive_qa 24 | question_column_name: question 25 | context_column_name: context 26 | dataset_prefix1: "question: " 27 | dataset_prefix2: " context: " 28 | t5_task: question_answering 29 | num_samples: 1000 30 | 31 | input_shapes: 32 | batch_size: 1 33 | -------------------------------------------------------------------------------- /energy_star/t5_summarization.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: summarization_t5 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: google-t5/t5-large 20 | task: text2text-generation 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/summarization 24 | dataset_prefix1: "summarize: " 25 | text_column_name: text 26 | t5_task: summarization 27 | num_samples: 1000 28 | truncation: True 29 | 30 | input_shapes: 31 | batch_size: 1 32 | 33 | generate_kwargs: 34 | max_new_tokens: 10 35 | min_new_tokens: 10 36 | -------------------------------------------------------------------------------- /energy_star/t5_text_generation.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: text2text_generation_aya 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: google-t5/t5-large 20 | task: text2text-generation 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/text_generation 24 | t5_task: text_generation 25 | text_column_name: text 26 | dataset_prefix1: "" 27 | num_samples: 1000 28 | truncation: True 29 | 30 | input_shapes: 31 | 
batch_size: 1 32 | 33 | generate_kwargs: 34 | max_new_tokens: 10 35 | min_new_tokens: 10 36 | -------------------------------------------------------------------------------- /examples/cpu_ipex_llama.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: ipex 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_ipex_llama 10 | 11 | launcher: 12 | numactl: true 13 | numactl_kwargs: 14 | cpunodebind: 0 15 | membind: 0 16 | 17 | backend: 18 | device: cpu 19 | no_weights: false # on multi-node machines, initializing weights in the benchmark could harm performance 20 | torch_dtype: float32 # use bfloat16 on compatible Intel CPUs 21 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 22 | 23 | scenario: 24 | memory: true 25 | latency: true 26 | 27 | input_shapes: 28 | batch_size: 1 29 | sequence_length: 64 30 | 31 | generate_kwargs: 32 | max_new_tokens: 32 33 | min_new_tokens: 32 34 | -------------------------------------------------------------------------------- /examples/cuda_vllm_llama.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: vllm 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_vllm_llama 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | serving_mode: online 20 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 21 | engine_args: 22 | enforce_eager: true # remove for better perf but bigger memory footprint 23 | 24 | scenario: 25 | input_shapes: 26 | batch_size: 1 27 | sequence_length: 64 28 | 29 | generate_kwargs: 30 | max_new_tokens: 32 31 | min_new_tokens: 32 32 | 33 | hydra: 34 | job: 35 | env_set: 36 | VLLM_USE_V1: 0 37 | -------------------------------------------------------------------------------- /optimum_benchmark/trackers/__init__.py: -------------------------------------------------------------------------------- 1 | from .energy import Efficiency, Energy, EnergyTracker 2 | from .latency import ( 3 | Latency, 4 | LatencySessionTracker, 5 | LatencyTracker, 6 | PerStepLatencySessionTrackerPipelineCallback, 7 | PerTokenLatencySessionTrackerLogitsProcessor, 8 | StepLatencyTrackerTrainerCallback, 9 | Throughput, 10 | ) 11 | from .memory import Memory, MemoryTracker 12 | 13 | __all__ = [ 14 | "Efficiency", 15 | "Energy", 16 | "EnergyTracker", 17 | "Latency", 18 | "LatencySessionTracker", 19 | "LatencyTracker", 20 | "PerStepLatencySessionTrackerPipelineCallback", 21 | "PerTokenLatencySessionTrackerLogitsProcessor", 22 | "StepLatencyTrackerTrainerCallback", 23 | "Throughput", 24 | "Memory", 25 | "MemoryTracker", 26 | ] 27 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/inline/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from ..config import LauncherConfig 4 | 5 | 6 | @dataclass 7 | class InlineConfig(LauncherConfig): 8 | name: str = "inline" 9 | _target_: str = "optimum_benchmark.launchers.inline.launcher.InlineLauncher" 10 | 11 | def __post_init__(self): 12 | super().__post_init__() 13 | 14 | if self.device_isolation: 15 | raise ValueError( 16 | "Device isolation is not supported with the inline launcher. Use `process` launcher instead."
17 | ) 18 | 19 | if self.device_isolation_action is not None: 20 | raise ValueError( 21 | "Device isolation is not supported with the inline launcher. Use `process` launcher instead." 22 | ) 23 | -------------------------------------------------------------------------------- /tests/configs/_base_.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark # parent schema 3 | - backend: pytorch # default backend 4 | - launcher: process # default launcher 5 | - scenario: inference # default scenario 6 | - _self_ 7 | 8 | print_report: true 9 | log_report: true 10 | 11 | # hydra/cli specific settings 12 | hydra: 13 | run: 14 | # define run directory 15 | dir: runs/${name} 16 | sweep: 17 | # define sweep directory 18 | dir: sweeps/${name} 19 | subdir: ${hydra.job.override_dirname} 20 | job: 21 | # change working directory to the job directory 22 | # so that artifacts are stored there 23 | chdir: true 24 | env_set: 25 | # set environment variable OVERRIDE_BENCHMARKS to 1 26 | # to not skip benchmarks that have been run before 27 | OVERRIDE_BENCHMARKS: 1 28 | -------------------------------------------------------------------------------- /energy_star/t5_text_classification.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: text_classification_t5 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: google-t5/t5-large 20 | task: text2text-generation 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/text_classification 24 | dataset_prefix1: "sst2 sentence: " 25 | t5_task: text_classification 26 | text_column_name: text 27 | 28 | num_samples: 1000 29 | truncation: True 30 | 31 | input_shapes: 32 | batch_size: 1 33 | 34 | generate_kwargs: 35 | max_new_tokens: 10 36 | min_new_tokens: 10 37 | -------------------------------------------------------------------------------- /energy_star/text_generation.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: text_generation 10 | 11 | launcher: 12 | device_isolation: False 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: False 19 | task: text-generation 20 | model: openai/gpt-oss-20b 21 | torch_dtype: auto 22 | device_map: auto 23 | 24 | scenario: 25 | dataset_name: EnergyStarAI/text_generation 26 | text_column_name: text 27 | num_samples: 1000 28 | truncation: True 29 | reasoning: True 30 | reasoning_params: 31 | reasoning_effort: high 32 | 33 | input_shapes: 34 | batch_size: 1 35 | 36 | generate_kwargs: 37 | max_new_tokens: 10 38 | min_new_tokens: 10 39 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_vlm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_pytorch_vlm 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | 
torch_dtype: float16 20 | model: Qwen/Qwen2-VL-7B-Instruct 21 | 22 | scenario: 23 | memory: true 24 | latency: true 25 | 26 | warmup_runs: 10 27 | iterations: 10 28 | duration: 10 29 | 30 | input_shapes: 31 | # text 32 | batch_size: 1 33 | sequence_length: 64 34 | # image 35 | num_images: 2 36 | num_channels: 3 37 | height: 224 38 | width: 224 39 | 40 | generate_kwargs: 41 | max_new_tokens: 32 42 | min_new_tokens: 32 43 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/llama_cpp/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | from ...import_utils import llama_cpp_version 5 | from ..config import BackendConfig 6 | 7 | 8 | @dataclass 9 | class LlamaCppConfig(BackendConfig): 10 | name: str = "llama_cpp" 11 | version: Optional[str] = llama_cpp_version() 12 | _target_: str = "optimum_benchmark.backends.llama_cpp.backend.LlamaCppBackend" 13 | 14 | no_weights: bool = False 15 | 16 | # llamamodel kwargs 17 | filename: Optional[str] = None 18 | 19 | def __post_init__(self): 20 | self.library = "llama_cpp" 21 | self.model_type = "llama_cpp" 22 | 23 | super().__post_init__() 24 | 25 | if self.task not in ["feature-extraction", "text-generation"]: 26 | raise NotImplementedError(f"Task {self.task} is not supported by LlamaCpp backend.") 27 | 28 | if self.no_weights: 29 | raise NotImplementedError("`no_weights` benchmarking is not supported by LlamaCpp backend.") 30 | -------------------------------------------------------------------------------- /optimum_benchmark/benchmark/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Any, Dict 3 | 4 | from ..hub_utils import PushToHubMixin, classproperty 5 | from ..import_utils import get_hf_libs_info 6 | from ..system_utils import get_system_info 7 | 8 | 9 | @dataclass 10 | class BenchmarkConfig(PushToHubMixin): 11 | name: str 12 | 13 | # BACKEND CONFIGURATION 14 | backend: Any # https://github.com/facebookresearch/hydra/issues/1722#issuecomment-883568386 15 | # SCENARIO CONFIGURATION 16 | scenario: Any # https://github.com/facebookresearch/hydra/issues/1722#issuecomment-883568386 17 | # LAUNCHER CONFIGURATION 18 | launcher: Any # https://github.com/facebookresearch/hydra/issues/1722#issuecomment-883568386 19 | 20 | # ENVIRONMENT CONFIGURATION 21 | environment: Dict[str, Any] = field(default_factory=lambda: {**get_system_info(), **get_hf_libs_info()}) 22 | 23 | print_report: bool = False 24 | log_report: bool = True 25 | 26 | @classproperty 27 | def default_filename(cls) -> str: 28 | return "benchmark_config.json" 29 | -------------------------------------------------------------------------------- /optimum_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | from .backends import ( 2 | BackendConfig, 3 | IPEXConfig, 4 | LlamaCppConfig, 5 | ONNXRuntimeConfig, 6 | OpenVINOConfig, 7 | PyTorchConfig, 8 | PyTXIConfig, 9 | TRTLLMConfig, 10 | VLLMConfig, 11 | ) 12 | from .benchmark.base import Benchmark 13 | from .benchmark.config import BenchmarkConfig 14 | from .benchmark.report import BenchmarkReport 15 | from .launchers import InlineConfig, LauncherConfig, ProcessConfig, TorchrunConfig 16 | from .scenarios import EnergyStarConfig, InferenceConfig, ScenarioConfig, TrainingConfig 17 | 18 | __all__ = [ 19 | "BackendConfig", 
20 | "Benchmark", 21 | "BenchmarkConfig", 22 | "BenchmarkReport", 23 | "EnergyStarConfig", 24 | "InferenceConfig", 25 | "IPEXConfig", 26 | "InlineConfig", 27 | "LauncherConfig", 28 | "ONNXRuntimeConfig", 29 | "OpenVINOConfig", 30 | "ProcessConfig", 31 | "PyTorchConfig", 32 | "PyTXIConfig", 33 | "ScenarioConfig", 34 | "TorchrunConfig", 35 | "TrainingConfig", 36 | "TRTLLMConfig", 37 | "VLLMConfig", 38 | "LlamaCppConfig", 39 | ] 40 | -------------------------------------------------------------------------------- /.github/workflows/security.yml: -------------------------------------------------------------------------------- 1 | name: Security Checks 2 | 3 | on: 4 | push: 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | trufflehog: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - shell: bash 14 | run: | 15 | if [ "$EVENT_NAME" == "push" ]; then 16 | echo "depth=$(($(jq length <<< $COMMITS)+2))" >> $GITHUB_ENV 17 | echo "branch=$REF" >> $GITHUB_ENV 18 | fi 19 | if [ "$EVENT_NAME" == "pull_request" ]; then 20 | echo "depth=$(($PR_COMMITS+2))" >> $GITHUB_ENV 21 | echo "branch=$PR_REF" >> $GITHUB_ENV 22 | fi 23 | env: 24 | REF: ${{ github.ref_name }} 25 | COMMITS: ${{ tojson(github.event.commits) }} 26 | EVENT_NAME: ${{ github.event_name }} 27 | PR_REF: ${{ github.event.pull_request.head.ref }} 28 | PR_COMMITS: ${{ github.event.pull_request.commits }} 29 | 30 | - name: Checkout code 31 | uses: actions/checkout@v4 32 | with: 33 | ref: ${{env.branch}} 34 | fetch-depth: ${{env.depth}} 35 | 36 | - name: Scan for secrets 37 | uses: trufflesecurity/trufflehog@main 38 | -------------------------------------------------------------------------------- /.github/workflows/test_energy_star.yaml: -------------------------------------------------------------------------------- 1 | name: Energy Star Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_energy_star: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'energy_star') 31 | }} 32 | 33 | runs-on: ubuntu-latest 34 | 35 | steps: 36 | - name: Checkout 37 | uses: actions/checkout@v4 38 | 39 | - name: Install ffmpeg 40 | run: sudo apt-get install -y ffmpeg 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run energy star 48 | run: | 49 | make test-energy-star 50 | -------------------------------------------------------------------------------- /.github/workflows/test_api_rocm.yaml: -------------------------------------------------------------------------------- 1 | name: API ROCm Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | jobs: 23 | run_api_rocm_tests: 24 | if: ${{ 25 | (github.event_name == 'push') || 26 | (github.event_name == 'workflow_dispatch') || 27 | contains( 
github.event.pull_request.labels.*.name, 'api') || 28 | contains( github.event.pull_request.labels.*.name, 'rocm') || 29 | contains( github.event.pull_request.labels.*.name, 'api_rocm') 30 | }} 31 | 32 | uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@testing 33 | with: 34 | test_file: test_api.py 35 | machine_type: single-gpu 36 | pytest_keywords: api and cuda 37 | install_extras: testing,timm,diffusers,codecarbon 38 | secrets: 39 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 40 | -------------------------------------------------------------------------------- /scripts/total_tests_runs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import yaml 4 | 5 | config_dir = "tests/configs" 6 | config_files = [f for f in os.listdir(config_dir) if not f.startswith("_")] 7 | 8 | run_counts = {} 9 | for config_file in config_files: 10 | with open(os.path.join(config_dir, config_file), "r") as f: 11 | config = yaml.safe_load(f) 12 | 13 | for default in config.get("defaults", []): 14 | if isinstance(default, str) and default != "_self_": 15 | with open(os.path.join(config_dir, f"{default}.yaml"), "r") as f: 16 | default_config = yaml.safe_load(f) 17 | params = default_config.get("hydra", {}).get("sweeper", {}).get("params", {}) 18 | 19 | if len(params) == 0: 20 | run_counts[config_file] = run_counts.get(config_file, 1) 21 | else: 22 | for param_values in params.values(): 23 | run_counts[config_file] = run_counts.get(config_file, 1) * len(param_values.split(",")) 24 | 25 | 26 | for config_file, run_count in run_counts.items(): 27 | print(f"{config_file}: {run_count} runs") 28 | 29 | print(f"Total runs: {sum(run_counts.values())}") 30 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/openvino/utils.py: -------------------------------------------------------------------------------- 1 | TASKS_TO_OPENVINO_MODELS = { 2 | "fill-mask": "optimum.intel.openvino.OVModelForMaskedLM", 3 | "text-generation": "optimum.intel.openvino.OVModelForCausalLM", 4 | "text2text-generation": "optimum.intel.openvino.OVModelForSeq2SeqLM", 5 | "feature-extraction": "optimum.intel.openvino.OVModelForFeatureExtraction", 6 | "text-classification": "optimum.intel.openvino.OVModelForSequenceClassification", 7 | "token-classification": "optimum.intel.openvino.OVModelForTokenClassification", 8 | "question-answering": "optimum.intel.openvino.OVModelForQuestionAnswering", 9 | "image-classification": "optimum.intel.openvino.OVModelForImageClassification", 10 | "image-text-to-text": "optimum.intel.openvino.OVModelForVisualCausalLM", 11 | "audio-classification": "optimum.intel.openvino.OVModelForAudioClassification", 12 | "pix2struct": "optimum.intel.openvino.OVModelForPix2Struct", 13 | } 14 | TASKS_TO_OPENVINO_PIPELINES = { 15 | "inpainting": "optimum.intel.openvino.OVPipelineForInpainting", 16 | "text-to-image": "optimum.intel.openvino.OVPipelineForText2Image", 17 | "image-to-image": "optimum.intel.openvino.OVPipelineForImage2Image", 18 | } 19 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/ipex/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | from ...import_utils import ipex_version 5 | from ..config import BackendConfig 6 | 7 | TORCH_DTYPES = ["bfloat16", "float16", "float32", "auto"] 8 | 9 | 10 | @dataclass 11 | 
class IPEXConfig(BackendConfig): 12 | name: str = "ipex" 13 | version: Optional[str] = ipex_version() 14 | _target_: str = "optimum_benchmark.backends.ipex.backend.IPEXBackend" 15 | 16 | no_weights: bool = False 17 | 18 | # ipexmodel kwargs 19 | torch_dtype: Optional[str] = None 20 | 21 | def __post_init__(self): 22 | super().__post_init__() 23 | 24 | self.device = self.device.lower() 25 | 26 | if self.device not in ["cpu", "xpu"]: 27 | raise ValueError(f"IPEXBackend only supports CPU and XPU devices. Got {self.device} instead.") 28 | 29 | if self.model_kwargs.get("torch_dtype", None) is not None: 30 | raise ValueError( 31 | "`torch_dtype` is an explicit argument in the PyTorch backend config. " 32 | "Please remove it from the `model_kwargs` and set it in the backend config directly." 33 | ) 34 | 35 | if self.torch_dtype is not None and self.torch_dtype not in TORCH_DTYPES: 36 | raise ValueError(f"`torch_dtype` must be one of {TORCH_DTYPES}. Got {self.torch_dtype} instead.") 37 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_ipex.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU IPEX Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_ipex_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'ipex') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_ipex') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-cli-cpu-ipex 50 | 51 | - name: Run examples 52 | run: | 53 | make test-cli-cpu-ipex-examples 54 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_pytorch.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU PyTorch tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_pytorch_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'pytorch') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_pytorch') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Checkout 40 | uses: 
actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-cli-cpu-pytorch 50 | 51 | - name: Run examples 52 | run: | 53 | make test-cli-cpu-pytorch-examples 54 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_openvino.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU OpenVINO Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_openvino_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'openvino') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_openvino') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-cli-cpu-openvino 50 | 51 | - name: Run examples 52 | run: | 53 | make test-cli-cpu-openvino-examples 54 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_llama_cpp.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU LlamaCpp Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_llama_cpp_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'llama_cpp') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_llama_cpp') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-cli-cpu-llama-cpp 50 | 51 | - name: Run examples 52 | run: | 53 | make test-cli-cpu-llama-cpp-examples 54 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_onnxruntime.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU ONNXRuntime Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | 
- synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_onnxruntime_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'onnxruntime') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_onnxruntime') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-cli-cpu-onnxruntime 50 | 51 | - name: Run examples 52 | run: | 53 | make test-cli-cpu-onnxruntime-examples 54 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cuda_py_txi.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CUDA Py-TXI Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: auto 24 | 25 | jobs: 26 | run_cli_cuda_py_txi_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cuda') || 32 | contains( github.event.pull_request.labels.*.name, 'py_txi') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_py_txi') 34 | }} 35 | 36 | runs-on: 37 | group: aws-g5-4xlarge-plus 38 | 39 | steps: 40 | - name: Checkout 41 | uses: actions/checkout@v4 42 | 43 | - name: Install uv 44 | uses: astral-sh/setup-uv@v6 45 | with: 46 | enable-cache: true 47 | 48 | - name: Run tests 49 | run: | 50 | make test-cli-cuda-py-txi 51 | 52 | - name: Run examples 53 | run: | 54 | make test-cli-cuda-py-txi-examples 55 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cuda_onnxruntime.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CUDA ONNXRuntime Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: auto 24 | 25 | jobs: 26 | run_cli_cuda_onnxruntime_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cuda') || 32 | contains( github.event.pull_request.labels.*.name, 'onnxruntime') || 33 | contains( github.event.pull_request.labels.*.name, 
'cli_cuda_onnxruntime') 34 | }} 35 | 36 | runs-on: 37 | group: aws-g5-4xlarge-plus 38 | 39 | steps: 40 | - name: Checkout 41 | uses: actions/checkout@v4 42 | 43 | - name: Install uv 44 | uses: astral-sh/setup-uv@v6 45 | with: 46 | enable-cache: true 47 | 48 | - name: Run tests 49 | run: | 50 | make test-cli-cuda-onnxruntime 51 | 52 | - name: Run examples 53 | run: | 54 | make test-cli-cuda-onnxruntime-examples 55 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/diffusers_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from ..import_utils import is_diffusers_available 4 | from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES, map_from_synonym_task 5 | 6 | if is_diffusers_available(): 7 | import diffusers 8 | from diffusers import DiffusionPipeline 9 | 10 | 11 | def get_diffusers_auto_pipeline_class_for_task(task: str): 12 | task = map_from_synonym_task(task) 13 | 14 | if not is_diffusers_available(): 15 | raise ImportError("diffusers is not available. Please, pip install diffusers.") 16 | 17 | if task not in TASKS_TO_AUTO_PIPELINE_CLASS_NAMES: 18 | raise ValueError(f"Task {task} not supported for diffusers") 19 | 20 | model_loader_name = TASKS_TO_AUTO_PIPELINE_CLASS_NAMES[task] 21 | 22 | return getattr(diffusers, model_loader_name) 23 | 24 | 25 | def get_diffusers_pretrained_config(model: str, **kwargs) -> Dict[str, int]: 26 | if not is_diffusers_available(): 27 | raise ImportError("diffusers is not available. Please, pip install diffusers.") 28 | 29 | config = DiffusionPipeline.load_config(model, **kwargs) 30 | pipeline_config = config[0] if isinstance(config, tuple) else config 31 | return pipeline_config 32 | 33 | 34 | def extract_diffusers_shapes_from_model(**kwargs) -> Dict[str, int]: 35 | if not is_diffusers_available(): 36 | raise ImportError("diffusers is not available. 
Please, pip install diffusers.") 37 | 38 | shapes = {} 39 | 40 | return shapes 41 | -------------------------------------------------------------------------------- /.github/workflows/test_api_cpu.yaml: -------------------------------------------------------------------------------- 1 | name: API CPU Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_api_cpu_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'api') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'api_cpu') 33 | }} 34 | 35 | runs-on: ubuntu-latest 36 | 37 | steps: 38 | - name: Checkout 39 | uses: actions/checkout@v4 40 | 41 | - name: Install uv 42 | uses: astral-sh/setup-uv@v6 43 | with: 44 | enable-cache: true 45 | 46 | - name: Run tests 47 | run: | 48 | make test-api-cpu 49 | env: 50 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 51 | PUSH_REPO_ID: optimum-benchmark/cpu 52 | 53 | - name: Run examples 54 | run: | 55 | make test-api-cpu-examples 56 | env: 57 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 58 | PUSH_REPO_ID: optimum-benchmark/cpu 59 | -------------------------------------------------------------------------------- /optimum_benchmark/generators/input_generator.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | 3 | from .base import BaseGenerator 4 | from .model_generator import MODEL_TYPE_TO_GENERATORS 5 | from .task_generator import TASKS_TO_GENERATORS 6 | 7 | 8 | class InputGenerator: 9 | generator: BaseGenerator 10 | 11 | def __init__( 12 | self, 13 | task: str, 14 | input_shapes: Dict[str, int], 15 | model_shapes: Dict[str, int], 16 | model_type: Optional[str] = None, 17 | ) -> None: 18 | # input_shapes take precedence over model_shapes 19 | all_shapes = {**model_shapes, **input_shapes} 20 | 21 | if model_type in MODEL_TYPE_TO_GENERATORS: 22 | self.generator = MODEL_TYPE_TO_GENERATORS[model_type](shapes=all_shapes, with_labels=False) 23 | elif task in TASKS_TO_GENERATORS: 24 | self.generator = TASKS_TO_GENERATORS[task](shapes=all_shapes, with_labels=False) 25 | else: 26 | raise NotImplementedError( 27 | f"Task {task} is not supported for input generation. " 28 | f"Available tasks: {list(TASKS_TO_GENERATORS.keys())}. " 29 | f"Available model types: {list(MODEL_TYPE_TO_GENERATORS.keys())}. " 30 | "If you want to add support for this task or model type, " 31 | "please submit a PR or a feature request to optimum-benchmark." 
32 | ) 33 | 34 | def __call__(self) -> Dict[str, Any]: 35 | task_input = self.generator() 36 | return task_input 37 | -------------------------------------------------------------------------------- /.github/workflows/test_api_cuda.yaml: -------------------------------------------------------------------------------- 1 | name: API CUDA Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_api_cuda_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'api') || 31 | contains( github.event.pull_request.labels.*.name, 'cuda') || 32 | contains( github.event.pull_request.labels.*.name, 'api_cuda') 33 | }} 34 | 35 | runs-on: 36 | group: aws-g5-4xlarge-plus 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-api-cuda 50 | env: 51 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 52 | PUSH_REPO_ID: optimum-benchmark/cuda 53 | 54 | - name: Run examples 55 | run: | 56 | make test-api-cuda-examples 57 | env: 58 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 59 | PUSH_REPO_ID: optimum-benchmark/cuda 60 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_bert.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig 4 | from optimum_benchmark.logging_utils import setup_logging 5 | 6 | BENCHMARK_NAME = "cuda_pytorch_bert" 7 | MODEL = "google-bert/bert-base-uncased" 8 | PUSH_REPO_ID = os.environ.get("PUSH_REPO_ID", None) 9 | 10 | 11 | if __name__ == "__main__": 12 | level = os.environ.get("LOG_LEVEL", "INFO") 13 | to_file = os.environ.get("LOG_TO_FILE", "0") == "1" 14 | setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS") 15 | 16 | launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn") 17 | backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model=MODEL) 18 | scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128}) 19 | benchmark_config = BenchmarkConfig( 20 | name=BENCHMARK_NAME, 21 | launcher=launcher_config, 22 | scenario=scenario_config, 23 | backend=backend_config, 24 | print_report=True, 25 | log_report=True, 26 | ) 27 | benchmark_report = Benchmark.launch(benchmark_config) 28 | benchmark = Benchmark(config=benchmark_config, report=benchmark_report) 29 | 30 | if PUSH_REPO_ID is not None: 31 | benchmark_config.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME) 32 | benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME) 33 | benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME) 34 | -------------------------------------------------------------------------------- /.github/workflows/test_api_misc.yaml: -------------------------------------------------------------------------------- 1 | name: API Misc Tests 2 | 
3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_api_misc_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'api') || 31 | contains( github.event.pull_request.labels.*.name, 'misc') || 32 | contains( github.event.pull_request.labels.*.name, 'api_misc') 33 | }} 34 | 35 | strategy: 36 | fail-fast: false 37 | matrix: 38 | os: [ubuntu-latest, "macos-latest", windows-latest] 39 | python: ["3.10", "3.12"] 40 | 41 | name: API Misc Tests - OS ${{ matrix.os }} - Python ${{ matrix.python }} 42 | 43 | runs-on: ${{ matrix.os }} 44 | 45 | steps: 46 | - name: Checkout 47 | uses: actions/checkout@v4 48 | 49 | - name: Install uv 50 | uses: astral-sh/setup-uv@v6 51 | with: 52 | enable-cache: true 53 | python-version: ${{ matrix.python }} 54 | 55 | - name: Run tests 56 | run: | 57 | make test-api-misc 58 | env: 59 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 60 | PUSH_REPO_ID: optimum-benchmark/misc-${{ matrix.os }}-${{ matrix.python }} 61 | -------------------------------------------------------------------------------- /scripts/update_ci_badges.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from pathlib import Path 4 | 5 | # list all workflow files 6 | workflow_dir = ".github/workflows" 7 | workflow_files = os.listdir(workflow_dir) 8 | 9 | # generate the markdown for the badges 10 | base_url = "https://github.com/huggingface/optimum-benchmark/actions/workflows" 11 | api_badges = [] 12 | cli_badges = [] 13 | for file in workflow_files: 14 | # extract the name from the file name 15 | name = re.sub(r"(test_|\.yaml)", "", file).upper() 16 | badge_url = f"{base_url}/{file}/badge.svg" 17 | workflow_url = f"{base_url}/{file}" 18 | badge = f"[![{name}]({badge_url})]({workflow_url})" 19 | if "api" in file: 20 | api_badges.append(badge) 21 | elif "cli" in file: 22 | cli_badges.append(badge) 23 | 24 | # order the badges 25 | api_badges = sorted(api_badges) 26 | cli_badges = sorted(cli_badges) 27 | 28 | # read the README file 29 | readme_path = Path("README.md") 30 | readme_text = readme_path.read_text() 31 | 32 | # find the position to insert the badges 33 | api_start_pos = readme_text.index("### API 📈") + len("### API 📈\n\n") 34 | api_end_pos = readme_text.index("#", api_start_pos) 35 | cli_start_pos = readme_text.index("### CLI 📈") + len("### CLI 📈\n\n") 36 | cli_end_pos = readme_text.index("#", cli_start_pos) 37 | 38 | # insert the badges into the README text 39 | new_readme_text = ( 40 | readme_text[:api_start_pos] 41 | + "\n".join(api_badges) 42 | + "\n\n" 43 | + readme_text[api_end_pos:cli_start_pos] 44 | + "\n".join(cli_badges) 45 | + "\n\n" 46 | + readme_text[cli_end_pos:] 47 | ) 48 | 49 | # write the new README text to the file 50 | readme_path.write_text(new_readme_text) 51 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_misc.yaml: -------------------------------------------------------------------------------- 1 | name: CLI Misc Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | 
pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_misc_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'misc') || 32 | contains( github.event.pull_request.labels.*.name, 'cli_misc') 33 | }} 34 | 35 | strategy: 36 | fail-fast: false 37 | matrix: 38 | os: [ubuntu-latest, "macos-latest", windows-latest] 39 | python: ["3.10", "3.12"] 40 | 41 | name: CLI Misc Tests - OS ${{ matrix.os }} - Python ${{ matrix.python }} 42 | 43 | runs-on: ${{ matrix.os }} 44 | 45 | steps: 46 | - name: Checkout 47 | uses: actions/checkout@v4 48 | 49 | - name: Install uv 50 | uses: astral-sh/setup-uv@v6 51 | with: 52 | enable-cache: true 53 | python-version: ${{ matrix.python }} 54 | 55 | - name: Install Linux packages 56 | if: matrix.os == 'ubuntu-latest' 57 | run: | 58 | sudo apt-get update 59 | sudo apt-get install -y numactl 60 | 61 | - name: Run tests 62 | run: | 63 | make test-cli-misc 64 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_py_txi.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU Py-TXI Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_py_txi_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'py_txi') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_py_txi') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Free Disk Space (Ubuntu) 40 | uses: jlumbroso/free-disk-space@main 41 | 42 | - name: Checkout 43 | uses: actions/checkout@v4 44 | 45 | - name: Install uv 46 | uses: astral-sh/setup-uv@v6 47 | with: 48 | enable-cache: true 49 | 50 | - name: Pull images 51 | run: | 52 | docker pull ghcr.io/huggingface/text-generation-inference:3.3-intel-cpu 53 | docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.8 54 | 55 | - name: Run tests 56 | run: | 57 | make test-cli-cpu-py-txi 58 | 59 | - name: Run examples 60 | run: | 61 | make test-cli-cpu-py-txi-examples 62 | -------------------------------------------------------------------------------- /optimum_benchmark/profilers/fx_profiler.py: -------------------------------------------------------------------------------- 1 | import time 2 | from logging import getLogger 3 | from typing import Any, List, Tuple 4 | 5 | import torch 6 | from torch.fx import Interpreter 7 | from torch.fx.graph_module import GraphModule 8 | from torch.fx.node import Node 9 | 10 | LOGGER = getLogger("fx_profiler") 11 | 12 | 13 | 
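# Illustrative usage sketch (not part of the original module): `model` and
# `example_inputs` below are hypothetical placeholders for any torch.fx-traceable
# nn.Module and its keyword inputs.
#
#     traced = torch.fx.symbolic_trace(model)        # produces a GraphModule
#     profiler = FXProfilingWrapper(traced)
#     outputs = profiler(**example_inputs)           # kwargs are forwarded positionally to Interpreter.run
#     records = profiler.get_profiling_records()     # [(node_name, node_op, runtime_in_seconds), ...]
#
# Per-node runtimes are measured with CUDA events when the wrapped module lives on a
# CUDA device and with time.perf_counter_ns otherwise (see run_node below).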
class FXProfilingWrapper(Interpreter): 14 | def __init__(self, module: GraphModule): 15 | super().__init__(module) 16 | self.profiling_records: List[Tuple[str, str, float]] = [] 17 | 18 | def run(self, *args) -> Any: 19 | return super().run(*args) 20 | 21 | def run_node(self, node: Node) -> Any: 22 | if self.module.device.type == "cuda": 23 | start = torch.cuda.Event(enable_timing=True) 24 | end = torch.cuda.Event(enable_timing=True) 25 | start.record(stream=torch.cuda.current_stream()) 26 | return_val = super().run_node(node) 27 | end.record(stream=torch.cuda.current_stream()) 28 | torch.cuda.synchronize() 29 | node_runtime = start.elapsed_time(end) / 1e3 30 | else: 31 | start = time.perf_counter_ns() 32 | return_val = super().run_node(node) 33 | end = time.perf_counter_ns() 34 | node_runtime = (end - start) / 1e9 35 | 36 | LOGGER.debug(f"Node {node.name} took {node_runtime:.2e} seconds") 37 | self.profiling_records.append((node.name, node.op, node_runtime)) 38 | 39 | return return_val 40 | 41 | def __call__(self, **kwargs) -> Any: 42 | args = kwargs.values() 43 | return super().run(*args) 44 | 45 | def get_profiling_records(self) -> List[Tuple[str, str, float]]: 46 | return self.profiling_records 47 | -------------------------------------------------------------------------------- /optimum_benchmark/generators/dataset_generator.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | from datasets import Dataset 4 | 5 | from .base import BaseGenerator 6 | from .model_generator import MODEL_TYPE_TO_GENERATORS 7 | from .task_generator import TASKS_TO_GENERATORS 8 | 9 | 10 | class DatasetGenerator: 11 | generator: BaseGenerator 12 | 13 | def __init__( 14 | self, 15 | task: str, 16 | dataset_shapes: Dict[str, int], 17 | model_shapes: Dict[str, int], 18 | model_type: Optional[str] = None, 19 | ) -> None: 20 | # dataset_shapes take precedence over model_shapes 21 | all_shapes = {**model_shapes, **dataset_shapes} 22 | all_shapes["batch_size"] = all_shapes.pop("dataset_size", None) 23 | 24 | if model_type in MODEL_TYPE_TO_GENERATORS: 25 | self.generator = MODEL_TYPE_TO_GENERATORS[model_type](shapes=all_shapes, with_labels=True) 26 | elif task in TASKS_TO_GENERATORS: 27 | self.generator = TASKS_TO_GENERATORS[task](shapes=all_shapes, with_labels=True) 28 | else: 29 | raise NotImplementedError( 30 | f"Task {task} is not supported for dataset generation. " 31 | f"Available tasks: {list(TASKS_TO_GENERATORS.keys())}. " 32 | f"Available model types: {list(MODEL_TYPE_TO_GENERATORS.keys())}. " 33 | "If you want to add support for this task or model type, " 34 | "please submit a PR or a feature request to optimum-benchmark." 
35 | ) 36 | 37 | def __call__(self) -> Dataset: 38 | task_dataset = self.generator() 39 | task_dataset = Dataset.from_dict(task_dataset) 40 | task_dataset.set_format(type="torch", columns=list(task_dataset.features.keys())) 41 | return task_dataset 42 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/config.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from dataclasses import dataclass, field 3 | from logging import getLogger 4 | from typing import Any, Dict, Optional, TypeVar 5 | 6 | from ..system_utils import is_nvidia_system, is_rocm_system 7 | 8 | LOGGER = getLogger("launcher") 9 | 10 | 11 | @dataclass 12 | class LauncherConfig(ABC): 13 | name: str 14 | _target_: str 15 | 16 | device_isolation: bool = False 17 | device_isolation_action: Optional[str] = None 18 | 19 | numactl: bool = False 20 | numactl_kwargs: Dict[str, Any] = field(default_factory=dict) 21 | 22 | def __post_init__(self): 23 | if self.device_isolation and not is_nvidia_system() and not is_rocm_system(): 24 | raise ValueError( 25 | "Device isolation is only supported on NVIDIA and ROCm systems. " 26 | "Please set `device_isolation` to False or make sure your drivers " 27 | "are correctly installed by running `nvidia-smi` or `rocm-smi`." 28 | ) 29 | 30 | if self.device_isolation and self.device_isolation_action is None: 31 | LOGGER.warning( 32 | "Device isolation is enabled but no action is specified. " 33 | "Please set `device_isolation_action` to either `error`, `warn`, or `kill`. " 34 | "Defaulting to `warn`." 35 | ) 36 | self.device_isolation_action = "warn" 37 | 38 | elif self.device_isolation and self.device_isolation_action not in {"error", "warn", "kill"}: 39 | raise ValueError( 40 | f"Unsupported device isolation action {self.device_isolation_action}. " 41 | "Please set `device_isolation_action` to either `error`, `warn`, or `kill`." 
42 | ) 43 | 44 | 45 | LauncherConfigT = TypeVar("LauncherConfigT", bound=LauncherConfig) 46 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/onnxruntime/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from onnxruntime.quantization import CalibrationMethod, QuantFormat, QuantizationMode, QuantType 4 | from optimum.pipelines import ORT_SUPPORTED_TASKS 5 | 6 | TASKS_TO_ORTMODELS = { 7 | task: f"optimum.onnxruntime.{task_dict['class'][0].__name__}" for task, task_dict in ORT_SUPPORTED_TASKS.items() 8 | } 9 | 10 | TASKS_TO_ONNXRUNTIME_PIPELINES = { 11 | "inpainting": "optimum.onnxruntime.ORTPipelineForInpainting", 12 | "text-to-image": "optimum.onnxruntime.ORTPipelineForText2Image", 13 | "image-to-image": "optimum.onnxruntime.ORTPipelineForImage2Image", 14 | } 15 | 16 | 17 | def format_calibration_config(calibration_config: Dict[str, Any]) -> None: 18 | if calibration_config.get("method", None) is not None: 19 | calibration_config["method"] = CalibrationMethod[calibration_config["method"]] 20 | 21 | return calibration_config 22 | 23 | 24 | def format_quantization_config(quantization_config: Dict[str, Any]) -> None: 25 | """Format the quantization dictionary for onnxruntime.""" 26 | # the conditionals are here because some quantization strategies don't have all the options 27 | if quantization_config.get("format", None) is not None: 28 | quantization_config["format"] = QuantFormat.from_string(quantization_config["format"]) 29 | if quantization_config.get("mode", None) is not None: 30 | quantization_config["mode"] = QuantizationMode.from_string(quantization_config["mode"]) 31 | if quantization_config.get("activations_dtype", None) is not None: 32 | quantization_config["activations_dtype"] = QuantType.from_string(quantization_config["activations_dtype"]) 33 | if quantization_config.get("weights_dtype", None) is not None: 34 | quantization_config["weights_dtype"] = QuantType.from_string(quantization_config["weights_dtype"]) 35 | 36 | return quantization_config 37 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/tensorrt_llm/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | from ...import_utils import tesnorrt_llm_version 5 | from ..config import BackendConfig 6 | 7 | SUPPORTED_DTYPES = [None, "float16", "bfloat16", "float32"] 8 | 9 | 10 | @dataclass 11 | class TRTLLMConfig(BackendConfig): 12 | name: str = "tensorrt-llm" 13 | version: Optional[str] = tesnorrt_llm_version() 14 | _target_: str = "optimum_benchmark.backends.tensorrt_llm.backend.TRTLLMBackend" 15 | 16 | no_weights: bool = False 17 | 18 | # trtllm kwargs 19 | tp: Optional[int] = None 20 | pp: Optional[int] = None 21 | dtype: Optional[str] = None 22 | use_fp8: Optional[bool] = None 23 | world_size: Optional[int] = None 24 | gpus_per_node: Optional[int] = None 25 | max_input_len: Optional[int] = None 26 | max_output_len: Optional[int] = None 27 | max_batch_size: Optional[int] = None 28 | max_new_tokens: Optional[int] = None 29 | max_prompt_length: Optional[int] = None 30 | optimization_level: Optional[int] = None 31 | use_cuda_graph: Optional[bool] = None 32 | 33 | def __post_init__(self) -> None: 34 | super().__post_init__() 35 | 36 | if self.device != "cuda": 37 | raise NotImplementedError(f"TRTLLMBackend only 
supports device cuda, got {self.device}") 38 | 39 | if self.dtype not in SUPPORTED_DTYPES: 40 | raise ValueError(f"dtype must be one of float16, bfloat16, float32, got {self.dtype}") 41 | 42 | if self.gpus_per_node is not None and self.world_size is not None and self.gpus_per_node != self.world_size: 43 | raise ValueError(f"gpus_per_node ({self.gpus_per_node}) != world_size ({self.world_size})") 44 | 45 | if ( 46 | self.world_size is not None 47 | and self.pp is not None 48 | and self.tp is not None 49 | and self.world_size != self.pp * self.tp 50 | ): 51 | raise ValueError(f"world_size ({self.gpus_per_node}) != pp ({self.pp}) * tp ({self.tp})") 52 | -------------------------------------------------------------------------------- /optimum_benchmark/generators/base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | import string 4 | from abc import ABC 5 | from typing import Dict, List, Tuple 6 | 7 | import torch 8 | 9 | LOGGER = logging.getLogger("generators") 10 | 11 | 12 | class BaseGenerator(ABC): 13 | def __init__(self, shapes: Dict[str, int], with_labels: bool): 14 | self.shapes = shapes 15 | self.with_labels = with_labels 16 | 17 | def assert_not_missing_shapes(self, required_shapes: List[str]): 18 | for shape in required_shapes: 19 | assert self.shapes.get(shape, None) is not None, ( 20 | f"{shape} either couldn't be inferred automatically from model artifacts or should be provided by the user. " 21 | f"Please provide it under `scenario.input_shapes.{shape}` or open an issue/PR in optimum-benchmark repository. " 22 | ) 23 | 24 | @staticmethod 25 | def generate_constant_integers(value: int, shape: Tuple[int]): 26 | return torch.full(shape, value, dtype=torch.int64) 27 | 28 | @staticmethod 29 | def generate_constant_floats(value: float, shape: Tuple[int]): 30 | return torch.full(shape, value, dtype=torch.float32) 31 | 32 | @staticmethod 33 | def generate_random_integers(min_value: int, max_value: int, shape: Tuple[int]): 34 | return torch.randint(min_value, max_value, shape) 35 | 36 | @staticmethod 37 | def generate_random_floats(min_value: float, max_value: float, shape: Tuple[int]): 38 | return torch.rand(shape) * (max_value - min_value) + min_value 39 | 40 | @staticmethod 41 | def generate_ranges(start: int, stop: int, shape: Tuple[int]): 42 | return torch.arange(start, stop).repeat(shape[0], 1) 43 | 44 | @staticmethod 45 | def generate_random_strings(num_seq: int) -> List[str]: 46 | return [ 47 | "".join(random.choice(string.ascii_letters + string.digits) for _ in range(random.randint(10, 100))) 48 | for _ in range(num_seq) 49 | ] 50 | 51 | def __call__(self): 52 | raise NotImplementedError("Generator must implement __call__ method") 53 | -------------------------------------------------------------------------------- /docker/cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ARG UBUNTU_VERSION=22.04 16 | 17 | FROM ubuntu:${UBUNTU_VERSION} 18 | 19 | # Install necessary packages 20 | ENV DEBIAN_FRONTEND=noninteractive 21 | ENV PATH="/home/user/.local/bin:${PATH}" 22 | RUN apt-get update && apt-get install -y --no-install-recommends \ 23 | sudo build-essential git bash-completion numactl \ 24 | python3.10 python3-pip python3.10-dev google-perftools && \ 25 | apt-get clean && rm -rf /var/lib/apt/lists/* && \ 26 | update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ 27 | pip install --no-cache-dir --upgrade pip setuptools wheel intel-openmp 28 | 29 | ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so" 30 | 31 | # Install PyTorch 32 | ARG TORCH_VERSION="" 33 | ARG TORCH_RELEASE_TYPE=stable 34 | 35 | RUN if [ -n "${TORCH_VERSION}" ]; then \ 36 | pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/cpu ; \ 37 | elif [ "${TORCH_RELEASE_TYPE}" = "stable" ]; then \ 38 | pip install --no-cache-dir torch torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/cpu ; \ 39 | elif [ "${TORCH_RELEASE_TYPE}" = "nightly" ]; then \ 40 | pip install --no-cache-dir --pre torch torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/nightly/cpu ; \ 41 | else \ 42 | echo "Error: Invalid TORCH_RELEASE_TYPE. Must be 'stable', 'nightly', or specify a TORCH_VERSION." && exit 1 ; \ 43 | fi 44 | -------------------------------------------------------------------------------- /optimum_benchmark/profilers/ort_profiler.py: -------------------------------------------------------------------------------- 1 | import json 2 | from logging import getLogger 3 | from typing import List, Tuple 4 | 5 | import pandas as pd 6 | from optimum.onnxruntime import ORTModel 7 | 8 | LOGGER = getLogger("ort_profiler") 9 | 10 | 11 | class ORTProfilingWrapper: 12 | def __init__(self, module: ORTModel): 13 | self.module = module 14 | self.profiling_records: List[Tuple[str, str, float]] = [] 15 | 16 | def __call__(self, *args, **kwargs): 17 | return self.module(*args, **kwargs) 18 | 19 | def get_profiling_records(self) -> List[Tuple[str, str, float]]: 20 | profiling_json = self.module.model.end_profiling() # type: ignore 21 | with open(profiling_json) as file_obj: 22 | profiling_data = json.load(file_obj) 23 | if isinstance(profiling_data, dict): 24 | profiling_data = profiling_data["traceEvents"] 25 | 26 | profiling_records = extract_last_run_records(profiling_data) 27 | return normalize_records(profiling_records) 28 | 29 | 30 | def normalize_records(data) -> List[Tuple[str, str, float]]: 31 | records = [] 32 | for item in data: 33 | cat = item.get("cat") 34 | if cat is None: 35 | continue 36 | dur = item.get("dur") 37 | if dur is None: 38 | continue 39 | arg = item.get("args") 40 | if arg is None: 41 | continue 42 | op_name = arg.get("op_name") 43 | 44 | name = item["name"] 45 | 46 | if cat != "Kernel" and not name.endswith("kernel_time"): 47 | continue 48 | 49 | if cat in ["Kernel", "Node"]: 50 | LOGGER.debug(f"Kernel/Node {name} took {dur / 1e6:.2e} seconds") 51 | records.append((name.replace("_kernel_time", ""), op_name, dur / 1e6)) 52 | 53 | return records 54 | 55 | 56 | def extract_last_run_records(data): 57 | # Here we assume that the traces are properly ordered, so we can simplify the splitting logic. 
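# Concretely: the trace events are grouped by "name" and only the last entry per name
# is kept, so repeated occurrences from earlier (e.g. warmup) runs are dropped and
# only the final run's record survives for each kernel/node.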
58 | return ( 59 | pd.DataFrame(data)[["name", "cat", "dur", "args"]] 60 | .groupby("name") 61 | .last() # not sure if this is the right way to do it 62 | .reset_index() 63 | .to_dict(orient="records") 64 | ) 65 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/vllm/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Any, Dict, Optional 3 | 4 | from ...import_utils import vllm_version 5 | from ..config import BackendConfig 6 | 7 | 8 | @dataclass 9 | class VLLMConfig(BackendConfig): 10 | name: str = "vllm" 11 | version: Optional[str] = vllm_version() 12 | _target_: str = "optimum_benchmark.backends.vllm.backend.VLLMBackend" 13 | 14 | # creates a model from scratch with dummy weights 15 | no_weights: bool = False 16 | 17 | # decides whether to use the offline or online llm engine 18 | serving_mode: str = "offline" 19 | 20 | # passed to EngineArgs 21 | engine_args: Dict[str, Any] = field(default_factory=dict) 22 | 23 | def __post_init__(self): 24 | # duplicates that are handled by the backend config directly 25 | if "model" in self.engine_args: 26 | raise ValueError("model should not be passed in `backend.engine_args`, use `backend.model` instead") 27 | 28 | if "tokenizer" in self.engine_args: 29 | raise ValueError("tokenizer should not be passed in `backend.engine_args`, use `backend.processor` instead") 30 | 31 | if "device" in self.engine_args: 32 | raise ValueError("device should not be passed in `backend.engine_args`, use `backend.device` instead") 33 | 34 | if self.serving_mode not in ["offline", "online"]: 35 | raise ValueError(f"Invalid serving_mode: {self.serving_mode}. Must be 'online' or 'offline'.") 36 | 37 | # needed for task/library/model_type inference 38 | self.model_kwargs = { 39 | "revision": self.engine_args.get("revision", "main"), 40 | "trust_remote_code": self.engine_args.get("trust_remote_code", False), 41 | **self.model_kwargs, 42 | } 43 | self.processor_kwargs = { 44 | "revision": self.engine_args.get("tokenizer_revision", "main"), 45 | "trust_remote_code": self.engine_args.get("trust_remote_code", False), 46 | **self.processor_kwargs, 47 | } 48 | 49 | super().__post_init__() 50 | 51 | if self.engine_args.get("disable_log_stats", None) is None: 52 | self.engine_args["disable_log_stats"] = True 53 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_rocm_pytorch.yaml: -------------------------------------------------------------------------------- 1 | name: CLI ROCm PyTorch Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | jobs: 23 | run_cli_rocm_pytorch_single_gpu_tests: 24 | if: ${{ 25 | (github.event_name == 'push') || 26 | (github.event_name == 'workflow_dispatch') || 27 | contains( github.event.pull_request.labels.*.name, 'cli') || 28 | contains( github.event.pull_request.labels.*.name, 'rocm') || 29 | contains( github.event.pull_request.labels.*.name, 'pytorch') || 30 | contains( github.event.pull_request.labels.*.name, 'single_gpu') || 31 | contains( github.event.pull_request.labels.*.name, 'cli_rocm_pytorch_single_gpu') 32 | }} 
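# Unlike the other CLI workflows in this folder, the ROCm jobs delegate to a reusable
# workflow in huggingface/hf-workflows (referenced below); the pytest_keywords
# expression restricts this job to single-GPU PyTorch CLI tests and excludes
# distributed (dp/tp/ddp/device_map/deepspeed) and quantization (bnb/awq/gptq) cases.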
33 | 34 | uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@testing 35 | with: 36 | test_file: test_cli.py 37 | machine_type: single-gpu 38 | install_extras: testing,diffusers,timm,peft,gptqmodel 39 | pytest_keywords: cli and cuda and pytorch and not (dp or tp or ddp or device_map or deepspeed) and not (bnb or awq or gptq) 40 | 41 | run_cli_rocm_pytorch_multi_gpu_tests: 42 | if: ${{ 43 | (github.event_name == 'push') || 44 | (github.event_name == 'workflow_dispatch') || 45 | contains( github.event.pull_request.labels.*.name, 'cli') || 46 | contains( github.event.pull_request.labels.*.name, 'rocm') || 47 | contains( github.event.pull_request.labels.*.name, 'pytorch') || 48 | contains( github.event.pull_request.labels.*.name, 'multi_gpu') || 49 | contains( github.event.pull_request.labels.*.name, 'cli_rocm_pytorch_multi_gpu') 50 | }} 51 | 52 | uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@testing 53 | with: 54 | test_file: test_cli.py 55 | machine_type: multi-gpu 56 | install_extras: testing,diffusers,timm,peft 57 | pytest_keywords: cli and cuda and pytorch and (dp or tp or ddp or device_map) 58 | -------------------------------------------------------------------------------- /docker/cuda/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ARG CUDA_VERSION=12.8.1 16 | ARG UBUNTU_VERSION=22.04 17 | 18 | FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu${UBUNTU_VERSION} 19 | 20 | # Install necessary packages 21 | ENV DEBIAN_FRONTEND=noninteractive 22 | RUN apt-get update && apt-get install -y --no-install-recommends \ 23 | sudo build-essential git bash-completion \ 24 | python3.10 python3-pip python3.10-dev && \ 25 | apt-get clean && rm -rf /var/lib/apt/lists/* && \ 26 | update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ 27 | pip install --no-cache-dir --upgrade pip setuptools wheel requests 28 | 29 | # Install PyTorch 30 | ARG TORCH_VERSION="" 31 | ARG TORCH_CUDA=cu128 32 | ARG TORCH_RELEASE_TYPE=stable 33 | 34 | RUN if [ -n "${TORCH_VERSION}" ]; then \ 35 | pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \ 36 | elif [ "${TORCH_RELEASE_TYPE}" = "stable" ]; then \ 37 | pip install --no-cache-dir torch torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \ 38 | elif [ "${TORCH_RELEASE_TYPE}" = "nightly" ]; then \ 39 | pip install --no-cache-dir --pre torch torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/nightly/${TORCH_CUDA} ; \ 40 | else \ 41 | echo "Error: Invalid TORCH_RELEASE_TYPE. Must be 'stable', 'nightly', or specify a TORCH_VERSION." 
&& exit 1 ; \ 42 | fi 43 | 44 | # Install quantization libraries from source 45 | ENV MAX_JOBS=1 46 | ENV CUDA_VERSION=12.8 47 | ENV GPTQMODEL_FORCE_BUILD=1 48 | ENV TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" 49 | 50 | RUN pip install -v gptqmodel --no-build-isolation --no-cache-dir 51 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/openvino/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Any, Dict, Optional 3 | 4 | from ...import_utils import openvino_version 5 | from ..config import BackendConfig 6 | 7 | TORCH_DTYPES = ["bfloat16", "float16", "float32", "auto"] 8 | 9 | 10 | @dataclass 11 | class OpenVINOConfig(BackendConfig): 12 | name: str = "openvino" 13 | version: Optional[str] = openvino_version() 14 | _target_: str = "optimum_benchmark.backends.openvino.backend.OpenVINOBackend" 15 | 16 | no_weights: bool = False 17 | 18 | # ovmodel kwargs 19 | export: Optional[bool] = None 20 | use_cache: Optional[bool] = None 21 | use_merged: Optional[bool] = None 22 | torch_dtype: Optional[str] = None 23 | load_in_8bit: Optional[bool] = None 24 | load_in_4bit: Optional[bool] = None 25 | ov_config: Optional[Dict[str, Any]] = None 26 | quantization_config: Optional[Dict[str, Any]] = None 27 | 28 | # compilation options 29 | half: bool = False 30 | compile: bool = False 31 | reshape: bool = False 32 | reshape_kwargs: Dict[str, int] = field(default_factory=dict) 33 | 34 | def __post_init__(self): 35 | super().__post_init__() 36 | 37 | if self.device_ids is not None: 38 | raise NotImplementedError( 39 | "OpenVINOBackend does not support device_ids. " 40 | "Please use the `device` argument with OpenVINO device notation, e.g. 'CPU', 'GPU.0'" 41 | ) 42 | 43 | if self.model_kwargs.get("torch_dtype", None) is not None: 44 | raise ValueError( 45 | "`torch_dtype` is an explicit argument in the OpenVINO backend config. " 46 | "Please remove it from the `model_kwargs` and set it in the backend config directly." 47 | ) 48 | 49 | if self.torch_dtype is not None and self.torch_dtype not in TORCH_DTYPES: 50 | raise ValueError(f"torch_dtype should be one of None or {TORCH_DTYPES}, got {self.torch_dtype}") 51 | 52 | if self.intra_op_num_threads is not None: 53 | raise NotImplementedError("OpenVINOBackend does not support intra_op_num_threads. Please use the ov_config") 54 | 55 | if self.inter_op_num_threads is not None: 56 | raise NotImplementedError("OpenVINOBackend does not support inter_op_num_threads. 
Please use the ov_config") 57 | -------------------------------------------------------------------------------- /optimum_benchmark/logging_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.config 3 | from subprocess import PIPE, STDOUT, Popen 4 | from typing import List, Optional 5 | 6 | 7 | def setup_logging( 8 | level: str = "INFO", 9 | to_file: bool = False, 10 | use_colorlog: bool = True, 11 | prefix: Optional[str] = None, 12 | disable_existing_loggers: bool = False, 13 | ): 14 | # base logging config 15 | logging_config = { 16 | "version": 1, 17 | "handlers": { 18 | "console": {"formatter": "simple", "stream": "ext://sys.stdout", "class": "logging.StreamHandler"}, 19 | }, 20 | "root": {"level": level, "handlers": ["console"]}, 21 | "disable_existing_loggers": disable_existing_loggers, 22 | } 23 | 24 | # formatters 25 | logging_config["formatters"] = { 26 | "simple": {"format": "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s"}, 27 | } 28 | 29 | # add file handler 30 | if to_file: 31 | logging_config["handlers"]["file"] = { 32 | "formatter": "simple", 33 | "filename": "benchmark.log", 34 | "class": "logging.FileHandler", 35 | } 36 | logging_config["root"]["handlers"].append("file") 37 | 38 | # use colorlog 39 | if use_colorlog: 40 | logging_config["formatters"]["colorlog"] = { 41 | "()": "colorlog.ColoredFormatter", 42 | "format": "[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s", 43 | "log_colors": {"DEBUG": "purple", "INFO": "green", "WARNING": "yellow", "CRITICAL": "red", "ERROR": "red"}, 44 | } 45 | for handler in logging_config["handlers"]: 46 | logging_config["handlers"][handler]["formatter"] = "colorlog" 47 | 48 | # format prefix 49 | if prefix is not None: 50 | for formatter in logging_config["formatters"]: 51 | logging_config["formatters"][formatter]["format"] = ( 52 | f"[{prefix}]" + logging_config["formatters"][formatter]["format"] 53 | ) 54 | 55 | logging.config.dictConfig(logging_config) 56 | 57 | 58 | def run_subprocess_and_log_stream_output(logger: logging.Logger, args: List[str]) -> Popen: 59 | popen = Popen(args, stdout=PIPE, stderr=STDOUT) 60 | 61 | for line in iter(popen.stdout.readline, b""): 62 | if line is not None: 63 | logger.info(line.decode("utf-8").rstrip()) 64 | 65 | popen.wait() 66 | return popen 67 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cuda_vllm.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CUDA vLLM Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | VLLM_USE_V1: 0 24 | UV_TORCH_BACKEND: auto 25 | 26 | jobs: 27 | run_cli_cuda_vllm_single_gpu_tests: 28 | if: ${{ 29 | (github.event_name == 'push') || 30 | (github.event_name == 'workflow_dispatch') || 31 | contains( github.event.pull_request.labels.*.name, 'cli') || 32 | contains( github.event.pull_request.labels.*.name, 'cuda') || 33 | contains( github.event.pull_request.labels.*.name, 'vllm') || 34 | contains( github.event.pull_request.labels.*.name, 'single_gpu') || 35 | contains( 
github.event.pull_request.labels.*.name, 'cli_cuda_vllm_single_gpu') 36 | }} 37 | 38 | runs-on: 39 | group: aws-g5-4xlarge-plus 40 | 41 | steps: 42 | - name: Checkout 43 | uses: actions/checkout@v4 44 | 45 | - name: Install uv 46 | uses: astral-sh/setup-uv@v6 47 | with: 48 | enable-cache: true 49 | 50 | - name: Run tests 51 | run: | 52 | make test-cli-cuda-vllm-single 53 | 54 | - name: Run examples 55 | run: | 56 | make test-cli-cuda-vllm-single-examples 57 | 58 | run_cli_cuda_vllm_multi_gpu_tests: 59 | if: ${{ 60 | (github.event_name == 'push') || 61 | (github.event_name == 'workflow_dispatch') || 62 | contains( github.event.pull_request.labels.*.name, 'cli') || 63 | contains( github.event.pull_request.labels.*.name, 'cuda') || 64 | contains( github.event.pull_request.labels.*.name, 'vllm') || 65 | contains( github.event.pull_request.labels.*.name, 'multi_gpu') || 66 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_vllm_multi_gpu') 67 | }} 68 | 69 | runs-on: 70 | group: aws-g5-12xlarge-plus 71 | 72 | steps: 73 | - name: Checkout 74 | uses: actions/checkout@v4 75 | 76 | - name: Install uv 77 | uses: astral-sh/setup-uv@v6 78 | with: 79 | enable-cache: true 80 | 81 | - name: Run tests 82 | run: | 83 | make test-cli-cuda-vllm-multi 84 | 85 | - name: Run examples 86 | run: | 87 | make test-cli-cuda-vllm-multi-examples 88 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/llama_cpp/backend.py: -------------------------------------------------------------------------------- 1 | from tempfile import TemporaryDirectory 2 | from typing import Any, Dict 3 | 4 | from llama_cpp import Llama 5 | 6 | from ..base import Backend 7 | from .config import LlamaCppConfig 8 | 9 | 10 | class LlamaCppBackend(Backend[LlamaCppConfig]): 11 | NAME: str = "llama_cpp" 12 | 13 | pretrained_model: Llama 14 | 15 | def __init__(self, config: LlamaCppConfig) -> None: 16 | super().__init__(config) 17 | 18 | def load(self) -> None: 19 | self.logger.info("\t+ Creating backend temporary directory") 20 | self.tmpdir = TemporaryDirectory() 21 | self.logger.info("\t+ Loading pretrained model") 22 | self.load_model_from_pretrained() 23 | self.tmpdir.cleanup() 24 | 25 | def load_model_from_pretrained(self) -> None: 26 | """ 27 | Load the pretrained model from the given model name (normally GGUF, GGML) 28 | """ 29 | 30 | self.pretrained_model = Llama.from_pretrained(self.config.model, **self.llama_cpp_kwargs) 31 | 32 | @property 33 | def llama_cpp_kwargs(self) -> Dict[str, Any]: 34 | return { 35 | "embedding": self.config.task == "feature-extraction", 36 | "filename": self.config.filename, 37 | "verbose": False, 38 | "echo": False, 39 | } 40 | 41 | def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: 42 | if self.config.task == "text-generation": 43 | if inputs["input_ids"].shape[0] != 1: 44 | raise ValueError("Batch size must be 1 for Text Generation with llama-cpp-python") 45 | return {"tokens": inputs["input_ids"].squeeze(0).tolist()} 46 | elif self.config.task == "feature-extraction": 47 | return {"input": [self.pretrained_model.detokenize(x).decode("utf-8") for x in inputs["input_ids"]]} 48 | else: 49 | raise ValueError(f"Task {self.config.task} not supported by {self.NAME}") 50 | 51 | def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> Any: 52 | self.pretrained_model.embed(**inputs) 53 | 54 | def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> list[int]: 55 | generator = 
self.pretrained_model.generate(**inputs, reset=True) 56 | for _ in range(kwargs["max_new_tokens"]): 57 | next(generator) 58 | 59 | def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> list[int]: 60 | generator = self.pretrained_model.generate(**inputs, reset=True) 61 | for _ in range(kwargs["max_new_tokens"]): 62 | next(generator) 63 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/torchrun/config.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from dataclasses import dataclass, field 3 | from typing import Any, Dict, Optional 4 | 5 | from ..config import LauncherConfig 6 | 7 | 8 | @dataclass 9 | class TorchrunConfig(LauncherConfig): 10 | name: str = "torchrun" 11 | _target_: str = "optimum_benchmark.launchers.torchrun.launcher.TorchrunLauncher" 12 | 13 | # Minimum amount of nodes that the user function will be launched on. 14 | # Elastic agent ensures that the user function start only when the min_nodes amount enters the rendezvous. 15 | min_nodes: int = 1 16 | # Maximum amount of nodes that the user function will be launched on. 17 | max_nodes: int = 1 18 | # On each node the elastic agent will launch this amount of workers that will execute user defined function. 19 | nproc_per_node: int = 2 20 | # User defined role of the worker (defaults to "trainer"). 21 | role: str = "benchmarker" 22 | # The interval in seconds that is used by the elastic_agent as a period of monitoring workers. 23 | monitor_interval: int = 30 24 | # The name of the rdzv store. 25 | rdzv_id: str = str(uuid.uuid4()) 26 | # rdzv_backend to use in the rendezvous (etcd). 27 | rdzv_backend: str = "c10d" 28 | # The endpoint of the rdzv sync. storage. 29 | rdzv_endpoint: str = "localhost:0" 30 | # Key, value pair that specifies rendezvous specific configuration. 31 | rdzv_configs: Dict[str, Any] = field(default_factory=lambda: {"rank": 0, "timeout": -1}) 32 | # The timeout in seconds that is used by the elastic agent to wait for the workers to enter the rendezvous. 33 | rdzv_timeout: int = -1 34 | # The maximum amount of restarts that elastic agent will conduct on workers before failure. 35 | max_restarts: int = 0 36 | # The method is used by the elastic agent to start the workers (spawn, fork, forkserver). 37 | start_method: str = "spawn" 38 | # address of the local node if any. If not set, a lookup on the local machine's FQDN will be performed. 39 | local_addr: Optional[str] = None 40 | 41 | # The socket ifname 42 | socket_ifname: Optional[str] = None 43 | 44 | def __post_init__(self): 45 | super().__post_init__() 46 | 47 | if self.start_method not in ["spawn", "fork"]: 48 | raise ValueError(f"start_method must be one of ['spawn', 'fork'], got {self.start_method}") 49 | 50 | if self.min_nodes != self.max_nodes: 51 | raise ValueError( 52 | f"min_nodes and max_nodes must be equal for a reproducible benchmark, got {self.min_nodes} and {self.max_nodes}" 53 | ) 54 | -------------------------------------------------------------------------------- /docker/rocm/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ARG ROCM_VERSION=6.3.4 16 | ARG UBUNTU_VERSION=22.04 17 | 18 | FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION} 19 | 20 | # Install necessary packages 21 | ENV PATH="/opt/rocm/bin:${PATH}" 22 | ENV DEBIAN_FRONTEND=noninteractive 23 | RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ 24 | sudo build-essential git bash-completion \ 25 | python3.10 python3-pip python3.10-dev && \ 26 | apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove -y && \ 27 | update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ 28 | pip install --no-cache-dir --upgrade pip setuptools wheel requests && \ 29 | cd /opt/rocm/share/amd_smi && pip install . 30 | 31 | RUN apt-get update && apt-get upgrade -y && apt-get install -y hipsparse hipblas hipsolver rocthrust && \ 32 | apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove -y 33 | 34 | # Install PyTorch 35 | ARG TORCH_VERSION="" 36 | ARG TORCH_ROCM=rocm6.3 37 | ARG TORCH_RELEASE_TYPE=stable 38 | 39 | RUN if [ -n "${TORCH_VERSION}" ]; then \ 40 | pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \ 41 | elif [ "${TORCH_RELEASE_TYPE}" = "stable" ]; then \ 42 | pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \ 43 | elif [ "${TORCH_RELEASE_TYPE}" = "nightly" ]; then \ 44 | pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/${TORCH_ROCM} ; \ 45 | else \ 46 | echo "Error: Invalid TORCH_RELEASE_TYPE. Must be 'stable', 'nightly', or specify a TORCH_VERSION." 
&& exit 1 ; \ 47 | fi 48 | 49 | # Install quantization libraries from source 50 | ENV MAX_JOBS=1 51 | ENV ROCM_VERSION=6.3 52 | ENV GPTQMODEL_FORCE_BUILD=1 53 | ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" 54 | 55 | RUN pip install -v gptqmodel --no-build-isolation --no-cache-dir 56 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_llama_quants.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig 4 | from optimum_benchmark.logging_utils import setup_logging 5 | 6 | BENCHMARK_NAME = "cuda_pytorch_llama" 7 | MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" 8 | PUSH_REPO_ID = os.environ.get("PUSH_REPO_ID", None) 9 | 10 | WEIGHTS_CONFIGS = { 11 | "float16": { 12 | "torch_dtype": "float16", 13 | "quantization_config": {}, 14 | }, 15 | "4bit-gptq-exllama-v2": { 16 | "torch_dtype": "float16", 17 | "quantization_config": { 18 | "quant_method": "gptq", 19 | "bits": 4, 20 | "use_exllama ": True, 21 | "version": 2, 22 | "model_seqlen": 256, 23 | }, 24 | }, 25 | "torchao-int4wo-128": { 26 | "torch_dtype": "bfloat16", 27 | "quantization_config": { 28 | "quant_method": "torchao", 29 | "quant_type": "int4_weight_only", 30 | "group_size": 128, 31 | }, 32 | }, 33 | } 34 | 35 | 36 | def run_benchmark(weight_config: str): 37 | launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn") 38 | backend_config = PyTorchConfig( 39 | model=MODEL, 40 | device="cuda", 41 | device_ids="0", 42 | no_weights=True, 43 | **WEIGHTS_CONFIGS[weight_config], 44 | ) 45 | scenario_config = InferenceConfig( 46 | memory=True, 47 | latency=True, 48 | duration=10, 49 | iterations=10, 50 | warmup_runs=10, 51 | input_shapes={"batch_size": 1, "sequence_length": 64}, 52 | generate_kwargs={"max_new_tokens": 32, "min_new_tokens": 32}, 53 | ) 54 | benchmark_config = BenchmarkConfig( 55 | name=BENCHMARK_NAME, 56 | launcher=launcher_config, 57 | scenario=scenario_config, 58 | backend=backend_config, 59 | print_report=True, 60 | log_report=True, 61 | ) 62 | benchmark_report = Benchmark.launch(benchmark_config) 63 | 64 | return benchmark_config, benchmark_report 65 | 66 | 67 | if __name__ == "__main__": 68 | level = os.environ.get("LOG_LEVEL", "INFO") 69 | to_file = os.environ.get("LOG_TO_FILE", "0") == "1" 70 | setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS") 71 | 72 | for weight_config in WEIGHTS_CONFIGS: 73 | benchmark_config, benchmark_report = run_benchmark(weight_config) 74 | benchmark = Benchmark(config=benchmark_config, report=benchmark_report) 75 | 76 | if PUSH_REPO_ID is not None: 77 | benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME, filename=f"{weight_config}.json") 78 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/py_txi/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import dataclass 3 | from typing import Any, Dict, List, Optional, Union 4 | 5 | from ...import_utils import py_txi_version 6 | from ...system_utils import is_nvidia_system, is_rocm_system 7 | from ...task_utils import TEXT_EMBEDDING_TASKS, TEXT_GENERATION_TASKS 8 | from ..config import BackendConfig 9 | 10 | 11 | @dataclass 12 | class PyTXIConfig(BackendConfig): 13 | name: str = "py-txi" 14 | version: Optional[str] = py_txi_version() 
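# _target_ (next field) is the dotted import path used to instantiate the backend
# class, following the same Hydra-style convention as the other backend configs.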
15 | _target_: str = "optimum_benchmark.backends.py_txi.backend.PyTXIBackend" 16 | 17 | # optimum-benchmark specific 18 | no_weights: bool = False 19 | 20 | # Image to use for the container 21 | image: Optional[str] = None 22 | # Shared memory size for the container 23 | shm_size: Optional[str] = None 24 | # List of custom devices to forward to the container e.g. ["/dev/kfd", "/dev/dri"] for ROCm 25 | devices: Optional[List[str]] = None 26 | # NVIDIA-docker GPU device options e.g. "all" (all) or "0,1,2,3" (ids) or 4 (count) 27 | gpus: Optional[Union[str, int]] = None 28 | # Things to forward to the container 29 | ports: Optional[Dict[str, Any]] = None 30 | environment: Optional[List[str]] = None 31 | volumes: Optional[Dict[str, Any]] = None 32 | # First connection/request 33 | connection_timeout: Optional[int] = None 34 | first_request_timeout: Optional[int] = None 35 | max_concurrent_requests: Optional[int] = None 36 | 37 | # Common options 38 | dtype: Optional[str] = None 39 | # TEI specific 40 | pooling: Optional[str] = None 41 | # TGI specific 42 | sharded: Optional[str] = None 43 | quantize: Optional[str] = None 44 | num_shard: Optional[int] = None 45 | speculate: Optional[int] = None 46 | cuda_graphs: Optional[int] = None 47 | trust_remote_code: Optional[bool] = None 48 | disable_custom_kernels: Optional[bool] = None 49 | 50 | def __post_init__(self): 51 | super().__post_init__() 52 | 53 | if self.task not in TEXT_GENERATION_TASKS + TEXT_EMBEDDING_TASKS: 54 | raise NotImplementedError(f"TXI does not support task {self.task}") 55 | 56 | # Device options 57 | if self.device_ids is not None and is_nvidia_system() and self.gpus is None: 58 | self.gpus = self.device_ids 59 | 60 | if self.device_ids is not None and is_rocm_system() and self.devices is None: 61 | ids = list(map(int, self.device_ids.split(","))) 62 | renderDs = [file for file in os.listdir("/dev/dri") if file.startswith("renderD")] 63 | self.devices = ["/dev/kfd"] + [f"/dev/dri/{renderDs[i]}" for i in ids] 64 | 65 | self.trust_remote_code = self.model_kwargs.get("trust_remote_code", None) 66 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cuda_pytorch.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CUDA PyTorch Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: auto 24 | 25 | jobs: 26 | run_cli_cuda_pytorch_single_gpu_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cuda') || 32 | contains( github.event.pull_request.labels.*.name, 'pytorch') || 33 | contains( github.event.pull_request.labels.*.name, 'single_gpu') || 34 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_pytorch_single_gpu') 35 | }} 36 | 37 | runs-on: 38 | group: aws-g5-4xlarge-plus 39 | 40 | steps: 41 | - name: Checkout 42 | uses: actions/checkout@v4 43 | 44 | - name: Install uv 45 | uses: astral-sh/setup-uv@v6 46 | with: 47 | enable-cache: true 48 | 49 | - name: Run tests 50 | run: | 51 | make 
test-cli-cuda-pytorch-single 52 | 53 | - name: Run examples 54 | run: | 55 | make test-cli-cuda-pytorch-single-examples 56 | 57 | run_cli_cuda_pytorch_multi_gpu_tests: 58 | if: ${{ 59 | (github.event_name == 'push') || 60 | (github.event_name == 'workflow_dispatch') || 61 | contains( github.event.pull_request.labels.*.name, 'cli') || 62 | contains( github.event.pull_request.labels.*.name, 'cuda') || 63 | contains( github.event.pull_request.labels.*.name, 'pytorch') || 64 | contains( github.event.pull_request.labels.*.name, 'multi_gpu') || 65 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_pytorch_multi_gpu') 66 | }} 67 | 68 | runs-on: 69 | group: aws-g5-12xlarge-plus 70 | 71 | # need the devel image for deepspeed compilation 72 | container: 73 | image: nvidia/cuda:12.8.1-devel-ubuntu22.04 74 | options: --ipc host --gpus all 75 | 76 | steps: 77 | - name: Checkout 78 | uses: actions/checkout@v4 79 | 80 | - name: Install uv 81 | uses: astral-sh/setup-uv@v6 82 | with: 83 | enable-cache: true 84 | 85 | - name: Run tests 86 | run: | 87 | make test-cli-cuda-pytorch-multi 88 | 89 | - name: Run examples 90 | run: | 91 | make test-cli-cuda-pytorch-multi-examples 92 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cuda_tensorrt_llm.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CUDA TensorRT-LLM Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_SYSTEM_PYTHON: 1 24 | UV_TORCH_BACKEND: auto 25 | 26 | jobs: 27 | cli_cuda_tensorrt_llm_single_gpu_tests: 28 | if: ${{ 29 | (github.event_name == 'push') || 30 | (github.event_name == 'workflow_dispatch') || 31 | contains( github.event.pull_request.labels.*.name, 'cli') || 32 | contains( github.event.pull_request.labels.*.name, 'cuda') || 33 | contains( github.event.pull_request.labels.*.name, 'tensorrt_llm') || 34 | contains( github.event.pull_request.labels.*.name, 'single_gpu') || 35 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm') 36 | }} 37 | 38 | runs-on: 39 | group: aws-g5-4xlarge-plus 40 | 41 | container: 42 | image: huggingface/optimum-nvidia:latest 43 | options: --ipc host --gpus all 44 | 45 | steps: 46 | - name: Checkout 47 | uses: actions/checkout@v4 48 | 49 | - name: Install uv 50 | uses: astral-sh/setup-uv@v6 51 | 52 | - name: Run tests 53 | run: | 54 | make test-cli-cuda-tensorrt-llm-single 55 | 56 | - name: Run examples 57 | run: | 58 | make test-cli-cuda-tensorrt-llm-single-examples 59 | 60 | cli_cuda_tensorrt_llm_multi_gpu_tests: 61 | if: ${{ 62 | (github.event_name == 'push') || 63 | (github.event_name == 'workflow_dispatch') || 64 | contains( github.event.pull_request.labels.*.name, 'cli') || 65 | contains( github.event.pull_request.labels.*.name, 'cuda') || 66 | contains( github.event.pull_request.labels.*.name, 'tensorrt_llm') || 67 | contains( github.event.pull_request.labels.*.name, 'multi_gpu') || 68 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm_multi_gpu') 69 | }} 70 | 71 | runs-on: 72 | group: aws-g5-12xlarge-plus 73 | 74 | container: 75 | image: huggingface/optimum-nvidia:latest 76 | options: --ipc host --gpus all 77 | 78 | 
steps: 79 | - name: Checkout 80 | uses: actions/checkout@v4 81 | 82 | - name: Install uv 83 | uses: astral-sh/setup-uv@v6 84 | 85 | - name: Run tests 86 | run: | 87 | make test-cli-cuda-tensorrt-llm-multi 88 | 89 | - name: Run examples 90 | run: | 91 | make test-cli-cuda-tensorrt-llm-multi-examples 92 | -------------------------------------------------------------------------------- /.github/workflows/images.yaml: -------------------------------------------------------------------------------- 1 | name: Build and Publish Docker Images 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | paths: 9 | - docker/** 10 | - .github/workflows/images.yaml 11 | pull_request: 12 | branches: 13 | - main 14 | types: 15 | - opened 16 | - reopened 17 | - synchronize 18 | - labeled 19 | - unlabeled 20 | schedule: 21 | - cron: "0 0 * * *" 22 | 23 | concurrency: 24 | cancel-in-progress: true 25 | group: ${{ github.workflow }}-${{ github.ref }} 26 | 27 | env: 28 | REGISTRY: ghcr.io 29 | IMAGE_NAME: ${{ github.repository }} 30 | BUILDX_CACHE_TTL: 604800 # Cache expiration in seconds (e.g., 7 days) 31 | 32 | jobs: 33 | publish: 34 | if: ${{ 35 | github.event_name == 'push' || 36 | github.event_name == 'schedule' || 37 | github.event_name == 'workflow_dispatch' || 38 | contains(github.event.pull_request.labels.*.name, 'docker') 39 | }} 40 | 41 | strategy: 42 | fail-fast: true 43 | matrix: 44 | image_flavor: [cpu, cuda, rocm] 45 | 46 | runs-on: ubuntu-latest 47 | 48 | permissions: 49 | contents: write 50 | packages: write 51 | id-token: write 52 | 53 | steps: 54 | - name: Free Disk Space 55 | uses: jlumbroso/free-disk-space@main 56 | with: 57 | tool-cache: true 58 | 59 | - name: Checkout code 60 | uses: actions/checkout@v4 61 | 62 | - name: Set up Docker Buildx 63 | uses: docker/setup-buildx-action@v3 64 | with: 65 | buildkitd-flags: --debug 66 | 67 | - name: Login to GitHub Container Registry 68 | uses: docker/login-action@v3 69 | with: 70 | registry: ghcr.io 71 | username: ${{ github.actor }} 72 | password: ${{ secrets.GITHUB_TOKEN }} 73 | 74 | - name: Extract metadata (tags, labels) for Docker images 75 | id: meta 76 | uses: docker/metadata-action@v5 77 | with: 78 | flavor: | 79 | latest=false 80 | images: | 81 | ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 82 | tags: | 83 | type=raw,value=latest-${{ matrix.image_flavor }} 84 | type=raw,value=sha-${{ github.sha }}-${{ matrix.image_flavor }} 85 | 86 | - name: Build and push Docker images 87 | uses: docker/build-push-action@v5 88 | id: push 89 | with: 90 | context: . 
91 | push: true 92 | tags: ${{ steps.meta.outputs.tags }} 93 | labels: ${{ steps.meta.outputs.labels }} 94 | file: docker/${{ matrix.image_flavor }}/Dockerfile 95 | cache-to: type=gha,mode=min,scope=docker-cache-${{ matrix.image_flavor }} 96 | cache-from: type=gha,mode=max,scope=docker-cache-${{ matrix.image_flavor }} 97 | -------------------------------------------------------------------------------- /optimum_benchmark/scenarios/training/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from logging import getLogger 3 | from typing import Any, Dict 4 | 5 | from ..config import ScenarioConfig 6 | 7 | LOGGER = getLogger("training") 8 | 9 | TRAINING_ARGUMENT = { 10 | "per_device_train_batch_size": 2, 11 | "gradient_accumulation_steps": 1, 12 | "output_dir": "./trainer_output", 13 | "eval_strategy": "no", 14 | "save_strategy": "no", 15 | "do_train": True, 16 | "use_cpu": False, 17 | "max_steps": -1, 18 | # disable evaluation 19 | "do_eval": False, 20 | "do_predict": False, 21 | # disable custom logging 22 | "report_to": "none", 23 | # disable transformers memory metrics 24 | "skip_memory_metrics": True, 25 | # from pytorch warning: "this flag results in an extra traversal of the 26 | # autograd graph every iteration which can adversely affect performance." 27 | "ddp_find_unused_parameters": False, 28 | } 29 | 30 | DATASET_SHAPES = {"dataset_size": 500, "sequence_length": 16, "num_choices": 1} 31 | 32 | 33 | @dataclass 34 | class TrainingConfig(ScenarioConfig): 35 | name: str = "training" 36 | _target_: str = "optimum_benchmark.scenarios.training.scenario.TrainingScenario" 37 | 38 | # training options 39 | max_steps: int = 140 40 | warmup_steps: int = 40 41 | 42 | # dataset options 43 | dataset_shapes: Dict[str, Any] = field(default_factory=dict) 44 | # training options 45 | training_arguments: Dict[str, Any] = field(default_factory=dict) 46 | 47 | # tracking options 48 | latency: bool = field(default=True, metadata={"help": "Measure latencies and throughputs"}) 49 | memory: bool = field(default=False, metadata={"help": "Measure max memory usage"}) 50 | energy: bool = field(default=False, metadata={"help": "Measure energy usage"}) 51 | 52 | def __post_init__(self): 53 | super().__post_init__() 54 | 55 | self.dataset_shapes = {**DATASET_SHAPES, **self.dataset_shapes} 56 | self.training_arguments = {**TRAINING_ARGUMENT, **self.training_arguments} 57 | 58 | if self.training_arguments["max_steps"] == -1: 59 | self.training_arguments["max_steps"] = self.max_steps 60 | 61 | if self.max_steps != self.training_arguments["max_steps"]: 62 | LOGGER.warning( 63 | f"`scenario.max_steps` ({self.max_steps}) and `scenario.training_arguments.max_steps` " 64 | f"({self.training_arguments['max_steps']}) are different. " 65 | "Using `scenario.training_arguments.max_steps`."
66 | ) 67 | self.max_steps = self.training_arguments["max_steps"] 68 | 69 | if self.warmup_steps > self.max_steps: 70 | raise ValueError( 71 | f"`scenario.warmup_steps` ({self.warmup_steps}) must be smaller than `scenario.max_steps` ({self.max_steps})" 72 | ) 73 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/timm_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from transformers import PretrainedConfig 4 | 5 | from ..import_utils import is_timm_available 6 | 7 | if is_timm_available(): 8 | from timm import create_model 9 | from timm.models import get_pretrained_cfg, load_model_config_from_hf, parse_model_name 10 | 11 | 12 | def get_timm_model_creator(): 13 | if not is_timm_available(): 14 | raise ImportError("timm is not available. Please, pip install timm.") 15 | 16 | return create_model 17 | 18 | 19 | def get_timm_pretrained_config(model_name: str) -> "PretrainedConfig": 20 | if not is_timm_available(): 21 | raise ImportError("timm is not available. Please, pip install timm.") 22 | 23 | model_source, model_name = parse_model_name(model_name) 24 | if model_source == "hf-hub": 25 | # For model names specified in the form `hf-hub:path/architecture_name@revision`, 26 | # load model weights + pretrained_cfg from Hugging Face hub. 27 | pretrained_cfg, model_name = load_model_config_from_hf(model_name) 28 | return pretrained_cfg 29 | 30 | return get_pretrained_cfg(model_name) 31 | 32 | 33 | def extract_timm_shapes_from_config(config: "PretrainedConfig") -> Dict[str, Any]: 34 | if not is_timm_available(): 35 | raise ImportError("timm is not available. Please, pip install timm.") 36 | 37 | artifacts_dict = {} 38 | 39 | config_dict = {k: v for k, v in config.to_dict().items() if v is not None} 40 | artifacts_dict.update(config_dict) 41 | 42 | shapes = {} 43 | 44 | # image input 45 | if "num_channels" in artifacts_dict: 46 | shapes["num_channels"] = artifacts_dict.get("num_channels", None) 47 | elif "channels" in artifacts_dict: 48 | shapes["num_channels"] = artifacts_dict.get("channels", None) 49 | 50 | if "image_size" in artifacts_dict: 51 | image_size = artifacts_dict["image_size"] 52 | elif "size" in artifacts_dict: 53 | image_size = artifacts_dict["size"] 54 | else: 55 | image_size = None 56 | 57 | if isinstance(image_size, (int, float)): 58 | shapes["height"] = image_size 59 | shapes["width"] = image_size 60 | elif isinstance(image_size, (list, tuple)): 61 | shapes["height"] = image_size[0] 62 | shapes["width"] = image_size[0] 63 | elif isinstance(image_size, dict) and len(image_size) == 2: 64 | shapes["height"] = list(image_size.values())[0] 65 | shapes["width"] = list(image_size.values())[1] 66 | elif isinstance(image_size, dict) and len(image_size) == 1: 67 | shapes["height"] = list(image_size.values())[0] 68 | shapes["width"] = list(image_size.values())[0] 69 | 70 | if "input_size" in artifacts_dict: 71 | input_size = artifacts_dict.get("input_size", None) 72 | shapes["num_channels"] = input_size[0] 73 | shapes["height"] = input_size[1] 74 | shapes["width"] = input_size[2] 75 | 76 | return shapes 77 | -------------------------------------------------------------------------------- /optimum_benchmark/benchmark/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from logging import getLogger 3 | from typing import TYPE_CHECKING, Type 4 | 5 | from hydra.utils import 
get_class 6 | 7 | from ..backends.config import BackendConfig 8 | from ..hub_utils import PushToHubMixin, classproperty 9 | from ..launchers import LauncherConfig 10 | from ..scenarios import ScenarioConfig 11 | from .config import BenchmarkConfig 12 | from .report import BenchmarkReport 13 | 14 | if TYPE_CHECKING: 15 | from ..backends.base import Backend 16 | from ..launchers.base import Launcher 17 | from ..scenarios.base import Scenario 18 | 19 | 20 | LOGGER = getLogger("benchmark") 21 | 22 | 23 | @dataclass 24 | class Benchmark(PushToHubMixin): 25 | config: BenchmarkConfig 26 | report: BenchmarkReport 27 | 28 | def __post_init__(self): 29 | if isinstance(self.config, dict): 30 | self.config = BenchmarkConfig.from_dict(self.config) 31 | elif not isinstance(self.config, BenchmarkConfig): 32 | raise ValueError("config must be either a dict or a BenchmarkConfig instance") 33 | 34 | if isinstance(self.report, dict): 35 | self.report = BenchmarkReport.from_dict(self.report) 36 | elif not isinstance(self.report, BenchmarkReport): 37 | raise ValueError("report must be either a dict or a BenchmarkReport instance") 38 | 39 | @staticmethod 40 | def launch(config: BenchmarkConfig): 41 | """ 42 | Runs a benchmark using specified launcher configuration/logic 43 | """ 44 | 45 | # Allocate requested launcher 46 | launcher_config: LauncherConfig = config.launcher 47 | launcher_factory: Type[Launcher] = get_class(launcher_config._target_) 48 | launcher: Launcher = launcher_factory(launcher_config) 49 | 50 | # Launch the benchmark using the launcher 51 | report = launcher.launch(worker=Benchmark.run, worker_args=[config]) 52 | 53 | if config.log_report: 54 | report.log() 55 | 56 | if config.print_report: 57 | report.print() 58 | 59 | return report 60 | 61 | @staticmethod 62 | def run(config: BenchmarkConfig): 63 | """ 64 | Runs a scenario using specified backend configuration/logic 65 | """ 66 | 67 | # Allocate requested backend 68 | backend_config: BackendConfig = config.backend 69 | backend_factory: Type[Backend] = get_class(backend_config._target_) 70 | backend: Backend = backend_factory(backend_config) 71 | 72 | # Allocate requested scenario 73 | scenario_config: ScenarioConfig = config.scenario 74 | scenario_factory: Type[Scenario] = get_class(scenario_config._target_) 75 | scenario: Scenario = scenario_factory(scenario_config) 76 | 77 | # Run the scenario using the backend 78 | report = scenario.run(backend) 79 | 80 | return report 81 | 82 | @classproperty 83 | def default_filename(cls) -> str: 84 | return "benchmark.json" 85 | -------------------------------------------------------------------------------- /optimum_benchmark/cli.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | from logging import getLogger 4 | 5 | import hydra 6 | from hydra.core.config_store import ConfigStore 7 | from omegaconf import DictConfig, OmegaConf 8 | 9 | from .
import ( 10 | Benchmark, 11 | BenchmarkConfig, 12 | EnergyStarConfig, 13 | InferenceConfig, 14 | InlineConfig, 15 | IPEXConfig, 16 | LlamaCppConfig, 17 | ONNXRuntimeConfig, 18 | OpenVINOConfig, 19 | ProcessConfig, 20 | PyTorchConfig, 21 | PyTXIConfig, 22 | TorchrunConfig, 23 | TrainingConfig, 24 | TRTLLMConfig, 25 | VLLMConfig, 26 | ) 27 | from .logging_utils import setup_logging 28 | 29 | LOGGER = getLogger("hydra-cli") 30 | 31 | 32 | # Register configurations 33 | cs = ConfigStore.instance() 34 | # benchmark configuration 35 | cs.store(name="benchmark", node=BenchmarkConfig) 36 | # backends configurations 37 | cs.store(group="backend", name=IPEXConfig.name, node=IPEXConfig) 38 | cs.store(group="backend", name=OpenVINOConfig.name, node=OpenVINOConfig) 39 | cs.store(group="backend", name=PyTorchConfig.name, node=PyTorchConfig) 40 | cs.store(group="backend", name=ONNXRuntimeConfig.name, node=ONNXRuntimeConfig) 41 | cs.store(group="backend", name=TRTLLMConfig.name, node=TRTLLMConfig) 42 | cs.store(group="backend", name=PyTXIConfig.name, node=PyTXIConfig) 43 | cs.store(group="backend", name=VLLMConfig.name, node=VLLMConfig) 44 | cs.store(group="backend", name=LlamaCppConfig.name, node=LlamaCppConfig) 45 | # scenarios configurations 46 | cs.store(group="scenario", name=TrainingConfig.name, node=TrainingConfig) 47 | cs.store(group="scenario", name=InferenceConfig.name, node=InferenceConfig) 48 | cs.store(group="scenario", name=EnergyStarConfig.name, node=EnergyStarConfig) 49 | # launchers configurations 50 | cs.store(group="launcher", name=InlineConfig.name, node=InlineConfig) 51 | cs.store(group="launcher", name=ProcessConfig.name, node=ProcessConfig) 52 | cs.store(group="launcher", name=TorchrunConfig.name, node=TorchrunConfig) 53 | 54 | 55 | # optimum-benchmark 56 | @hydra.main(version_base=None) 57 | def main(config: DictConfig) -> None: 58 | log_level = os.environ.get("LOG_LEVEL", "INFO") 59 | log_to_file = os.environ.get("LOG_TO_FILE", "1") == "1" 60 | override_benchmarks = os.environ.get("OVERRIDE_BENCHMARKS", "0") == "1" 61 | setup_logging(level=log_level, to_file=log_to_file, prefix="MAIN-PROCESS") 62 | 63 | if glob.glob("benchmark_report.json") and not override_benchmarks: 64 | LOGGER.warning( 65 | "Benchmark was already conducted in the current directory. " 66 | "If you want to override it, set the environment variable OVERRIDE_BENCHMARKS=1 (in hydra.job.env_set)" 67 | ) 68 | return 69 | 70 | # Instantiates the configuration with the right class and triggers its __post_init__ 71 | benchmark_config: BenchmarkConfig = OmegaConf.to_object(config) 72 | benchmark_config.save_json("benchmark_config.json") 73 | 74 | benchmark_report = Benchmark.launch(benchmark_config) 75 | benchmark_report.save_markdown("benchmark_report.md") 76 | benchmark_report.save_json("benchmark_report.json") 77 | benchmark_report.save_text("benchmark_report.txt") 78 | 79 | benchmark = Benchmark(config=benchmark_config, report=benchmark_report) 80 | benchmark.save_json("benchmark.json") 81 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | # How to contribute to Optimum-Benchmark? 3 | 4 | `optimum-benchmark` is an open source project, so all contributions and suggestions are welcome. 5 | 6 | You can contribute in many different ways: giving ideas, answering questions, reporting bugs, proposing enhancements, improving the documentation, fixing bugs,... 
7 | 8 | Many thanks in advance to every contributor. 9 | 10 | ## How to work on an open Issue? 11 | 12 | You have the list of open Issues at: https://github.com/huggingface/optimum-benchmark/issues 13 | 14 | If you would like to work on any of the open Issues: 15 | 16 | 1. Make sure it is not already assigned to someone else. You have the assignee (if any) on the top of the right column of the Issue page. If it is not assigned, you can assign it to yourself by clicking on the "Assign yourself" button, or by leaving a comment on the Issue page. 17 | 18 | 2. Create a Pull Request. 19 | 20 | ## How to create a Pull Request? 21 | 22 | 1. Fork the [repository](https://github.com/huggingface/optimum-benchmark) by clicking on the 'Fork' button on the repository's page. This creates a copy of the code under your GitHub user account. 23 | 24 | 2. Clone your fork to your local disk, and add the base repository as a remote: 25 | 26 | ```bash 27 | git clone https://github.com/<your-github-username>/optimum-benchmark.git 28 | cd optimum-benchmark 29 | git remote add upstream https://github.com/huggingface/optimum-benchmark.git 30 | ``` 31 | 32 | 3. Create a new branch to hold your development changes: 33 | 34 | ```bash 35 | git checkout -b name-of-your-branch 36 | ``` 37 | 38 | **do not** work on the `main` branch. 39 | 40 | 4. Set up a development environment by running the following command in a virtual environment: 41 | 42 | ```bash 43 | pip install -e .[quality,testing] 44 | ``` 45 | 46 | 5. Develop the features or fix the bug you want to work on. 47 | 48 | 6. Depending on the feature you're working on and your development environment, you can run tests locally using the [makefile](Makefile). For example, to test the CLI with the CPU device and the PyTorch backend, you can run the following commands: 49 | 50 | ```bash 51 | make install_cli_cpu_pytorch 52 | make test_cli_cpu_pytorch 53 | ``` 54 | 55 | For a better development experience, we recommend using isolated docker containers to run tests: 56 | 57 | ```bash 58 | make build_cpu_image 59 | make run_cpu_container 60 | make install_cli_cpu_pytorch 61 | make test_cli_cpu_pytorch 62 | ``` 63 | 64 | You can find more information about the available make commands in the [Makefile](Makefile). 65 | 66 | 7. Make sure your code is properly formatted and linted by running: 67 | 68 | ```bash 69 | make style 70 | ``` 71 | 72 | 8. Once you're happy with your changes, add the changed files using `git add` and make a commit with `git commit` to record your changes locally: 73 | 74 | ```bash 75 | git add modified_file.py 76 | git commit 77 | ``` 78 | 79 | It is a good idea to sync your copy of the code with the original repository regularly. This way you can quickly account for changes: 80 | 81 | ```bash 82 | git fetch upstream 83 | git rebase upstream/main 84 | ``` 85 | 86 | Push the changes to your account using: 87 | 88 | ```bash 89 | git push -u origin name-of-your-branch 90 | ``` 91 | 92 | 9. Once you are satisfied, go to the webpage of your fork on GitHub. Click on "Pull request" to send your changes to the project maintainers for review.
93 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/pytorch/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from logging import getLogger 3 | from typing import Any, Dict, Optional 4 | 5 | from ...import_utils import torch_version 6 | from ..config import BackendConfig 7 | 8 | AMP_DTYPES = ["bfloat16", "float16"] 9 | TORCH_DTYPES = ["bfloat16", "float16", "float32", "auto"] 10 | 11 | QUANTIZATION_CONFIGS = {"bnb": {"llm_int8_threshold": 0.0}} 12 | 13 | 14 | LOGGER = getLogger(__name__) 15 | 16 | 17 | @dataclass 18 | class PyTorchConfig(BackendConfig): 19 | name: str = "pytorch" 20 | version: Optional[str] = torch_version() 21 | _target_: str = "optimum_benchmark.backends.pytorch.backend.PyTorchBackend" 22 | 23 | # load options 24 | no_weights: bool = False 25 | tp_plan: Optional[str] = None 26 | device_map: Optional[str] = None 27 | torch_dtype: Optional[str] = None 28 | 29 | # optimization options 30 | eval_mode: bool = True 31 | to_bettertransformer: bool = False 32 | low_cpu_mem_usage: Optional[bool] = None 33 | attn_implementation: Optional[str] = None 34 | cache_implementation: Optional[str] = None 35 | 36 | # tf32 options 37 | allow_tf32: bool = False 38 | 39 | # autocast options 40 | autocast_enabled: bool = False 41 | autocast_dtype: Optional[str] = None 42 | 43 | # torch compile options 44 | torch_compile: bool = False 45 | torch_compile_target: str = "forward" 46 | torch_compile_config: Dict[str, Any] = field(default_factory=dict) 47 | 48 | # quantization options 49 | quantization_scheme: Optional[str] = None 50 | quantization_config: Dict[str, Any] = field(default_factory=dict) 51 | 52 | # distributed inference options 53 | deepspeed_inference: bool = False 54 | deepspeed_inference_config: Dict[str, Any] = field(default_factory=dict) 55 | 56 | # peft options 57 | peft_type: Optional[str] = None 58 | peft_config: Dict[str, Any] = field(default_factory=dict) 59 | 60 | def __post_init__(self): 61 | super().__post_init__() 62 | 63 | if self.model_kwargs.get("torch_dtype", None) is not None: 64 | raise ValueError( 65 | "`torch_dtype` is an explicit argument in the PyTorch backend config. " 66 | "Please remove it from the `model_kwargs` and set it in the backend config directly." 67 | ) 68 | 69 | if self.torch_dtype is not None and self.torch_dtype not in TORCH_DTYPES: 70 | raise ValueError(f"`torch_dtype` should be one of None or {TORCH_DTYPES}, got {self.torch_dtype}") 71 | 72 | if self.autocast_dtype is not None and self.autocast_dtype not in AMP_DTYPES: 73 | raise ValueError(f"`autocast_dtype` must be one of {AMP_DTYPES}. Got {self.autocast_dtype} instead.") 74 | 75 | if self.quantization_scheme is not None: 76 | LOGGER.warning( 77 | "`backend.quantization_scheme` is deprecated and will be removed in a future version. " 78 | "Please use `quantization_config.quant_method` instead." 
79 | ) 80 | if self.quantization_config is None: 81 | self.quantization_config = {"quant_method": self.quantization_scheme} 82 | else: 83 | self.quantization_config["quant_method"] = self.quantization_scheme 84 | 85 | if self.quantization_config is not None: 86 | self.quantization_config = dict( 87 | QUANTIZATION_CONFIGS.get(self.quantization_scheme, {}), # default config 88 | **self.quantization_config, # user config (overwrites default) 89 | ) 90 | -------------------------------------------------------------------------------- /tests/test_examples.py: -------------------------------------------------------------------------------- 1 | import os 2 | from logging import getLogger 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from optimum_benchmark.logging_utils import run_subprocess_and_log_stream_output 8 | 9 | LOGGER = getLogger("test-examples") 10 | 11 | os.environ["TRANSFORMERS_IS_CI"] = "1" 12 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 13 | 14 | TEST_CONFIG_DIR = Path(__file__).parent.parent / "examples" 15 | TEST_CONFIG_NAMES = [ 16 | config.split(".")[0] 17 | for config in os.listdir(TEST_CONFIG_DIR) 18 | if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_")) 19 | ] 20 | TEST_SCRIPT_PATHS = [ 21 | str(TEST_CONFIG_DIR / filename) for filename in os.listdir(TEST_CONFIG_DIR) if filename.endswith(".py") 22 | ] 23 | 24 | ROCR_VISIBLE_DEVICES = os.environ.get("ROCR_VISIBLE_DEVICES", None) 25 | CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", None) 26 | 27 | 28 | @pytest.mark.parametrize("config_name", TEST_CONFIG_NAMES) 29 | def test_cli_configs(config_name): 30 | if config_name == "cpu_ipex_bert": 31 | model = "hf-internal-testing/tiny-random-BertModel" 32 | elif config_name == "cpu_ipex_llama": 33 | model = "hf-internal-testing/tiny-random-LlamaForCausalLM" 34 | elif config_name == "cpu_llama_cpp_text_generation": 35 | model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" 36 | elif config_name == "cpu_llama_cpp_embedding": 37 | model = "nomic-ai/nomic-embed-text-v1.5-GGUF" 38 | elif config_name == "cpu_onnxruntime_static_quant_vit": 39 | model = "hf-internal-testing/tiny-random-ViTModel" 40 | elif config_name == "cpu_openvino_8bit_bert": 41 | model = "hf-internal-testing/tiny-random-BertModel" 42 | elif config_name == "cpu_openvino_diffusion": 43 | model = "hf-internal-testing/tiny-stable-diffusion-torch" 44 | elif config_name == "cuda_pytorch_bert": 45 | model = "hf-internal-testing/tiny-random-BertModel" 46 | elif config_name.startswith("cuda_pytorch_llama"): 47 | model = "hf-internal-testing/tiny-random-LlamaForCausalLM" 48 | elif config_name == "cuda_pytorch_vlm": 49 | model = "hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration" 50 | elif config_name in ["cuda_tgi_llama", "cuda_trt_llama", "cuda_vllm_llama"]: 51 | model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" 52 | else: 53 | raise ValueError(f"Unsupported config name: {config_name}") 54 | 55 | args = [ 56 | "optimum-benchmark", 57 | "--config-dir", 58 | TEST_CONFIG_DIR, 59 | "--config-name", 60 | config_name, 61 | "scenario.warmup_runs=1", 62 | "scenario.input_shapes.batch_size=1", 63 | "++scenario.input_shapes.sequence_length=16", 64 | "++scenario.generate_kwargs.max_new_tokens=16", 65 | "++scenario.generate_kwargs.min_new_tokens=16", 66 | "++scenario.call_kwargs.num_inference_steps=4", 67 | "backend.model=" + model, 68 | "++backend.reshape_kwargs.batch_size=1", 69 | "++backend.reshape_kwargs.sequence_length=16", 70 | ] 71 | 72 | if ROCR_VISIBLE_DEVICES is not 
None: 73 | args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"'] 74 | elif CUDA_VISIBLE_DEVICES is not None: 75 | args += [f'backend.device_ids="{CUDA_VISIBLE_DEVICES}"'] 76 | 77 | popen = run_subprocess_and_log_stream_output(LOGGER, args) 78 | assert popen.returncode == 0, f"Failed to run {config_name}" 79 | 80 | 81 | @pytest.mark.parametrize("script_path", TEST_SCRIPT_PATHS) 82 | def test_api_scripts(script_path): 83 | args = ["python", script_path] 84 | 85 | popen = run_subprocess_and_log_stream_output(LOGGER, args) 86 | assert popen.returncode == 0, f"Failed to run {script_path}" 87 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | import tempfile 5 | from abc import ABC 6 | from contextlib import contextmanager 7 | from logging import getLogger 8 | from multiprocessing import Process, set_executable 9 | from typing import Any, Callable, ClassVar, Generic, List, Optional 10 | 11 | from ..benchmark.report import BenchmarkReport 12 | from ..system_utils import is_nvidia_system, is_rocm_system 13 | from .config import LauncherConfigT 14 | from .device_isolation_utils import assert_device_isolation 15 | 16 | NUMA_EXECUTABLE_CONTENT = """#!/bin/bash 17 | echo "Running with numactl wrapper" 18 | echo "numactl path: {numactl_path}" 19 | echo "numactl args: {numactl_args}" 20 | echo "python path: {python_path}" 21 | echo "python args: $@" 22 | {numactl_path} {numactl_args} {python_path} "$@" 23 | """ 24 | 25 | 26 | class Launcher(Generic[LauncherConfigT], ABC): 27 | NAME: ClassVar[str] 28 | 29 | config: LauncherConfigT 30 | 31 | def __init__(self, config: LauncherConfigT): 32 | self.config = config 33 | self.logger = getLogger(self.NAME) 34 | self.logger.info(f"Allocated {self.NAME} launcher") 35 | 36 | def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) -> BenchmarkReport: 37 | raise NotImplementedError("Launcher must implement launch method") 38 | 39 | @contextmanager 40 | def device_isolation(self, pid: int, device_ids: Optional[str] = None): 41 | if device_ids is None: 42 | if is_rocm_system(): 43 | device_ids = os.environ.get("ROCR_VISIBLE_DEVICES", None) 44 | elif is_nvidia_system(): 45 | device_ids = os.environ.get("CUDA_VISIBLE_DEVICES", None) 46 | 47 | self.device_isolation_process = Process( 48 | target=assert_device_isolation, 49 | kwargs={"action": self.config.device_isolation_action, "device_ids": device_ids, "pid": pid}, 50 | daemon=True, 51 | ) 52 | self.device_isolation_process.start() 53 | self.logger.info(f"\t+ Isolating device(s) [{device_ids}] for process [{pid}] and its children") 54 | self.logger.info(f"\t+ Executing action [{self.config.device_isolation_action}] in case of violation") 55 | 56 | yield 57 | 58 | self.logger.info("\t+ Stopping device isolation process") 59 | self.device_isolation_process.terminate() 60 | self.device_isolation_process.join() 61 | self.device_isolation_process.close() 62 | 63 | @contextmanager 64 | def numactl_executable(self): 65 | self.logger.info("\t+ Warming up multiprocessing context") 66 | dummy_process = Process(target=dummy_target, daemon=False) 67 | dummy_process.start() 68 | dummy_process.join() 69 | dummy_process.close() 70 | 71 | self.logger.info("\t+ Creating numactl wrapper executable for multiprocessing") 72 | python_path = sys.executable 73 | numactl_path = shutil.which("numactl") 74 | if 
numactl_path is None: 75 | raise RuntimeError("Could not find numactl executable. Please install numactl and try again.") 76 | numactl_args = " ".join([f"--{key}={value}" for key, value in self.config.numactl_kwargs.items()]) 77 | numa_executable = tempfile.NamedTemporaryFile(delete=False, prefix="numa_executable_", suffix=".sh") 78 | numa_executable_content = NUMA_EXECUTABLE_CONTENT.format( 79 | numactl_path=numactl_path, numactl_args=numactl_args, python_path=python_path 80 | ) 81 | numa_executable.write(numa_executable_content.encode()) 82 | os.chmod(numa_executable.name, 0o777) 83 | numa_executable.close() 84 | 85 | self.logger.info("\t+ Setting multiprocessing executable to numactl wrapper") 86 | set_executable(numa_executable.name) 87 | 88 | yield 89 | 90 | self.logger.info("\t+ Resetting default multiprocessing executable") 91 | os.unlink(numa_executable.name) 92 | set_executable(sys.executable) 93 | 94 | 95 | def dummy_target() -> None: 96 | exit(0) 97 | -------------------------------------------------------------------------------- /tests/test_energy_star.py: -------------------------------------------------------------------------------- 1 | import os 2 | from logging import getLogger 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from optimum_benchmark.logging_utils import run_subprocess_and_log_stream_output 8 | 9 | LOGGER = getLogger("test-cli") 10 | 11 | os.environ["TRANSFORMERS_IS_CI"] = "1" 12 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 13 | 14 | TEST_CONFIG_DIR = Path(__file__).parent.parent / "energy_star" 15 | TEST_CONFIG_NAMES = [ 16 | config.split(".")[0] 17 | for config in os.listdir(TEST_CONFIG_DIR) 18 | if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_")) 19 | ] 20 | TEST_SCRIPT_PATHS = [ 21 | str(TEST_CONFIG_DIR / filename) for filename in os.listdir(TEST_CONFIG_DIR) if filename.endswith(".py") 22 | ] 23 | 24 | ROCR_VISIBLE_DEVICES = os.environ.get("ROCR_VISIBLE_DEVICES", None) 25 | CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", None) 26 | 27 | 28 | @pytest.mark.parametrize("config_name", TEST_CONFIG_NAMES) 29 | def test_cli_configs(config_name): 30 | if config_name == "automatic_speech_recognition": 31 | model = "optimum-internal-testing/tiny-random-whisper" 32 | elif config_name == "image_classification": 33 | model = "hf-internal-testing/tiny-random-ViTModel" 34 | elif config_name == "image_to_text": 35 | model = "hf-internal-testing/tiny-random-BlipModel" 36 | elif config_name == "object_detection": 37 | model = "hf-internal-testing/tiny-random-DetrModel" 38 | elif config_name == "question_answering": 39 | model = "hf-internal-testing/tiny-random-BertModel" 40 | elif config_name == "sentence_similarity": 41 | model = "hf-internal-testing/tiny-random-BertModel" 42 | elif config_name == "text_classification": 43 | model = "hf-internal-testing/tiny-random-BertModel" 44 | elif config_name == "summarization": 45 | model = "hf-internal-testing/tiny-random-BartModel" 46 | elif config_name == "t5_question_answering": 47 | model = "hf-internal-testing/tiny-random-T5ForConditionalGeneration" 48 | elif config_name == "t5_summarization": 49 | model = "hf-internal-testing/tiny-random-T5ForConditionalGeneration" 50 | elif config_name == "t5_text_classification": 51 | model = "hf-internal-testing/tiny-random-T5ForConditionalGeneration" 52 | elif config_name == "t5_text_generation": 53 | model = "hf-internal-testing/tiny-random-T5ForConditionalGeneration" 54 | elif config_name == "text_to_image": 55 | model =
"hf-internal-testing/tiny-stable-diffusion-torch" 56 | elif config_name == "text_generation": 57 | model = "tiny-random/gpt-oss" 58 | else: 59 | raise ValueError(f"Unknown config name: {config_name}") 60 | 61 | args = [ 62 | "optimum-benchmark", 63 | "--config-dir", 64 | TEST_CONFIG_DIR.as_posix(), 65 | "--config-name", 66 | config_name, 67 | "backend.device=cpu", 68 | "scenario.energy=true", 69 | "scenario.memory=true", 70 | "scenario.latency=true", 71 | "scenario.num_samples=1", 72 | "scenario.warmup_runs=1", 73 | "scenario.input_shapes.batch_size=1", 74 | "++scenario.generate_kwargs.max_new_tokens=16", 75 | "++scenario.generate_kwargs.min_new_tokens=16", 76 | "++scenario.call_kwargs.num_inference_steps=4", 77 | "launcher.device_isolation=false", 78 | "backend.device_map=null", 79 | f"backend.model={model}", 80 | ] 81 | 82 | if ROCR_VISIBLE_DEVICES is not None: 83 | args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"'] 84 | elif CUDA_VISIBLE_DEVICES is not None: 85 | args += [f'backend.device_ids="{CUDA_VISIBLE_DEVICES}"'] 86 | 87 | popen = run_subprocess_and_log_stream_output(LOGGER, args) 88 | assert popen.returncode == 0, f"Failed to run {config_name}" 89 | 90 | 91 | @pytest.mark.parametrize("script_path", TEST_SCRIPT_PATHS) 92 | def test_api_scripts(script_path): 93 | args = ["python", script_path] 94 | 95 | popen = run_subprocess_and_log_stream_output(LOGGER, args) 96 | assert popen.returncode == 0, f"Failed to run {script_path}" 97 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/ipex/backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | from tempfile import TemporaryDirectory 4 | from typing import Any, Dict 5 | 6 | import torch 7 | from hydra.utils import get_class 8 | 9 | from ...import_utils import is_accelerate_available, is_torch_distributed_available 10 | from ..base import Backend 11 | from ..transformers_utils import fast_weights_init 12 | from .config import IPEXConfig 13 | from .utils import TASKS_TO_IPEXMODELS 14 | 15 | if is_accelerate_available(): 16 | from accelerate import Accelerator 17 | 18 | if is_torch_distributed_available(): 19 | import torch.distributed 20 | 21 | if not hasattr(os, "exit"): 22 | os.exit = os._exit 23 | 24 | 25 | class IPEXBackend(Backend[IPEXConfig]): 26 | NAME: str = "ipex" 27 | 28 | def __init__(self, config: IPEXConfig) -> None: 29 | super().__init__(config) 30 | 31 | if self.config.task in TASKS_TO_IPEXMODELS: 32 | self.ipexmodel_class = get_class(TASKS_TO_IPEXMODELS[self.config.task]) 33 | self.logger.info(f"\t+ Using IPEXModel class {self.ipexmodel_class.__name__}") 34 | else: 35 | raise NotImplementedError(f"IPEXBackend does not support task {self.config.task}") 36 | 37 | def load(self) -> None: 38 | self.logger.info("\t+ Creating backend temporary directory") 39 | self.tmpdir = TemporaryDirectory() 40 | 41 | if self.config.no_weights: 42 | self.logger.info("\t+ Creating no weights IPEXModel") 43 | self.create_no_weights_model_fast() 44 | self.logger.info("\t+ Loading no weights IPEXModel") 45 | self.load_ipexmodel_with_no_weights() 46 | else: 47 | self.logger.info("\t+ Loading pretrained IPEXModel") 48 | self.load_ipexmodel_from_pretrained() 49 | 50 | self.tmpdir.cleanup() 51 | 52 | def load_ipexmodel_from_pretrained(self) -> None: 53 | with torch.device(self.config.device): 54 | self.pretrained_model = self.ipexmodel_class.from_pretrained( 55 | self.config.model, 56 | 
**self.config.model_kwargs, 57 | **self.ipexmodel_kwargs, 58 | ) 59 | 60 | def load_ipexmodel_with_no_weights(self) -> None: 61 | with fast_weights_init(): 62 | original_model, self.config.model = self.config.model, self.no_weights_model_path.as_posix() 63 | self.load_ipexmodel_from_pretrained() 64 | self.config.model = original_model 65 | 66 | @property 67 | def ipexmodel_kwargs(self) -> Dict[str, Any]: 68 | kwargs = {} 69 | 70 | if self.config.torch_dtype is not None: 71 | kwargs["torch_dtype"] = getattr(torch, self.config.torch_dtype) 72 | 73 | return kwargs 74 | 75 | @property 76 | def split_between_processes(self) -> bool: 77 | return is_torch_distributed_available() and torch.distributed.is_initialized() 78 | 79 | def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: 80 | if self.split_between_processes: 81 | with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs: 82 | inputs = process_inputs 83 | 84 | for key, value in inputs.items(): 85 | if isinstance(value, torch.Tensor): 86 | inputs[key] = value.to(self.config.device) 87 | 88 | return inputs 89 | 90 | def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: 91 | return self.pretrained_model.forward(**inputs, **kwargs) 92 | 93 | def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: 94 | return self.pretrained_model.generate(**inputs, **kwargs) 95 | 96 | def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: 97 | return self.pretrained_model.generate(**inputs, **kwargs) 98 | 99 | def call(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: 100 | return self.pretrained_model(**inputs, **kwargs) 101 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .ruff_cache/ 163 | .vscode/ 164 | *.ipynb 165 | runs/ 166 | sweeps/ 167 | data/ 168 | version.txt 169 | 170 | .engine/ 171 | work-in-progress/ 172 | experiments/ 173 | amdsmi/ 174 | amd-* 175 | 176 | # Code carbon 177 | generate_codecarbon.json 178 | task_codecarbon.json 179 | prefill_codecarbon.json 180 | 181 | # Mac specific 182 | external_repos/ 183 | .DS_Store 184 | outputs/ 185 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/onnxruntime/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Any, Dict, Optional 3 | 4 | from ...import_utils import onnxruntime_version 5 | from ...task_utils import TEXT_GENERATION_TASKS 6 | from ..config import BackendConfig 7 | 8 | QUANTIZATION_CONFIG = { 9 | "is_static": False, 10 | "format": "QOperator", 11 | # is_static and format are mandatory 12 | } 13 | 14 | CALIBRATION_CONFIG = { 15 | "method": "MinMax" 16 | # method is mandatory 17 | } 18 | 19 | AUTO_QUANTIZATION_CONFIG = { 20 | "is_static": False 21 | # is_static is mandatory 22 | } 23 | 24 | IO_BINDING_LIBRARIES = ["transformers", "timm"] 25 | IO_BINDING_PROVIDERS = ["CPUExecutionProvider", "CUDAExecutionProvider"] 26 | DEVICE_PROVIDER_MAP = {"cpu": "CPUExecutionProvider", "cuda": "CUDAExecutionProvider"} 27 | 28 | 29 | @dataclass 30 | class ONNXRuntimeConfig(BackendConfig): 31 | name: str = "onnxruntime" 32 | version: Optional[str] = onnxruntime_version() 33 | _target_: str = "optimum_benchmark.backends.onnxruntime.backend.ONNXRuntimeBackend" 34 | 35 | # load options 36 | no_weights: bool = False 37 | 38 | # ortmodel kwargs 39 | export: Optional[bool] = None 40 | provider: Optional[str] = None 41 | use_cache: Optional[bool] = None 42 | use_merged: Optional[bool] = None 43 | torch_dtype: Optional[str] = None 44 | use_io_binding: Optional[bool] = None 45 | session_options: Dict[str, Any] = field(default_factory=dict) 46 | provider_options: Dict[str, Any] = field(default_factory=dict) 47 | 48 | # null, O1, O2, O3, O4 49 | auto_optimization: Optional[str] = None 50 | auto_optimization_config: Dict[str, Any] = field(default_factory=dict) 51 | 52 | # null, arm64, avx2, avx512, avx512_vnni, tensorrt 53 | auto_quantization: Optional[str] = None 54 | auto_quantization_config: Dict[str, Any] = field(default_factory=dict) 55 | 56 | # minmax, entropy, l2norm, percentiles 57 | auto_calibration: Optional[str] = None 58 | auto_calibration_config: Dict[str, Any] = field(default_factory=dict) 59 | 60 | # manual optimization options 61 | optimization: bool = False 62 | optimization_config: Dict[str, Any] = field(default_factory=dict) 63 | 64 | # manual quantization options 65 | quantization: bool = False 66 | quantization_config: Dict[str, Any] = field(default_factory=dict) 67 | 68 | # manual calibration options 69 | calibration: bool = False 70 | calibration_config: Dict[str, Any] = field(default_factory=dict) 71 | 72 | def __post_init__(self): 73 | super().__post_init__() 74 | 75 | if self.device not in ["cpu", "cuda"]: 76 | raise ValueError(f"ONNXRuntimeBackend only supports CPU and CUDA devices, got {self.device}") 77 | 78 | if not self.no_weights and not self.export and self.torch_dtype is not None: 79 | raise NotImplementedError("Can't convert an exported model's weights to a different dtype.") 80 | 81 | if self.provider is None: 82 | self.provider = DEVICE_PROVIDER_MAP[self.device] 83 | 84 | if self.use_io_binding 
is None: 85 | self.use_io_binding = self.provider in IO_BINDING_PROVIDERS and self.library in IO_BINDING_LIBRARIES 86 | 87 | if self.provider == "TensorrtExecutionProvider" and self.task in TEXT_GENERATION_TASKS: 88 | raise NotImplementedError("we don't support TensorRT for text generation tasks") 89 | 90 | if self.quantization: 91 | self.quantization_config = {**QUANTIZATION_CONFIG, **self.quantization_config} 92 | # raise ValueError if the quantization is static but calibration is not enabled 93 | if self.quantization_config["is_static"] and self.auto_calibration is None and not self.calibration: 94 | raise ValueError( 95 | "Quantization is static but calibration is not enabled. " 96 | "Please enable calibration or disable static quantization." 97 | ) 98 | 99 | if self.auto_quantization is not None: 100 | self.auto_quantization_config = {**AUTO_QUANTIZATION_CONFIG, **self.auto_quantization_config} 101 | if self.auto_quantization_config["is_static"] and self.auto_calibration is None and not self.calibration: 102 | raise ValueError( 103 | "Quantization is static but calibration is not enabled. " 104 | "Please enable calibration or disable static quantization." 105 | ) 106 | 107 | if self.calibration: 108 | self.calibration_config = {**CALIBRATION_CONFIG, **self.calibration_config} 109 | -------------------------------------------------------------------------------- /optimum_benchmark/scenarios/inference/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from logging import getLogger 3 | from typing import Any, Dict, Optional 4 | 5 | from ...system_utils import is_rocm_system 6 | from ..config import ScenarioConfig 7 | 8 | LOGGER = getLogger("inference") 9 | 10 | INPUT_SHAPES = { 11 | "batch_size": 2, 12 | } 13 | 14 | 15 | @dataclass 16 | class InferenceConfig(ScenarioConfig): 17 | name: str = "inference" 18 | _target_: str = "optimum_benchmark.scenarios.inference.scenario.InferenceScenario" 19 | 20 | # benchmark options 21 | iterations: int = field( 22 | default=10, 23 | metadata={ 24 | "help": "Minimum number of iterations to run the benchmark. " 25 | "The number of tracked inferences will be at least this value." 26 | "Set to 0 to disable this constraint (benchmark will run for `duration` seconds)." 27 | }, 28 | ) 29 | duration: int = field( 30 | default=10, 31 | metadata={ 32 | "help": "Minimum duration of the benchmark in seconds. " 33 | "The sum of tracked inferences will be at least this value." 34 | "Set to 0 to disable this constraint (benchmark will run for `iterations` iterations)." 35 | }, 36 | ) 37 | warmup_runs: int = field( 38 | default=10, 39 | metadata={"help": "Number of warmup runs to perform before benchmarking."}, 40 | ) 41 | 42 | # input/output config 43 | input_shapes: Dict[str, Any] = field( 44 | default_factory=dict, 45 | metadata={"help": "Input shapes for the model. 
Missing keys will be filled with default values."}, 46 | ) 47 | new_tokens: Optional[int] = field( 48 | default=None, 49 | metadata={"help": "If set, `max_new_tokens` and `min_new_tokens` will be set to this value."}, 50 | ) 51 | 52 | # tracking options 53 | memory: bool = field(default=False, metadata={"help": "Measure max memory usage"}) 54 | latency: bool = field(default=True, metadata={"help": "Measure latencies and throughputs"}) 55 | energy: bool = field(default=False, metadata={"help": "Measure energy usage and efficiency"}) 56 | 57 | # methods kwargs 58 | forward_kwargs: Dict[str, Any] = field( 59 | default_factory=dict, metadata={"help": "Keyword arguments to pass to the forward method of the backend."} 60 | ) 61 | generate_kwargs: Dict[str, Any] = field( 62 | default_factory=dict, metadata={"help": "Keyword arguments to pass to the generate method of the backend."} 63 | ) 64 | call_kwargs: Dict[str, Any] = field( 65 | default_factory=dict, metadata={"help": "Keyword arguments to pass to the call method of the backend."} 66 | ) 67 | 68 | def __post_init__(self): 69 | super().__post_init__() 70 | 71 | self.input_shapes = {**INPUT_SHAPES, **self.input_shapes} 72 | 73 | if self.new_tokens is not None: 74 | LOGGER.warning( 75 | "`new_tokens` is deprecated. Use `max_new_tokens` and `min_new_tokens` instead. " 76 | "Setting `max_new_tokens` and `min_new_tokens` to `new_tokens`." 77 | ) 78 | self.generate_kwargs["max_new_tokens"] = self.new_tokens 79 | self.generate_kwargs["min_new_tokens"] = self.new_tokens 80 | 81 | if ( 82 | "max_new_tokens" in self.generate_kwargs 83 | and "min_new_tokens" in self.generate_kwargs 84 | and self.generate_kwargs["max_new_tokens"] != self.generate_kwargs["min_new_tokens"] 85 | ): 86 | raise ValueError( 87 | "Setting `min_new_tokens` and `max_new_tokens` to different values results in non-deterministic behavior." 88 | ) 89 | 90 | elif "max_new_tokens" in self.generate_kwargs and "min_new_tokens" not in self.generate_kwargs: 91 | LOGGER.warning( 92 | "Setting `max_new_tokens` without `min_new_tokens` results in non-deterministic behavior. " 93 | "Setting `min_new_tokens` to `max_new_tokens`." 94 | ) 95 | self.generate_kwargs["min_new_tokens"] = self.generate_kwargs["max_new_tokens"] 96 | 97 | elif "min_new_tokens" in self.generate_kwargs and "max_new_tokens" not in self.generate_kwargs: 98 | LOGGER.warning( 99 | "Setting `min_new_tokens` without `max_new_tokens` results in non-deterministic behavior. " 100 | "Setting `max_new_tokens` to `min_new_tokens`." 101 | ) 102 | self.generate_kwargs["max_new_tokens"] = self.generate_kwargs["min_new_tokens"] 103 | 104 | if self.energy and is_rocm_system(): 105 | raise ValueError("Energy measurement through codecarbon is not yet available on ROCm-powered devices.") 106 | --------------------------------------------------------------------------------
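
As a quick illustration of the `InferenceConfig` post-initialization logic above, the following minimal sketch (not one of the repository files, and assuming `optimum-benchmark` is installed) shows how the deprecated `new_tokens` field is mirrored into `generate_kwargs` and how user-provided `input_shapes` are merged on top of the defaults:

```python
# Minimal sketch (assumes optimum-benchmark is installed); it only exercises
# the InferenceConfig.__post_init__ behavior shown above.
from optimum_benchmark import InferenceConfig

scenario = InferenceConfig(new_tokens=32, input_shapes={"sequence_length": 64})

# The deprecated `new_tokens` value is copied into both generation bounds,
# and the default input shapes ({"batch_size": 2}) are merged with the user's.
print(scenario.generate_kwargs)  # {'max_new_tokens': 32, 'min_new_tokens': 32}
print(scenario.input_shapes)     # {'batch_size': 2, 'sequence_length': 64}
```

Passing different `max_new_tokens` and `min_new_tokens` values would instead raise a `ValueError`, as enforced in `__post_init__`.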