├── .python-version
├── optimum_benchmark
    ├── backends
    │   ├── ipex
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   ├── config.py
    │   │   └── backend.py
    │   ├── vllm
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── llama_cpp
    │   │   ├── __init__.py
    │   │   ├── config.py
    │   │   └── backend.py
    │   ├── openvino
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── config.py
    │   ├── py_txi
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── pytorch
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── onnxruntime
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── config.py
    │   ├── tensorrt_llm
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── config.py
    │   ├── peft_utils.py
    │   ├── __init__.py
    │   ├── diffusers_utils.py
    │   └── timm_utils.py
    ├── benchmark
    │   ├── __init__.py
    │   ├── config.py
    │   └── base.py
    ├── generators
    │   ├── __init__.py
    │   ├── input_generator.py
    │   ├── dataset_generator.py
    │   └── base.py
    ├── profilers
    │   ├── __init__.py
    │   ├── fx_profiler.py
    │   └── ort_profiler.py
    ├── launchers
    │   ├── inline
    │   │   ├── __init__.py
    │   │   ├── launcher.py
    │   │   └── config.py
    │   ├── process
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── torchrun
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── __init__.py
    │   ├── config.py
    │   └── base.py
    ├── scenarios
    │   ├── training
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── energy_star
    │   │   └── __init__.py
    │   ├── inference
    │   │   ├── __init__.py
    │   │   └── config.py
    │   ├── __init__.py
    │   ├── config.py
    │   └── base.py
    ├── process_utils.py
    ├── version.py
    ├── trackers
    │   └── __init__.py
    ├── __init__.py
    ├── logging_utils.py
    └── cli.py
├── tests
    ├── configs
    │   ├── _cpu_.yaml
    │   ├── _export_.yaml
    │   ├── _gpt2_.yaml
    │   ├── _bloom_.yaml
    │   ├── _cuda_.yaml
    │   ├── _dp_.yaml
    │   ├── _bert_.yaml
    │   ├── _device_isolation_.yaml
    │   ├── _device_map_.yaml
    │   ├── _st_bert_.yaml
    │   ├── _torch_compile_.yaml
    │   ├── _timm_.yaml
    │   ├── _no_weights_.yaml
    │   ├── _serving_mode_.yaml
    │   ├── _training_.yaml
    │   ├── _vllm_.yaml
    │   ├── _awq_.yaml
    │   ├── _tensorrt_llm_pp_.yaml
    │   ├── _tensorrt_llm_tp_.yaml
    │   ├── _ort_quant_.yaml
    │   ├── _peft_.yaml
    │   ├── _bnb_.yaml
    │   ├── _gptq_.yaml
    │   ├── _text_encoders_decoders_.yaml
    │   ├── _diffusers_.yaml
    │   ├── _text_decoders_.yaml
    │   ├── _gguf_.yaml
    │   ├── _text_encoders_.yaml
    │   ├── _ddp_.yaml
    │   ├── _tp_.yaml
    │   ├── _vllm_pp_.yaml
    │   ├── _vllm_tp_.yaml
    │   ├── _image_text_to_text_.yaml
    │   ├── cpu_inference_pytorch_timm.yaml
    │   ├── _inference_.yaml
    │   ├── cpu_inference_llama_cpp_gguf.yaml
    │   ├── cpu_inference_openvino_diffusers.yaml
    │   ├── cpu_inference_pytorch_diffusers.yaml
    │   ├── cuda_inference_tensorrt_llm.yaml
    │   ├── cuda_inference_tensorrt_llm_pp.yaml
    │   ├── cuda_inference_tensorrt_llm_tp.yaml
    │   ├── _deepspeed_inference_.yaml
    │   ├── cpu_inference_py_txi_gpt2.yaml
    │   ├── cuda_inference_py_txi_gpt2.yaml
    │   ├── cpu_inference_onnxruntime_timm.yaml
    │   ├── cpu_inference_py_txi_st_bert.yaml
    │   ├── cuda_inference_py_txi_st_bert.yaml
    │   ├── cuda_inference_pytorch_timm.yaml
    │   ├── cpu_inference_ipex_text_decoders.yaml
    │   ├── cpu_inference_onnxruntime_diffusers.yaml
    │   ├── cpu_training_pytorch_text_decoders.yaml
    │   ├── cpu_training_pytorch_text_encoders.yaml
    │   ├── cpu_inference_ipex_text_encoders.yaml
    │   ├── cpu_inference_pytorch_text_decoders.yaml
    │   ├── cpu_inference_pytorch_text_encoders.yaml
    │   ├── cpu_inference_pytorch_timm_torch_compile.yaml
    │   ├── cuda_inference_pytorch_diffusers.yaml
    │   ├── cpu_inference_openvino_text_decoders.yaml
    │   ├── cpu_inference_openvino_text_encoders.yaml
    │   ├── cpu_inference_pytorch_diffusers_torch_compile.yaml
    │   ├── cpu_inference_pytorch_image_text_to_text.yaml
    │   ├── cuda_inference_vllm_bloom_pp.yaml
    │   ├── cuda_inference_vllm_bloom_tp.yaml
    │   ├── cuda_training_pytorch_ddp.yaml
    │   ├── cpu_inference_pytorch_text_encoders_decoders.yaml
    │   ├── cuda_inference_pytorch_bnb.yaml
    │   ├── cuda_inference_pytorch_gptq.yaml
    │   ├── cuda_training_pytorch_dp.yaml
    │   ├── cuda_training_pytorch_peft.yaml
    │   ├── cuda_inference_pytorch_tp.yaml
    │   ├── cuda_training_pytorch_device_map.yaml
    │   ├── cpu_inference_onnxruntime_ort_quant.yaml
    │   ├── cpu_inference_onnxruntime_text_decoders.yaml
    │   ├── cpu_inference_onnxruntime_text_encoders.yaml
    │   ├── cuda_inference_pytorch_device_map.yaml
    │   ├── cuda_inference_pytorch_timm_torch_compile.yaml
    │   ├── cuda_inference_vllm_bloom.yaml
    │   ├── cuda_training_pytorch_text_decoders.yaml
    │   ├── cuda_training_pytorch_text_encoders.yaml
    │   ├── cuda_inference_pytorch_text_decoders.yaml
    │   ├── cuda_inference_pytorch_text_encoders.yaml
    │   ├── cuda_inference_pytorch_diffusers_torch_compile.yaml
    │   ├── cpu_inference_onnxruntime_text_encoders_decoders.yaml
    │   ├── cuda_inference_pytorch_deepspeed_inference.yaml
    │   ├── cuda_inference_onnxruntime_text_decoders.yaml
    │   ├── cuda_inference_onnxruntime_text_encoders.yaml
    │   └── _base_.yaml
    ├── conftest.py
    ├── test_examples.py
    └── test_energy_star.py
├── logo.png
├── uv.toml
├── examples
    ├── mps_pytorch_bert.yaml
    ├── cpu_llama_cpp_text_generation.yaml
    ├── cuda_pytorch_bert.yaml
    ├── cpu_llama_cpp_embedding.yaml
    ├── cpu_openvino_diffusion.yaml
    ├── cpu_openvino_8bit_bert.yaml
    ├── cpu_onnxruntime_static_quant_vit.yaml
    ├── _base_.yaml
    ├── cuda_pytorch_llama.yaml
    ├── cuda_tgi_llama.yaml
    ├── cuda_trt_llama.yaml
    ├── cpu_ipex_bert.yaml
    ├── cuda_pytorch_llama_compile_model.yaml
    ├── cuda_pytorch_llama_compile_regions.yaml
    ├── cpu_ipex_llama.yaml
    ├── cuda_vllm_llama.yaml
    ├── cuda_pytorch_vlm.yaml
    ├── cuda_pytorch_bert.py
    └── cuda_pytorch_llama_quants.py
├── docker
    ├── unroot
    │   └── Dockerfile
    ├── cpu
    │   └── Dockerfile
    ├── cuda
    │   └── Dockerfile
    └── rocm
    │   └── Dockerfile
├── energy_star
    ├── _base_.yaml
    ├── object_detection.yaml
    ├── image_to_text.yaml
    ├── image_classification.yaml
    ├── automatic_speech_recognition.yaml
    ├── text_to_image.yaml
    ├── text_classification.yaml
    ├── question_answering.yaml
    ├── summarization.yaml
    ├── sentence_similarity.yaml
    ├── t5_question_answering.yaml
    ├── t5_summarization.yaml
    ├── t5_text_generation.yaml
    ├── t5_text_classification.yaml
    └── text_generation.yaml
├── .github
    └── workflows
    │   ├── quality.yaml
    │   ├── security.yml
    │   ├── test_energy_star.yaml
    │   ├── test_api_rocm.yaml
    │   ├── test_cli_cpu_ipex.yaml
    │   ├── test_cli_cpu_pytorch.yaml
    │   ├── test_cli_cpu_openvino.yaml
    │   ├── test_cli_cpu_llama_cpp.yaml
    │   ├── test_cli_cpu_onnxruntime.yaml
    │   ├── test_cli_cuda_py_txi.yaml
    │   ├── test_cli_cuda_onnxruntime.yaml
    │   ├── test_api_cpu.yaml
    │   ├── test_api_cuda.yaml
    │   ├── test_api_misc.yaml
    │   ├── test_cli_misc.yaml
    │   ├── test_cli_cpu_py_txi.yaml
    │   ├── test_cli_rocm_pytorch.yaml
    │   ├── test_cli_cuda_vllm.yaml
    │   ├── test_cli_cuda_pytorch.yaml
    │   ├── test_cli_cuda_tensorrt_llm.yaml
    │   └── images.yaml
├── scripts
    ├── total_tests_runs.py
    └── update_ci_badges.py
├── CONTRIBUTING.md
└── .gitignore

/.python-version:
--------------------------------------------------------------------------------
1 | 3.10
2 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/ipex/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/vllm/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/generators/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/profilers/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/llama_cpp/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/openvino/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/py_txi/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/pytorch/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/launchers/inline/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/launchers/process/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/launchers/torchrun/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/scenarios/training/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/onnxruntime/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/tensorrt_llm/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/scenarios/energy_star/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/optimum_benchmark/scenarios/inference/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/tests/configs/_cpu_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   device: cpu
3 | 
--------------------------------------------------------------------------------
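Note: the underscore-prefixed files under tests/configs, like _cpu_ above, are Hydra fragments that the full test configs compose through their defaults lists. The same composition can be built directly with the library's Python API; the sketch below is illustrative only and is not a file from this repository. It assumes the top-level package re-exports Benchmark, BenchmarkConfig and PyTorchConfig alongside the ProcessConfig and InferenceConfig exports visible later in this dump (as the project README suggests); treat the exact names as assumptions.

# Hypothetical usage sketch; not part of the repository dump.
from optimum_benchmark import (
    Benchmark,
    BenchmarkConfig,
    InferenceConfig,
    ProcessConfig,
    PyTorchConfig,
)
from optimum_benchmark.logging_utils import setup_logging

setup_logging(level="INFO")

if __name__ == "__main__":
    # Roughly the composition of the _base_ + _cpu_ + _inference_ + _bert_ fragments:
    # a process launcher, a CPU PyTorch backend and a short latency/memory scenario.
    config = BenchmarkConfig(
        name="cpu_inference_pytorch_bert",
        launcher=ProcessConfig(),
        backend=PyTorchConfig(
            model="google-bert/bert-base-uncased",
            device="cpu",
            no_weights=True,
        ),
        scenario=InferenceConfig(
            latency=True,
            memory=True,
            input_shapes={"batch_size": 1, "sequence_length": 16},
        ),
    )
    report = Benchmark.launch(config)

The CLI reaches the same composition from YAML, along the lines of "optimum-benchmark --config-dir tests/configs --config-name <config>", per the project README.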
/tests/configs/_export_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   export: true
3 | 
--------------------------------------------------------------------------------
/tests/configs/_gpt2_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: openai-community/gpt2
3 | 
--------------------------------------------------------------------------------
/tests/configs/_bloom_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: bigscience/bloom-560m
3 | 
--------------------------------------------------------------------------------
/tests/configs/_cuda_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   device: cuda
3 |   device_ids: 0
4 | 
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huggingface/optimum-benchmark/HEAD/logo.png
--------------------------------------------------------------------------------
/tests/configs/_dp_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   device_ids: 0,1
3 |   model: openai-community/gpt2
4 | 
--------------------------------------------------------------------------------
/tests/configs/_bert_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: google-bert/bert-base-uncased
3 |   task: feature-extraction
4 | 
--------------------------------------------------------------------------------
/tests/configs/_device_isolation_.yaml:
--------------------------------------------------------------------------------
1 | launcher:
2 |   device_isolation: true
3 |   device_isolation_action: warn
4 | 
--------------------------------------------------------------------------------
/tests/configs/_device_map_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   device_ids: 0,1
3 |   device_map: auto
4 |   model: openai-community/gpt2
5 | 
--------------------------------------------------------------------------------
/tests/configs/_st_bert_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: sentence-transformers/all-MiniLM-L6-v2
3 |   task: feature-extraction
4 | 
--------------------------------------------------------------------------------
/tests/configs/_torch_compile_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   torch_compile: true
3 |   torch_compile_config:
4 |     backend: eager
5 | 
--------------------------------------------------------------------------------
/tests/configs/_timm_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   library: timm
3 |   task: image-classification
4 |   model: timm/tiny_vit_21m_224.in1k
5 | 
--------------------------------------------------------------------------------
/tests/configs/_no_weights_.yaml:
--------------------------------------------------------------------------------
1 | hydra:
2 |   mode: MULTIRUN
3 |   sweeper:
4 |     params:
5 |       backend.no_weights: true,false
6 | 
--------------------------------------------------------------------------------
/optimum_benchmark/backends/tensorrt_llm/utils.py:
--------------------------------------------------------------------------------
1 | MODEL_TYPE_TO_TRTLLMMODELS = {"llama": "optimum.nvidia.models.llama.LlamaForCausalLM"}
2 | 
--------------------------------------------------------------------------------
/tests/configs/_serving_mode_.yaml:
--------------------------------------------------------------------------------
1 | hydra:
2 |   mode: MULTIRUN
3 |   sweeper:
4 |     params:
5 |       backend.serving_mode: online,offline
6 | 
--------------------------------------------------------------------------------
/tests/configs/_training_.yaml:
--------------------------------------------------------------------------------
1 | defaults:
2 |   - override scenario: training
3 | 
4 | scenario:
5 |   max_steps: 5
6 |   warmup_steps: 2
7 | 
--------------------------------------------------------------------------------
/tests/configs/_vllm_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   serving_mode: offline
3 |   engine_args:
4 |     max_model_len: 512
5 |     enforce_eager: true
6 | 
--------------------------------------------------------------------------------
/tests/configs/_awq_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: TheBloke/TinyLlama-1.1B-Chat-v1.0-AWQ
3 |   quantization_scheme: awq
4 |   quantization_config:
5 |     version: exllama
6 | 
--------------------------------------------------------------------------------
/tests/configs/_tensorrt_llm_pp_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
3 |   gpus_per_node: 2
4 |   device_ids: 0,1
5 |   world_size: 2
6 |   pp: 2
7 | 
--------------------------------------------------------------------------------
/tests/configs/_tensorrt_llm_tp_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
3 |   gpus_per_node: 2
4 |   device_ids: 0,1
5 |   world_size: 2
6 |   tp: 2
7 | 
--------------------------------------------------------------------------------
/tests/configs/_ort_quant_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: google-bert/bert-base-uncased
3 |   quantization: true
4 |   quantization_config:
5 |     is_static: true
6 |     calibration: true
7 | 
--------------------------------------------------------------------------------
/tests/configs/_peft_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   peft_type: LORA
3 | 
4 | hydra:
5 |   mode: MULTIRUN
6 |   sweeper:
7 |     params:
8 |       backend.model: openai-community/gpt2,google-bert/bert-base-uncased
--------------------------------------------------------------------------------
/tests/configs/_bnb_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
3 |   quantization_scheme: bnb
4 |   quantization_config:
5 |     load_in_4bit: true
6 |     bnb_4bit_compute_dtype: float16
7 | 
--------------------------------------------------------------------------------
/tests/configs/_gptq_.yaml:
--------------------------------------------------------------------------------
1 | backend:
2 |   model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ
3 |   quantization_scheme: gptq
4 |   quantization_config:
5 |     exllama_config:
6 |       version: 2
7 |       max_input_len: 512
8 | 
-------------------------------------------------------------------------------- /tests/configs/_text_encoders_decoders_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.task: text2text-generation 6 | backend.model: hf-internal-testing/tiny-random-T5ForConditionalGeneration 7 | -------------------------------------------------------------------------------- /tests/configs/_diffusers_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.library: diffusers 6 | backend.task: text-to-image 7 | backend.model: hf-internal-testing/tiny-stable-diffusion-torch 8 | -------------------------------------------------------------------------------- /tests/configs/_text_decoders_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.task: text-generation 6 | backend.model: hf-internal-testing/tiny-random-GPT2LMHeadModel,hf-internal-testing/tiny-random-LlamaForCausalLM 7 | -------------------------------------------------------------------------------- /tests/configs/_gguf_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.task: text-generation,feature-extraction 6 | backend.filename: DistilGPT2-TinyStories.Q4_K_S.gguf 7 | backend.model: mradermacher/DistilGPT2-TinyStories-GGUF 8 | -------------------------------------------------------------------------------- /tests/configs/_text_encoders_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.task: fill-mask,text-classification,token-classification,question-answering 6 | backend.model: hf-internal-testing/tiny-random-BertModel,hf-internal-testing/tiny-random-RobertaModel 7 | -------------------------------------------------------------------------------- /tests/configs/_ddp_.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override launcher: torchrun 3 | 4 | launcher: 5 | nproc_per_node: 2 6 | 7 | backend: 8 | device_ids: 0,1 9 | model: hf-internal-testing/tiny-random-LlamaForCausalLM 10 | 11 | hydra: 12 | job: 13 | env_set: 14 | LOG_ALL_RANKS: 1 15 | -------------------------------------------------------------------------------- /tests/configs/_tp_.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override launcher: torchrun 3 | 4 | launcher: 5 | nproc_per_node: 2 6 | 7 | backend: 8 | tp_plan: auto 9 | device_ids: 0,1 10 | model: hf-internal-testing/tiny-random-LlamaForCausalLM 11 | 12 | hydra: 13 | job: 14 | env_set: 15 | LOG_ALL_RANKS: 1 -------------------------------------------------------------------------------- /tests/configs/_vllm_pp_.yaml: -------------------------------------------------------------------------------- 1 | backend: 2 | device_ids: 0,1 3 | serving_mode: online 4 | engine_args: 5 | max_model_len: 512 6 | enforce_eager: true 7 | pipeline_parallel_size: 2 8 | distributed_executor_backend: mp 9 | 10 | hydra: 11 | job: 12 | env_set: 13 | VLLM_WORKER_MULTIPROC_METHOD: spawn 14 | -------------------------------------------------------------------------------- /tests/configs/_vllm_tp_.yaml: 
-------------------------------------------------------------------------------- 1 | backend: 2 | device_ids: 0,1 3 | serving_mode: offline 4 | engine_args: 5 | max_model_len: 512 6 | enforce_eager: true 7 | tensor_parallel_size: 2 8 | distributed_executor_backend: mp 9 | 10 | hydra: 11 | job: 12 | env_set: 13 | VLLM_WORKER_MULTIPROC_METHOD: spawn 14 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import LauncherConfig # noqa: F401 2 | from .inline.config import InlineConfig # noqa: F401 3 | from .process.config import ProcessConfig # noqa: F401 4 | from .torchrun.config import TorchrunConfig # noqa: F401 5 | 6 | __all__ = [ 7 | "InlineConfig", 8 | "ProcessConfig", 9 | "TorchrunConfig", 10 | "LauncherConfig", 11 | ] 12 | -------------------------------------------------------------------------------- /uv.toml: -------------------------------------------------------------------------------- 1 | # UV package manager configuration for optimum-benchmark 2 | 3 | # Use the latest stable Python 4 | python-preference = "only-managed" 5 | 6 | # Resolution strategy 7 | resolution = "highest" 8 | 9 | # Enable preview features 10 | preview = true 11 | 12 | # Extra build dependencies for specific packages 13 | [extra-build-dependencies] 14 | flash-attn = ["torch"] 15 | gptqmodel = ["torch"] 16 | -------------------------------------------------------------------------------- /optimum_benchmark/scenarios/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import ScenarioConfig # noqa: F401 2 | from .energy_star.config import EnergyStarConfig # noqa: F401 3 | from .inference.config import InferenceConfig # noqa: F401 4 | from .training.config import TrainingConfig # noqa: F401 5 | 6 | __all__ = [ 7 | "EnergyStarConfig", 8 | "InferenceConfig", 9 | "TrainingConfig", 10 | "ScenarioConfig", 11 | ] 12 | -------------------------------------------------------------------------------- /tests/configs/_image_text_to_text_.yaml: -------------------------------------------------------------------------------- 1 | hydra: 2 | mode: MULTIRUN 3 | sweeper: 4 | params: 5 | backend.task: image-text-to-text 6 | backend.model: 7 | hf-internal-testing/tiny-random-BlipForConditionalGeneration, 8 | hf-internal-testing/tiny-random-IdeficsForVisionText2Text, 9 | hf-internal-testing/tiny-random-GitForCausalLM 10 | +scenario.input_shapes.num_images: 2 11 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_timm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _timm_ # inherits from timm config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: pytorch 9 | 10 | name: cpu_inference_pytorch_timm 11 | -------------------------------------------------------------------------------- /tests/configs/_inference_.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override scenario: inference 3 | 4 | scenario: 5 | memory: true 6 | latency: true 7 | 8 | duration: 1 9 | iterations: 1 10 | warmup_runs: 1 11 | 12 | input_shapes: 13 | 
batch_size: 1 14 | sequence_length: 16 15 | 16 | generate_kwargs: 17 | max_new_tokens: 16 18 | min_new_tokens: 16 19 | 20 | call_kwargs: 21 | num_inference_steps: 4 22 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_llama_cpp_gguf.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _gguf_ # inherits from llama_cpp config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: llama_cpp 9 | 10 | name: cpu_inference_llama_cpp_gpt2_gguf 11 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_openvino_diffusers.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _diffusers_ # inherits from diffusers config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: openvino 9 | 10 | name: cpu_inference_openvino_diffusers 11 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_diffusers.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _diffusers_ # inherits from diffusers config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: pytorch 9 | 10 | name: cpu_inference_pytorch_diffusers 11 | -------------------------------------------------------------------------------- /optimum_benchmark/scenarios/config.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from dataclasses import dataclass 3 | from logging import getLogger 4 | from typing import TypeVar 5 | 6 | LOGGER = getLogger("benchmark") 7 | 8 | 9 | @dataclass 10 | class ScenarioConfig(ABC): 11 | name: str 12 | _target_: str 13 | 14 | def __post_init__(self): 15 | pass 16 | 17 | 18 | ScenarioConfigT = TypeVar("ScenarioConfigT", bound=ScenarioConfig) 19 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_tensorrt_llm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _self_ # hydra 1.1 compatibility 7 | - override backend: tensorrt-llm 8 | 9 | name: cuda_inference_tensorrt_llm 10 | 11 | backend: 12 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_tensorrt_llm_pp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits 
from inference config 6 | - _tensorrt_llm_pp_ # inherits from tensorrt_llm_pp config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: tensorrt-llm 9 | 10 | name: cuda_inference_tensorrt_llm_pp 11 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_tensorrt_llm_tp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _tensorrt_llm_tp_ # inherits from tensorrt_llm_tp config 7 | - _self_ # hydra 1.1 compatibility 8 | - override backend: tensorrt-llm 9 | 10 | name: cuda_inference_tensorrt_llm_tp 11 | -------------------------------------------------------------------------------- /optimum_benchmark/process_utils.py: -------------------------------------------------------------------------------- 1 | from multiprocessing.connection import Connection 2 | from typing import TypeVar 3 | 4 | DeserializedType = TypeVar("DeserializedType") 5 | 6 | 7 | def sync_with_parent(child_connection: Connection) -> None: 8 | child_connection.recv() 9 | child_connection.send(0) 10 | 11 | 12 | def sync_with_child(parent_connection: Connection) -> None: 13 | parent_connection.send(0) 14 | parent_connection.recv() 15 | -------------------------------------------------------------------------------- /tests/configs/_deepspeed_inference_.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - override launcher: torchrun 3 | 4 | launcher: 5 | nproc_per_node: 2 6 | 7 | backend: 8 | device_ids: 0,1 9 | model: google-bert/bert-base-uncased 10 | deepspeed_inference: true 11 | deepspeed_inference_config: 12 | tensor_parallel: 13 | tp_size: 2 14 | 15 | scenario: 16 | input_shapes: 17 | batch_size: 2 18 | 19 | hydra: 20 | job: 21 | env_set: 22 | LOG_ALL_RANKS: 1 23 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_py_txi_gpt2.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _gpt2_ # inherits from gpt2 config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: py-txi 10 | 11 | name: cpu_inference_py_txi_gpt2 12 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_py_txi_gpt2.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _gpt2_ # inherits from gpt2 config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: py-txi 10 | 11 | name: cuda_inference_py_txi_gpt2 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_timm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one 
overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _export_ # inherits from export config 7 | - _timm_ # inherits from timm config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: onnxruntime 10 | 11 | name: cpu_inference_onnxruntime_timm 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_py_txi_st_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _st_bert_ # inherits from bert config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: py-txi 10 | 11 | name: cpu_inference_py_txi_st_bert 12 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_py_txi_st_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _st_bert_ # inherits from bert config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: py-txi 10 | 11 | name: cuda_inference_py_txi_st_bert 12 | -------------------------------------------------------------------------------- /examples/mps_pytorch_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: inline # mps fails with python multi-processing for some reason 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: mps_pytorch_bert 10 | 11 | backend: 12 | device: mps 13 | no_weights: true 14 | model: bert-base-uncased 15 | 16 | scenario: 17 | memory: true 18 | latency: true 19 | input_shapes: 20 | batch_size: 1 21 | sequence_length: 128 22 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_timm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _timm_ # inherits from timm config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cuda_inference_pytorch_timm 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_ipex_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: ipex 10 | 11 | name: 
cpu_inference_ipex_text_decoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_diffusers.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _diffusers_ # inherits from diffusers config 7 | - _export_ # inherits from export config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: onnxruntime 10 | 11 | name: cpu_inference_onnxruntime_diffusers 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_training_pytorch_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _training_ # inherits from training config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_training_pytorch_text_decoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_training_pytorch_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _training_ # inherits from training config 6 | - _text_encoders_ # inherits from text encoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_training_pytorch_text_encoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_ipex_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders sweep config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: ipex 10 | 11 | name: cpu_inference_ipex_text_encoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_text_decoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_text_encoders.yaml: 
-------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_text_encoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_timm_torch_compile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _torch_compile_ # inherits from torch compile config 6 | - _inference_ # inherits from inference config 7 | - _timm_ # inherits from timm config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_timm_torch_compile 12 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_diffusers.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _diffusers_ # inherits from diffusers config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cuda_inference_pytorch_diffusers 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_openvino_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: openvino 10 | 11 | name: cpu_inference_openvino_text_decoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_openvino_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders sweep config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: openvino 10 | 11 | name: cpu_inference_openvino_text_encoders 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_diffusers_torch_compile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ 
# inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _torch_compile_ # inherits from torch compile config 7 | - _diffusers_ # inherits from diffusers config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_diffusers_torch_compile 12 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_image_text_to_text.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _image_text_to_text_ # inherits from image text to text config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_image_text_to_text 12 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_vllm_bloom_pp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _vllm_pp_ # inherits from vllm pp config 8 | - _bloom_ # inherits from bloom config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: vllm 11 | 12 | name: cuda_inference_vllm_bloom_pp 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_vllm_bloom_tp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _no_weights_ # inherits from no weights config 7 | - _vllm_tp_ # inherits from vllm tp config 8 | - _bloom_ # inherits from bloom config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: vllm 11 | 12 | name: cuda_inference_vllm_bloom_tp 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_ddp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _ddp_ # inherits from ddp config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_ddp 13 | -------------------------------------------------------------------------------- /examples/cpu_llama_cpp_text_generation.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - backend: llama_cpp 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_llama_cpp_text_generation 10 | 11 | backend: 12 | device: cpu 13 | task: text-generation 14 | model: 
TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF 15 | filename: tinyllama-1.1b-chat-v1.0.Q4_0.gguf 16 | 17 | scenario: 18 | memory: true 19 | latency: true 20 | 21 | input_shapes: 22 | batch_size: 1 23 | sequence_length: 128 24 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_pytorch_text_encoders_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_decoders_ # inherits from text encoders decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _self_ # hydra 1.1 compatibility 9 | - override backend: pytorch 10 | 11 | name: cpu_inference_pytorch_text_encoders_decoders 12 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_bnb.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _bnb_ # inherits from bnb config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_bnb 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_gptq.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _gptq_ # inherits from gptq config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_gptq 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_dp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _dp_ # inherits from data parallel config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_dp 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_peft.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - 
_peft_ # inherits from peft config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_peft 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_tp.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _tp_ # inherits from tensor parallel config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_tp 13 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_pytorch_bert 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: google-bert/bert-base-uncased 20 | 21 | scenario: 22 | memory: true 23 | latency: true 24 | 25 | input_shapes: 26 | batch_size: 1 27 | sequence_length: 128 28 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_device_map.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _device_map_ # inherits from device map config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_device_map 13 | -------------------------------------------------------------------------------- /examples/cpu_llama_cpp_embedding.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - backend: llama_cpp 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_llama_cpp_embedding 10 | 11 | backend: 12 | device: cpu 13 | task: feature-extraction 14 | model: nomic-ai/nomic-embed-text-v1.5-GGUF 15 | filename: nomic-embed-text-v1.5.Q4_0.gguf 16 | 17 | scenario: 18 | input_shapes: 19 | batch_size: 1 20 | sequence_length: 64 21 | 22 | generate_kwargs: 23 | max_new_tokens: 32 24 | min_new_tokens: 32 25 | -------------------------------------------------------------------------------- /examples/cpu_openvino_diffusion.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - backend: openvino 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: openvino_diffusion 10 | 11 | backend: 12 | device: cpu 13 | export: true 14 | task: text-to-image 15 | model: stabilityai/stable-diffusion-2-1 16 | half: false # enable half-precision on compatible Intel CPU machines 17 | 18 | scenario: 19 | 
input_shapes: 20 | batch_size: 1 21 | sequence_length: 16 22 | 23 | call_kwargs: 24 | num_inference_steps: 4 25 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_ort_quant.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _ort_quant_ # inherits from ort static quant config 7 | - _no_weights_ # inherits from no weights sweep config 8 | - _export_ # inherits from export config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: onnxruntime 11 | 12 | name: cpu_inference_onnxruntime_ort_quant 13 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _export_ # inherits from export config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: onnxruntime 11 | 12 | name: cpu_inference_onnxruntime_text_decoders 13 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _export_ # inherits from export config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: onnxruntime 11 | 12 | name: cpu_inference_onnxruntime_text_encoders 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_device_map.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _no_weights_ # inherits from no weights config 8 | - _device_map_ # inherits from device map config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_device_map 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_timm_torch_compile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _torch_compile_ # inherits from torch compile 
config 8 | - _timm_ # inherits from timm config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_timm_torch_compile 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_vllm_bloom.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _serving_mode_ # inherits from serving_mode config 7 | - _no_weights_ # inherits from no weights config 8 | - _bloom_ # inherits from bloom config 9 | - _vllm_ # inherits from vllm config 10 | - _self_ # hydra 1.1 compatibility 11 | - override backend: vllm 12 | 13 | name: cuda_inference_vllm_bloom 14 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_text_decoders 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_training_pytorch_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _training_ # inherits from training config 6 | - _text_encoders_ # inherits from text encoders config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_training_pytorch_text_encoders 13 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Pytest configuration for optimum-benchmark tests.""" 2 | 3 | import pytest 4 | 5 | 6 | def pytest_sessionfinish(session, exitstatus): 7 | """ 8 | Hook to modify the exit status when no tests are collected. 9 | 10 | This prevents pytest from returning a non-zero exit code when no tests 11 | are found, which is useful for CI/CD pipelines where some test runs 12 | might legitimately have no tests to run. 
13 | """ 14 | if exitstatus == pytest.ExitCode.NO_TESTS_COLLECTED: 15 | session.exitstatus = pytest.ExitCode.OK 16 | -------------------------------------------------------------------------------- /examples/cpu_openvino_8bit_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - backend: openvino 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_openvino_8bit_bert 10 | 11 | backend: 12 | device: cpu 13 | reshape: true 14 | no_weights: true 15 | load_in_8bit: true 16 | model: google-bert/bert-base-uncased 17 | reshape_kwargs: 18 | batch_size: 1 19 | sequence_length: 128 20 | 21 | scenario: 22 | memory: true 23 | latency: true 24 | 25 | input_shapes: 26 | batch_size: 1 27 | sequence_length: 128 28 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_text_decoders 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_text_encoders 13 | -------------------------------------------------------------------------------- /examples/cpu_onnxruntime_static_quant_vit.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: onnxruntime 4 | - scenario: inference 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_onnxruntime_static_quant_vit 10 | 11 | backend: 12 | device: cpu 13 | export: true 14 | no_weights: true 15 | model: google/vit-base-patch16-224 16 | quantization: true 17 | quantization_config: 18 | is_static: true 19 | per_channel: false 20 | calibration: true 21 | 22 | scenario: 23 | memory: true 24 | latency: true 25 | input_shapes: 26 | batch_size: 2 27 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_diffusers_torch_compile.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _device_isolation_ # inherits from device isolation config 7 | - _torch_compile_ # inherits 
from torch compile config 8 | - _diffusers_ # inherits from diffusers config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_diffusers_torch_compile 13 | -------------------------------------------------------------------------------- /docker/unroot/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG IMAGE="optimum-benchmark:latest" 2 | 3 | FROM $IMAGE 4 | 5 | # Create a non-root user 6 | ARG USER_ID 7 | ARG GROUP_ID 8 | ENV PATH="/home/user/.local/bin:${PATH}" 9 | 10 | RUN addgroup --gid $GROUP_ID group 11 | RUN adduser --disabled-password --gecos '' --uid $USER_ID --gid $GROUP_ID user 12 | 13 | # For ROCm, the user needs to be in the video and render groups, check with /opt/rocm/ 14 | RUN if [ -d /opt/rocm/ ]; then usermod -a -G video user; fi 15 | RUN if [ -d /opt/rocm/ ]; then usermod -a -G render user; fi 16 | 17 | USER user 18 | WORKDIR /home/user 19 | 20 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/process/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from ..config import LauncherConfig 4 | 5 | 6 | @dataclass 7 | class ProcessConfig(LauncherConfig): 8 | name: str = "process" 9 | _target_: str = "optimum_benchmark.launchers.process.launcher.ProcessLauncher" 10 | 11 | start_method: str = "spawn" 12 | 13 | def __post_init__(self): 14 | super().__post_init__() 15 | 16 | if self.start_method not in ["spawn", "fork"]: 17 | raise ValueError(f"start_method must be one of ['spawn', 'fork'], got {self.start_method}") 18 | -------------------------------------------------------------------------------- /tests/configs/cpu_inference_onnxruntime_text_encoders_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cpu_ # inherits from cpu config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_decoders_ # inherits from text encoders decoders config 7 | - _no_weights_ # inherits from no weights config 8 | - _export_ # inherits from export config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: onnxruntime 11 | 12 | name: cpu_inference_onnxruntime_text_encoders_decoders 13 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_pytorch_deepspeed_inference.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _deepspeed_inference_ # inherits from deepspeed inference config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _self_ # hydra 1.1 compatibility 10 | - override backend: pytorch 11 | 12 | name: cuda_inference_pytorch_deepspeed_inference 13 | -------------------------------------------------------------------------------- /examples/_base_.yaml: -------------------------------------------------------------------------------- 1 | log_report: true 2 | print_report: true 3 | 4 | # hydra/cli specific settings 5 | hydra: 6 | run: 7 | # define run directory 8 | dir: 
runs/${name} 9 | sweep: 10 | # define sweep directory 11 | dir: sweeps/${name} 12 | subdir: ${hydra.job.override_dirname} 13 | job: 14 | # change working directory to the job directory 15 | # so that artifacts are stored there 16 | chdir: true 17 | env_set: 18 | # set environment variable OVERRIDE_BENCHMARKS to 1 19 | # to not skip benchmarks that have been run before 20 | OVERRIDE_BENCHMARKS: 1 21 | -------------------------------------------------------------------------------- /energy_star/_base_.yaml: -------------------------------------------------------------------------------- 1 | log_report: true 2 | print_report: true 3 | 4 | # hydra/cli specific settings 5 | hydra: 6 | run: 7 | # define run directory 8 | dir: runs/${name} 9 | sweep: 10 | # define sweep directory 11 | dir: sweeps/${name} 12 | subdir: ${hydra.job.override_dirname} 13 | job: 14 | # change working directory to the job directory 15 | # so that artifacts are stored there 16 | chdir: true 17 | env_set: 18 | # set environment variable OVERRIDE_BENCHMARKS to 1 19 | # to not skip benchmarks that have been run before 20 | OVERRIDE_BENCHMARKS: 1 21 | -------------------------------------------------------------------------------- /energy_star/object_detection.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: object_detection 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: object-detection 20 | model: facebook/detr-resnet-50 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/object_detection 24 | image_column_name: image 25 | num_samples: 1000 26 | 27 | input_shapes: 28 | batch_size: 1 29 | -------------------------------------------------------------------------------- /energy_star/image_to_text.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: image_to_text 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: image-to-text 20 | model: sashakunitsyn/vlrm-blip2-opt-2.7b 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/image_captioning 24 | image_column_name: image 25 | num_samples: 1000 26 | 27 | input_shapes: 28 | batch_size: 1 29 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_llama.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_pytorch_llama 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | torch_dtype: float16 20 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 21 | 22 | scenario: 23 | input_shapes: 24 | batch_size: 4 25 | sequence_length: 64 26 | 27 | generate_kwargs: 28 | max_new_tokens: 32 29 | min_new_tokens: 32 30 | -------------------------------------------------------------------------------- /energy_star/image_classification.yaml: 
-------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: image_classification 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: image-classification 20 | model: google/vit-base-patch16-224 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/image_classification 24 | image_column_name: image 25 | num_samples: 1000 26 | 27 | input_shapes: 28 | batch_size: 1 29 | -------------------------------------------------------------------------------- /energy_star/automatic_speech_recognition.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: automatic_speech_recognition 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: openai/whisper-large-v3 20 | task: automatic-speech-recognition 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/ASR 24 | audio_column_name: audio 25 | num_samples: 1000 26 | 27 | input_shapes: 28 | batch_size: 1 29 | -------------------------------------------------------------------------------- /energy_star/text_to_image.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: image_generation_tiny 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: false 19 | model: segmind/tiny-sd 20 | 21 | scenario: 22 | dataset_name: EnergyStarAI/image_generation 23 | text_column_name: prompt 24 | num_samples: 1000 25 | 26 | input_shapes: 27 | batch_size: 1 28 | 29 | call_kwargs: 30 | num_images_per_prompt: 1 31 | -------------------------------------------------------------------------------- /energy_star/text_classification.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: text_classification 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: text-classification 20 | model: lvwerra/distilbert-imdb 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/text_classification 24 | text_column_name: text 25 | num_samples: 1000 26 | truncation: True 27 | 28 | input_shapes: 29 | batch_size: 1 30 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/ipex/utils.py: -------------------------------------------------------------------------------- 1 | TASKS_TO_IPEXMODELS = { 2 | "fill-mask": "optimum.intel.IPEXModelForMaskedLM", 3 | "text-generation": "optimum.intel.IPEXModelForCausalLM", 4 | "feature-extraction": "optimum.intel.IPEXModel", 5 | "text-classification": "optimum.intel.IPEXModelForSequenceClassification", 6 | "token-classification": "optimum.intel.IPEXModelForTokenClassification", 7 
| "question-answering": "optimum.intel.IPEXModelForQuestionAnswering", 8 | "image-classification": "optimum.intel.IPEXModelForImageClassification", 9 | "audio-classification": "optimum.intel.IPEXModelForAudioClassification", 10 | } 11 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_onnxruntime_text_decoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _text_decoders_ # inherits from text decoders sweep config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _export_ # inherits from export config 10 | - _self_ # hydra 1.1 compatibility 11 | - override backend: onnxruntime 12 | 13 | name: cuda_inference_onnxruntime_text_decoders_no_weights 14 | -------------------------------------------------------------------------------- /tests/configs/cuda_inference_onnxruntime_text_encoders.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | # order of inheritance, last one overrides previous ones 3 | - _base_ # inherits from base config 4 | - _cuda_ # inherits from cuda config 5 | - _inference_ # inherits from inference config 6 | - _text_encoders_ # inherits from text encoders sweep config 7 | - _device_isolation_ # inherits from device isolation config 8 | - _no_weights_ # inherits from no weights config 9 | - _export_ # inherits from export config 10 | - _self_ # hydra 1.1 compatibility 11 | - override backend: onnxruntime 12 | 13 | name: cuda_inference_onnxruntime_text_encoders_no_weights 14 | -------------------------------------------------------------------------------- /energy_star/question_answering.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: question_answering 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: question-answering 20 | model: deepset/electra-base-squad2 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/extractive_qa 24 | question_column_name: question 25 | context_column_name: context 26 | num_samples: 1000 27 | 28 | input_shapes: 29 | batch_size: 1 30 | -------------------------------------------------------------------------------- /examples/cuda_tgi_llama.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: py-txi 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_tgi_llama 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | cuda_graphs: 0 # remove for better perf but bigger memory footprint 19 | no_weights: true 20 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 21 | 22 | scenario: 23 | input_shapes: 24 | batch_size: 1 25 | sequence_length: 64 26 | 27 | generate_kwargs: 28 | max_new_tokens: 32 29 | min_new_tokens: 32 30 | -------------------------------------------------------------------------------- 
/examples/cuda_trt_llama.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: tensorrt-llm 4 | - scenario: inference 5 | - launcher: process 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_trt_llama 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | max_batch_size: 4 20 | max_new_tokens: 32 21 | max_prompt_length: 64 22 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 23 | 24 | scenario: 25 | input_shapes: 26 | batch_size: 1 27 | sequence_length: 64 28 | 29 | generate_kwargs: 30 | max_new_tokens: 32 31 | min_new_tokens: 32 32 | -------------------------------------------------------------------------------- /examples/cpu_ipex_bert.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: ipex 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_ipex_bert 10 | 11 | launcher: 12 | numactl: true 13 | numactl_kwargs: 14 | cpunodebind: 0 15 | membind: 0 16 | 17 | backend: 18 | device: cpu 19 | no_weights: false # on multi-node machines, inline weights initialization harms performance 20 | torch_dtype: float32 # use bfloat16 on compatible Intel CPUs 21 | model: google-bert/bert-base-uncased 22 | 23 | scenario: 24 | memory: true 25 | latency: true 26 | 27 | input_shapes: 28 | batch_size: 1 29 | sequence_length: 128 30 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/peft_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from transformers import PreTrainedModel 4 | 5 | from ..import_utils import is_peft_available 6 | 7 | if is_peft_available(): 8 | from peft import PEFT_TYPE_TO_CONFIG_MAPPING, get_peft_model # type: ignore 9 | 10 | 11 | def apply_peft(model: "PreTrainedModel", peft_type: str, peft_config: Dict[str, Any]) -> "PreTrainedModel": 12 | if not is_peft_available(): 13 | raise ImportError("peft is not available. 
Please, pip install peft.") 14 | 15 | peft_config = PEFT_TYPE_TO_CONFIG_MAPPING[peft_type](**peft_config) 16 | 17 | return get_peft_model(model=model, peft_config=peft_config) 18 | -------------------------------------------------------------------------------- /.github/workflows/quality.yaml: -------------------------------------------------------------------------------- 1 | name: Quality Checks 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | concurrency: 12 | cancel-in-progress: true 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | 15 | env: 16 | UV_TORCH_BACKEND: cpu 17 | 18 | jobs: 19 | quality: 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - name: Checkout 24 | uses: actions/checkout@v4 25 | 26 | - name: Install uv 27 | uses: astral-sh/setup-uv@v6 28 | with: 29 | enable-cache: true 30 | 31 | - name: Run quality checks 32 | run: | 33 | make quality 34 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import BackendConfig 2 | from .ipex.config import IPEXConfig 3 | from .llama_cpp.config import LlamaCppConfig 4 | from .onnxruntime.config import ONNXRuntimeConfig 5 | from .openvino.config import OpenVINOConfig 6 | from .py_txi.config import PyTXIConfig 7 | from .pytorch.config import PyTorchConfig 8 | from .tensorrt_llm.config import TRTLLMConfig 9 | from .vllm.config import VLLMConfig 10 | 11 | __all__ = [ 12 | "PyTorchConfig", 13 | "ONNXRuntimeConfig", 14 | "IPEXConfig", 15 | "OpenVINOConfig", 16 | "TRTLLMConfig", 17 | "PyTXIConfig", 18 | "BackendConfig", 19 | "VLLMConfig", 20 | "LlamaCppConfig", 21 | ] 22 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/inline/launcher.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, List 2 | 3 | from ...benchmark.report import BenchmarkReport 4 | from ..base import Launcher 5 | from .config import InlineConfig 6 | 7 | 8 | class InlineLauncher(Launcher[InlineConfig]): 9 | NAME = "inline" 10 | 11 | def __init__(self, config: InlineConfig): 12 | super().__init__(config) 13 | 14 | def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) -> BenchmarkReport: 15 | self.logger.warning("The inline launcher is only recommended for debugging purposes and not for benchmarking") 16 | 17 | report = worker(*worker_args) 18 | 19 | return report 20 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_llama_compile_model.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_pytorch_llama_compile_model 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | torch_compile: true 20 | torch_dtype: bfloat16 21 | task: feature-extraction 22 | torch_compile_target: model 23 | model: NousResearch/Llama-2-13b-hf 24 | 25 | scenario: 26 | input_shapes: 27 | batch_size: 4 28 | sequence_length: 256 29 | 30 | forward_kwargs: 31 | use_cache: false 32 | -------------------------------------------------------------------------------- 
/optimum_benchmark/scenarios/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from logging import getLogger 3 | from typing import ClassVar, Generic 4 | 5 | from ..backends.base import Backend 6 | from ..benchmark.report import BenchmarkReport 7 | from .config import ScenarioConfigT 8 | 9 | 10 | class Scenario(Generic[ScenarioConfigT], ABC): 11 | NAME: ClassVar[str] 12 | 13 | def __init__(self, config: ScenarioConfigT) -> None: 14 | self.config = config 15 | self.logger = getLogger(self.NAME) 16 | self.logger.info(f"Allocating {self.NAME} scenario") 17 | 18 | def run(self, backend: Backend) -> BenchmarkReport: 19 | raise NotImplementedError("Scenario must implement run method") 20 | -------------------------------------------------------------------------------- /energy_star/summarization.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: summarization 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | task: summarization 20 | model: sshleifer/distilbart-cnn-12-6 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/summarization 24 | text_column_name: text 25 | num_samples: 1000 26 | truncation: True 27 | 28 | input_shapes: 29 | batch_size: 1 30 | 31 | generate_kwargs: 32 | max_length: 10 33 | min_new_tokens: 10 34 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_llama_compile_regions.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_pytorch_llama_compile_regions 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | torch_compile: true 20 | torch_dtype: bfloat16 21 | task: feature-extraction 22 | torch_compile_target: regions 23 | model: NousResearch/Llama-2-13b-hf 24 | 25 | scenario: 26 | input_shapes: 27 | batch_size: 4 28 | sequence_length: 256 29 | 30 | forward_kwargs: 31 | use_cache: false 32 | -------------------------------------------------------------------------------- /optimum_benchmark/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | __version__ = "0.7.0.dev0" 16 | -------------------------------------------------------------------------------- /energy_star/sentence_similarity.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: sentence_similarity_udever-bloom-7b1 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | library: transformers 20 | task: sentence-similarity 21 | model: sentence-transformers/all-MiniLM-L6-v2 22 | 23 | scenario: 24 | dataset_name: EnergyStarAI/sentence_similarity 25 | sentence1_column_name: sentence1 26 | sentence2_column_name: sentence2 27 | num_samples: 1000 28 | 29 | input_shapes: 30 | batch_size: 1 31 | -------------------------------------------------------------------------------- /energy_star/t5_question_answering.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: question_answering_t5 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: google-t5/t5-large 20 | task: text2text-generation 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/extractive_qa 24 | question_column_name: question 25 | context_column_name: context 26 | dataset_prefix1: "question: " 27 | dataset_prefix2: " context: " 28 | t5_task: question_answering 29 | num_samples: 1000 30 | 31 | input_shapes: 32 | batch_size: 1 33 | -------------------------------------------------------------------------------- /energy_star/t5_summarization.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: summarization_t5 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: google-t5/t5-large 20 | task: text2text-generation 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/summarization 24 | dataset_prefix1: "summarize: " 25 | text_column_name: text 26 | t5_task: summarization 27 | num_samples: 1000 28 | truncation: True 29 | 30 | input_shapes: 31 | batch_size: 1 32 | 33 | generate_kwargs: 34 | max_new_tokens: 10 35 | min_new_tokens: 10 36 | -------------------------------------------------------------------------------- /energy_star/t5_text_generation.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: text2text_generation_aya 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: google-t5/t5-large 20 | task: text2text-generation 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/text_generation 24 | t5_task: text_generation 25 | text_column_name: text 26 | dataset_prefix1: "" 27 | num_samples: 1000 28 | truncation: True 29 | 30 | input_shapes: 31 | 
batch_size: 1 32 | 33 | generate_kwargs: 34 | max_new_tokens: 10 35 | min_new_tokens: 10 36 | -------------------------------------------------------------------------------- /examples/cpu_ipex_llama.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: ipex 6 | - _base_ 7 | - _self_ 8 | 9 | name: cpu_ipex_llama 10 | 11 | launcher: 12 | numactl: true 13 | numactl_kwargs: 14 | cpunodebind: 0 15 | membind: 0 16 | 17 | backend: 18 | device: cpu 19 | no_weights: false # on multi-node machines, initializing weights in the benchmark could harm performance 20 | torch_dtype: float32 # use bfloat16 on compatible Intel CPUs 21 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 22 | 23 | scenario: 24 | memory: true 25 | latency: true 26 | 27 | input_shapes: 28 | batch_size: 1 29 | sequence_length: 64 30 | 31 | generate_kwargs: 32 | max_new_tokens: 32 33 | min_new_tokens: 32 34 | -------------------------------------------------------------------------------- /examples/cuda_vllm_llama.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: vllm 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_vllm_llama 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | serving_mode: online 20 | model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 21 | engine_args: 22 | enforce_eager: true # remove for better perf but bigger memory footprint 23 | 24 | scenario: 25 | input_shapes: 26 | batch_size: 1 27 | sequence_length: 64 28 | 29 | generate_kwargs: 30 | max_new_tokens: 32 31 | min_new_tokens: 32 32 | 33 | hydra: 34 | job: 35 | env_set: 36 | VLLM_USE_V1: 0 37 | -------------------------------------------------------------------------------- /optimum_benchmark/trackers/__init__.py: -------------------------------------------------------------------------------- 1 | from .energy import Efficiency, Energy, EnergyTracker 2 | from .latency import ( 3 | Latency, 4 | LatencySessionTracker, 5 | LatencyTracker, 6 | PerStepLatencySessionTrackerPipelineCallback, 7 | PerTokenLatencySessionTrackerLogitsProcessor, 8 | StepLatencyTrackerTrainerCallback, 9 | Throughput, 10 | ) 11 | from .memory import Memory, MemoryTracker 12 | 13 | __all__ = [ 14 | "Efficiency", 15 | "Energy", 16 | "EnergyTracker", 17 | "Latency", 18 | "LatencySessionTracker", 19 | "LatencyTracker", 20 | "PerStepLatencySessionTrackerPipelineCallback", 21 | "PerTokenLatencySessionTrackerLogitsProcessor", 22 | "StepLatencyTrackerTrainerCallback", 23 | "Throughput", 24 | "Memory", 25 | "MemoryTracker", 26 | ] 27 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/inline/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from ..config import LauncherConfig 4 | 5 | 6 | @dataclass 7 | class InlineConfig(LauncherConfig): 8 | name: str = "inline" 9 | _target_: str = "optimum_benchmark.launchers.inline.launcher.InlineLauncher" 10 | 11 | def __post_init__(self): 12 | super().__post_init__() 13 | 14 | if self.device_isolation: 15 | raise ValueError( 16 | "Device isolation is not supported with the inline launcher. Use `process` launcher instead."
17 | ) 18 | 19 | if self.device_isolation_action is not None: 20 | raise ValueError( 21 | "Device isolation is not supported with the inline launcher. Use `process` launcher instead." 22 | ) 23 | -------------------------------------------------------------------------------- /tests/configs/_base_.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark # parent schema 3 | - backend: pytorch # default backend 4 | - launcher: process # default launcher 5 | - scenario: inference # default scenario 6 | - _self_ 7 | 8 | print_report: true 9 | log_report: true 10 | 11 | # hydra/cli specific settings 12 | hydra: 13 | run: 14 | # define run directory 15 | dir: runs/${name} 16 | sweep: 17 | # define sweep directory 18 | dir: sweeps/${name} 19 | subdir: ${hydra.job.override_dirname} 20 | job: 21 | # change working directory to the job directory 22 | # so that artifacts are stored there 23 | chdir: true 24 | env_set: 25 | # set environment variable OVERRIDE_BENCHMARKS to 1 26 | # to not skip benchmarks that have been run before 27 | OVERRIDE_BENCHMARKS: 1 28 | -------------------------------------------------------------------------------- /energy_star/t5_text_classification.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: text_classification_t5 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | model: google-t5/t5-large 20 | task: text2text-generation 21 | 22 | scenario: 23 | dataset_name: EnergyStarAI/text_classification 24 | dataset_prefix1: "sst2 sentence: " 25 | t5_task: text_classification 26 | text_column_name: text 27 | 28 | num_samples: 1000 29 | truncation: True 30 | 31 | input_shapes: 32 | batch_size: 1 33 | 34 | generate_kwargs: 35 | max_new_tokens: 10 36 | min_new_tokens: 10 37 | -------------------------------------------------------------------------------- /energy_star/text_generation.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - backend: pytorch 4 | - launcher: process 5 | - scenario: energy_star 6 | - _base_ 7 | - _self_ 8 | 9 | name: text_generation 10 | 11 | launcher: 12 | device_isolation: False 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: False 19 | task: text-generation 20 | model: openai/gpt-oss-20b 21 | torch_dtype: auto 22 | device_map: auto 23 | 24 | scenario: 25 | dataset_name: EnergyStarAI/text_generation 26 | text_column_name: text 27 | num_samples: 1000 28 | truncation: True 29 | reasoning: True 30 | reasoning_params: 31 | reasoning_effort: high 32 | 33 | input_shapes: 34 | batch_size: 1 35 | 36 | generate_kwargs: 37 | max_new_tokens: 10 38 | min_new_tokens: 10 39 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_vlm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - benchmark 3 | - scenario: inference 4 | - launcher: process 5 | - backend: pytorch 6 | - _base_ 7 | - _self_ 8 | 9 | name: cuda_pytorch_vlm 10 | 11 | launcher: 12 | device_isolation: true 13 | device_isolation_action: warn 14 | 15 | backend: 16 | device: cuda 17 | device_ids: 0 18 | no_weights: true 19 | 
torch_dtype: float16 20 | model: Qwen/Qwen2-VL-7B-Instruct 21 | 22 | scenario: 23 | memory: true 24 | latency: true 25 | 26 | warmup_runs: 10 27 | iterations: 10 28 | duration: 10 29 | 30 | input_shapes: 31 | # text 32 | batch_size: 1 33 | sequence_length: 64 34 | # image 35 | num_images: 2 36 | num_channels: 3 37 | height: 224 38 | width: 224 39 | 40 | generate_kwargs: 41 | max_new_tokens: 32 42 | min_new_tokens: 32 43 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/llama_cpp/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | from ...import_utils import llama_cpp_version 5 | from ..config import BackendConfig 6 | 7 | 8 | @dataclass 9 | class LlamaCppConfig(BackendConfig): 10 | name: str = "llama_cpp" 11 | version: Optional[str] = llama_cpp_version() 12 | _target_: str = "optimum_benchmark.backends.llama_cpp.backend.LlamaCppBackend" 13 | 14 | no_weights: bool = False 15 | 16 | # llamamodel kwargs 17 | filename: Optional[str] = None 18 | 19 | def __post_init__(self): 20 | self.library = "llama_cpp" 21 | self.model_type = "llama_cpp" 22 | 23 | super().__post_init__() 24 | 25 | if self.task not in ["feature-extraction", "text-generation"]: 26 | raise NotImplementedError(f"Task {self.task} is not supported by LlamaCpp backend.") 27 | 28 | if self.no_weights: 29 | raise NotImplementedError("`no_weights` benchmarking is not supported by LlamaCpp backend.") 30 | -------------------------------------------------------------------------------- /optimum_benchmark/benchmark/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Any, Dict 3 | 4 | from ..hub_utils import PushToHubMixin, classproperty 5 | from ..import_utils import get_hf_libs_info 6 | from ..system_utils import get_system_info 7 | 8 | 9 | @dataclass 10 | class BenchmarkConfig(PushToHubMixin): 11 | name: str 12 | 13 | # BACKEND CONFIGURATION 14 | backend: Any # https://github.com/facebookresearch/hydra/issues/1722#issuecomment-883568386 15 | # SCENARIO CONFIGURATION 16 | scenario: Any # https://github.com/facebookresearch/hydra/issues/1722#issuecomment-883568386 17 | # LAUNCHER CONFIGURATION 18 | launcher: Any # https://github.com/facebookresearch/hydra/issues/1722#issuecomment-883568386 19 | 20 | # ENVIRONMENT CONFIGURATION 21 | environment: Dict[str, Any] = field(default_factory=lambda: {**get_system_info(), **get_hf_libs_info()}) 22 | 23 | print_report: bool = False 24 | log_report: bool = True 25 | 26 | @classproperty 27 | def default_filename(cls) -> str: 28 | return "benchmark_config.json" 29 | -------------------------------------------------------------------------------- /optimum_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | from .backends import ( 2 | BackendConfig, 3 | IPEXConfig, 4 | LlamaCppConfig, 5 | ONNXRuntimeConfig, 6 | OpenVINOConfig, 7 | PyTorchConfig, 8 | PyTXIConfig, 9 | TRTLLMConfig, 10 | VLLMConfig, 11 | ) 12 | from .benchmark.base import Benchmark 13 | from .benchmark.config import BenchmarkConfig 14 | from .benchmark.report import BenchmarkReport 15 | from .launchers import InlineConfig, LauncherConfig, ProcessConfig, TorchrunConfig 16 | from .scenarios import EnergyStarConfig, InferenceConfig, ScenarioConfig, TrainingConfig 17 | 18 | __all__ = [ 19 | "BackendConfig", 
20 | "Benchmark", 21 | "BenchmarkConfig", 22 | "BenchmarkReport", 23 | "EnergyStarConfig", 24 | "InferenceConfig", 25 | "IPEXConfig", 26 | "InlineConfig", 27 | "LauncherConfig", 28 | "ONNXRuntimeConfig", 29 | "OpenVINOConfig", 30 | "ProcessConfig", 31 | "PyTorchConfig", 32 | "PyTXIConfig", 33 | "ScenarioConfig", 34 | "TorchrunConfig", 35 | "TrainingConfig", 36 | "TRTLLMConfig", 37 | "VLLMConfig", 38 | "LlamaCppConfig", 39 | ] 40 | -------------------------------------------------------------------------------- /.github/workflows/security.yml: -------------------------------------------------------------------------------- 1 | name: Security Checks 2 | 3 | on: 4 | push: 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | trufflehog: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - shell: bash 14 | run: | 15 | if [ "$EVENT_NAME" == "push" ]; then 16 | echo "depth=$(($(jq length <<< $COMMITS)+2))" >> $GITHUB_ENV 17 | echo "branch=$REF" >> $GITHUB_ENV 18 | fi 19 | if [ "$EVENT_NAME" == "pull_request" ]; then 20 | echo "depth=$(($PR_COMMITS+2))" >> $GITHUB_ENV 21 | echo "branch=$PR_REF" >> $GITHUB_ENV 22 | fi 23 | env: 24 | REF: ${{ github.ref_name }} 25 | COMMITS: ${{ tojson(github.event.commits) }} 26 | EVENT_NAME: ${{ github.event_name }} 27 | PR_REF: ${{ github.event.pull_request.head.ref }} 28 | PR_COMMITS: ${{ github.event.pull_request.commits }} 29 | 30 | - name: Checkout code 31 | uses: actions/checkout@v4 32 | with: 33 | ref: ${{env.branch}} 34 | fetch-depth: ${{env.depth}} 35 | 36 | - name: Scan for secrets 37 | uses: trufflesecurity/trufflehog@main 38 | -------------------------------------------------------------------------------- /.github/workflows/test_energy_star.yaml: -------------------------------------------------------------------------------- 1 | name: Energy Star Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_energy_star: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'energy_star') 31 | }} 32 | 33 | runs-on: ubuntu-latest 34 | 35 | steps: 36 | - name: Checkout 37 | uses: actions/checkout@v4 38 | 39 | - name: Install ffmpeg 40 | run: sudo apt-get install -y ffmpeg 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run energy star 48 | run: | 49 | make test-energy-star 50 | -------------------------------------------------------------------------------- /.github/workflows/test_api_rocm.yaml: -------------------------------------------------------------------------------- 1 | name: API ROCm Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | jobs: 23 | run_api_rocm_tests: 24 | if: ${{ 25 | (github.event_name == 'push') || 26 | (github.event_name == 'workflow_dispatch') || 27 | contains( 
github.event.pull_request.labels.*.name, 'api') || 28 | contains( github.event.pull_request.labels.*.name, 'rocm') || 29 | contains( github.event.pull_request.labels.*.name, 'api_rocm') 30 | }} 31 | 32 | uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@testing 33 | with: 34 | test_file: test_api.py 35 | machine_type: single-gpu 36 | pytest_keywords: api and cuda 37 | install_extras: testing,timm,diffusers,codecarbon 38 | secrets: 39 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 40 | -------------------------------------------------------------------------------- /scripts/total_tests_runs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import yaml 4 | 5 | config_dir = "tests/configs" 6 | config_files = [f for f in os.listdir(config_dir) if not f.startswith("_")] 7 | 8 | run_counts = {} 9 | for config_file in config_files: 10 | with open(os.path.join(config_dir, config_file), "r") as f: 11 | config = yaml.safe_load(f) 12 | 13 | for default in config.get("defaults", []): 14 | if isinstance(default, str) and default != "_self_": 15 | with open(os.path.join(config_dir, f"{default}.yaml"), "r") as f: 16 | default_config = yaml.safe_load(f) 17 | params = default_config.get("hydra", {}).get("sweeper", {}).get("params", {}) 18 | 19 | if len(params) == 0: 20 | run_counts[config_file] = run_counts.get(config_file, 1) 21 | else: 22 | for param_values in params.values(): 23 | run_counts[config_file] = run_counts.get(config_file, 1) * len(param_values.split(",")) 24 | 25 | 26 | for config_file, run_count in run_counts.items(): 27 | print(f"{config_file}: {run_count} runs") 28 | 29 | print(f"Total runs: {sum(run_counts.values())}") 30 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/openvino/utils.py: -------------------------------------------------------------------------------- 1 | TASKS_TO_OPENVINO_MODELS = { 2 | "fill-mask": "optimum.intel.openvino.OVModelForMaskedLM", 3 | "text-generation": "optimum.intel.openvino.OVModelForCausalLM", 4 | "text2text-generation": "optimum.intel.openvino.OVModelForSeq2SeqLM", 5 | "feature-extraction": "optimum.intel.openvino.OVModelForFeatureExtraction", 6 | "text-classification": "optimum.intel.openvino.OVModelForSequenceClassification", 7 | "token-classification": "optimum.intel.openvino.OVModelForTokenClassification", 8 | "question-answering": "optimum.intel.openvino.OVModelForQuestionAnswering", 9 | "image-classification": "optimum.intel.openvino.OVModelForImageClassification", 10 | "image-text-to-text": "optimum.intel.openvino.OVModelForVisualCausalLM", 11 | "audio-classification": "optimum.intel.openvino.OVModelForAudioClassification", 12 | "pix2struct": "optimum.intel.openvino.OVModelForPix2Struct", 13 | } 14 | TASKS_TO_OPENVINO_PIPELINES = { 15 | "inpainting": "optimum.intel.openvino.OVPipelineForInpainting", 16 | "text-to-image": "optimum.intel.openvino.OVPipelineForText2Image", 17 | "image-to-image": "optimum.intel.openvino.OVPipelineForImage2Image", 18 | } 19 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/ipex/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | from ...import_utils import ipex_version 5 | from ..config import BackendConfig 6 | 7 | TORCH_DTYPES = ["bfloat16", "float16", "float32", "auto"] 8 | 9 | 10 | @dataclass 11 | 
class IPEXConfig(BackendConfig): 12 | name: str = "ipex" 13 | version: Optional[str] = ipex_version() 14 | _target_: str = "optimum_benchmark.backends.ipex.backend.IPEXBackend" 15 | 16 | no_weights: bool = False 17 | 18 | # ipexmodel kwargs 19 | torch_dtype: Optional[str] = None 20 | 21 | def __post_init__(self): 22 | super().__post_init__() 23 | 24 | self.device = self.device.lower() 25 | 26 | if self.device not in ["cpu", "xpu"]: 27 | raise ValueError(f"IPEXBackend only supports CPU and XPU devices. Got {self.device} instead.") 28 | 29 | if self.model_kwargs.get("torch_dtype", None) is not None: 30 | raise ValueError( 31 | "`torch_dtype` is an explicit argument in the PyTorch backend config. " 32 | "Please remove it from the `model_kwargs` and set it in the backend config directly." 33 | ) 34 | 35 | if self.torch_dtype is not None and self.torch_dtype not in TORCH_DTYPES: 36 | raise ValueError(f"`torch_dtype` must be one of {TORCH_DTYPES}. Got {self.torch_dtype} instead.") 37 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_ipex.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU IPEX Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_ipex_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'ipex') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_ipex') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-cli-cpu-ipex 50 | 51 | - name: Run examples 52 | run: | 53 | make test-cli-cpu-ipex-examples 54 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_pytorch.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU PyTorch tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_pytorch_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'pytorch') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_pytorch') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Checkout 40 | uses: 
actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-cli-cpu-pytorch 50 | 51 | - name: Run examples 52 | run: | 53 | make test-cli-cpu-pytorch-examples 54 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_openvino.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU OpenVINO Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_openvino_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'openvino') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_openvino') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-cli-cpu-openvino 50 | 51 | - name: Run examples 52 | run: | 53 | make test-cli-cpu-openvino-examples 54 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_llama_cpp.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU LlamaCpp Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_llama_cpp_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'llama_cpp') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_llama_cpp') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-cli-cpu-llama-cpp 50 | 51 | - name: Run examples 52 | run: | 53 | make test-cli-cpu-llama-cpp-examples 54 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_onnxruntime.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU ONNXRuntime Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | 
- synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_onnxruntime_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'onnxruntime') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_onnxruntime') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-cli-cpu-onnxruntime 50 | 51 | - name: Run examples 52 | run: | 53 | make test-cli-cpu-onnxruntime-examples 54 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cuda_py_txi.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CUDA Py-TXI Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: auto 24 | 25 | jobs: 26 | run_cli_cuda_py_txi_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cuda') || 32 | contains( github.event.pull_request.labels.*.name, 'py_txi') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_py_txi') 34 | }} 35 | 36 | runs-on: 37 | group: aws-g5-4xlarge-plus 38 | 39 | steps: 40 | - name: Checkout 41 | uses: actions/checkout@v4 42 | 43 | - name: Install uv 44 | uses: astral-sh/setup-uv@v6 45 | with: 46 | enable-cache: true 47 | 48 | - name: Run tests 49 | run: | 50 | make test-cli-cuda-py-txi 51 | 52 | - name: Run examples 53 | run: | 54 | make test-cli-cuda-py-txi-examples 55 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cuda_onnxruntime.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CUDA ONNXRuntime Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: auto 24 | 25 | jobs: 26 | run_cli_cuda_onnxruntime_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cuda') || 32 | contains( github.event.pull_request.labels.*.name, 'onnxruntime') || 33 | contains( github.event.pull_request.labels.*.name, 
'cli_cuda_onnxruntime') 34 | }} 35 | 36 | runs-on: 37 | group: aws-g5-4xlarge-plus 38 | 39 | steps: 40 | - name: Checkout 41 | uses: actions/checkout@v4 42 | 43 | - name: Install uv 44 | uses: astral-sh/setup-uv@v6 45 | with: 46 | enable-cache: true 47 | 48 | - name: Run tests 49 | run: | 50 | make test-cli-cuda-onnxruntime 51 | 52 | - name: Run examples 53 | run: | 54 | make test-cli-cuda-onnxruntime-examples 55 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/diffusers_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | from ..import_utils import is_diffusers_available 4 | from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES, map_from_synonym_task 5 | 6 | if is_diffusers_available(): 7 | import diffusers 8 | from diffusers import DiffusionPipeline 9 | 10 | 11 | def get_diffusers_auto_pipeline_class_for_task(task: str): 12 | task = map_from_synonym_task(task) 13 | 14 | if not is_diffusers_available(): 15 | raise ImportError("diffusers is not available. Please, pip install diffusers.") 16 | 17 | if task not in TASKS_TO_AUTO_PIPELINE_CLASS_NAMES: 18 | raise ValueError(f"Task {task} not supported for diffusers") 19 | 20 | model_loader_name = TASKS_TO_AUTO_PIPELINE_CLASS_NAMES[task] 21 | 22 | return getattr(diffusers, model_loader_name) 23 | 24 | 25 | def get_diffusers_pretrained_config(model: str, **kwargs) -> Dict[str, int]: 26 | if not is_diffusers_available(): 27 | raise ImportError("diffusers is not available. Please, pip install diffusers.") 28 | 29 | config = DiffusionPipeline.load_config(model, **kwargs) 30 | pipeline_config = config[0] if isinstance(config, tuple) else config 31 | return pipeline_config 32 | 33 | 34 | def extract_diffusers_shapes_from_model(**kwargs) -> Dict[str, int]: 35 | if not is_diffusers_available(): 36 | raise ImportError("diffusers is not available. 
Please, pip install diffusers.") 37 | 38 | shapes = {} 39 | 40 | return shapes 41 | -------------------------------------------------------------------------------- /.github/workflows/test_api_cpu.yaml: -------------------------------------------------------------------------------- 1 | name: API CPU Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_api_cpu_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'api') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'api_cpu') 33 | }} 34 | 35 | runs-on: ubuntu-latest 36 | 37 | steps: 38 | - name: Checkout 39 | uses: actions/checkout@v4 40 | 41 | - name: Install uv 42 | uses: astral-sh/setup-uv@v6 43 | with: 44 | enable-cache: true 45 | 46 | - name: Run tests 47 | run: | 48 | make test-api-cpu 49 | env: 50 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 51 | PUSH_REPO_ID: optimum-benchmark/cpu 52 | 53 | - name: Run examples 54 | run: | 55 | make test-api-cpu-examples 56 | env: 57 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 58 | PUSH_REPO_ID: optimum-benchmark/cpu 59 | -------------------------------------------------------------------------------- /optimum_benchmark/generators/input_generator.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | 3 | from .base import BaseGenerator 4 | from .model_generator import MODEL_TYPE_TO_GENERATORS 5 | from .task_generator import TASKS_TO_GENERATORS 6 | 7 | 8 | class InputGenerator: 9 | generator: BaseGenerator 10 | 11 | def __init__( 12 | self, 13 | task: str, 14 | input_shapes: Dict[str, int], 15 | model_shapes: Dict[str, int], 16 | model_type: Optional[str] = None, 17 | ) -> None: 18 | # input_shapes take precedence over model_shapes 19 | all_shapes = {**model_shapes, **input_shapes} 20 | 21 | if model_type in MODEL_TYPE_TO_GENERATORS: 22 | self.generator = MODEL_TYPE_TO_GENERATORS[model_type](shapes=all_shapes, with_labels=False) 23 | elif task in TASKS_TO_GENERATORS: 24 | self.generator = TASKS_TO_GENERATORS[task](shapes=all_shapes, with_labels=False) 25 | else: 26 | raise NotImplementedError( 27 | f"Task {task} is not supported for input generation. " 28 | f"Available tasks: {list(TASKS_TO_GENERATORS.keys())}. " 29 | f"Available model types: {list(MODEL_TYPE_TO_GENERATORS.keys())}. " 30 | "If you want to add support for this task or model type, " 31 | "please submit a PR or a feature request to optimum-benchmark." 
32 | ) 33 | 34 | def __call__(self) -> Dict[str, Any]: 35 | task_input = self.generator() 36 | return task_input 37 | -------------------------------------------------------------------------------- /.github/workflows/test_api_cuda.yaml: -------------------------------------------------------------------------------- 1 | name: API CUDA Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_api_cuda_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'api') || 31 | contains( github.event.pull_request.labels.*.name, 'cuda') || 32 | contains( github.event.pull_request.labels.*.name, 'api_cuda') 33 | }} 34 | 35 | runs-on: 36 | group: aws-g5-4xlarge-plus 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | 42 | - name: Install uv 43 | uses: astral-sh/setup-uv@v6 44 | with: 45 | enable-cache: true 46 | 47 | - name: Run tests 48 | run: | 49 | make test-api-cuda 50 | env: 51 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 52 | PUSH_REPO_ID: optimum-benchmark/cuda 53 | 54 | - name: Run examples 55 | run: | 56 | make test-api-cuda-examples 57 | env: 58 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 59 | PUSH_REPO_ID: optimum-benchmark/cuda 60 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_bert.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig 4 | from optimum_benchmark.logging_utils import setup_logging 5 | 6 | BENCHMARK_NAME = "cuda_pytorch_bert" 7 | MODEL = "google-bert/bert-base-uncased" 8 | PUSH_REPO_ID = os.environ.get("PUSH_REPO_ID", None) 9 | 10 | 11 | if __name__ == "__main__": 12 | level = os.environ.get("LOG_LEVEL", "INFO") 13 | to_file = os.environ.get("LOG_TO_FILE", "0") == "1" 14 | setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS") 15 | 16 | launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn") 17 | backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model=MODEL) 18 | scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128}) 19 | benchmark_config = BenchmarkConfig( 20 | name=BENCHMARK_NAME, 21 | launcher=launcher_config, 22 | scenario=scenario_config, 23 | backend=backend_config, 24 | print_report=True, 25 | log_report=True, 26 | ) 27 | benchmark_report = Benchmark.launch(benchmark_config) 28 | benchmark = Benchmark(config=benchmark_config, report=benchmark_report) 29 | 30 | if PUSH_REPO_ID is not None: 31 | benchmark_config.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME) 32 | benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME) 33 | benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME) 34 | -------------------------------------------------------------------------------- /.github/workflows/test_api_misc.yaml: -------------------------------------------------------------------------------- 1 | name: API Misc Tests 2 | 
3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_api_misc_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'api') || 31 | contains( github.event.pull_request.labels.*.name, 'misc') || 32 | contains( github.event.pull_request.labels.*.name, 'api_misc') 33 | }} 34 | 35 | strategy: 36 | fail-fast: false 37 | matrix: 38 | os: [ubuntu-latest, "macos-latest", windows-latest] 39 | python: ["3.10", "3.12"] 40 | 41 | name: API Misc Tests - OS ${{ matrix.os }} - Python ${{ matrix.python }} 42 | 43 | runs-on: ${{ matrix.os }} 44 | 45 | steps: 46 | - name: Checkout 47 | uses: actions/checkout@v4 48 | 49 | - name: Install uv 50 | uses: astral-sh/setup-uv@v6 51 | with: 52 | enable-cache: true 53 | python-version: ${{ matrix.python }} 54 | 55 | - name: Run tests 56 | run: | 57 | make test-api-misc 58 | env: 59 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 60 | PUSH_REPO_ID: optimum-benchmark/misc-${{ matrix.os }}-${{ matrix.python }} 61 | -------------------------------------------------------------------------------- /scripts/update_ci_badges.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from pathlib import Path 4 | 5 | # list all workflow files 6 | workflow_dir = ".github/workflows" 7 | workflow_files = os.listdir(workflow_dir) 8 | 9 | # generate the markdown for the badges 10 | base_url = "https://github.com/huggingface/optimum-benchmark/actions/workflows" 11 | api_badges = [] 12 | cli_badges = [] 13 | for file in workflow_files: 14 | # extract the name from the file name 15 | name = re.sub(r"(test_|\.yaml)", "", file).upper() 16 | badge_url = f"{base_url}/{file}/badge.svg" 17 | workflow_url = f"{base_url}/{file}" 18 | badge = f"[![{name}]({badge_url})]({workflow_url})" 19 | if "api" in file: 20 | api_badges.append(badge) 21 | elif "cli" in file: 22 | cli_badges.append(badge) 23 | 24 | # order the badges 25 | api_badges = sorted(api_badges) 26 | cli_badges = sorted(cli_badges) 27 | 28 | # read the README file 29 | readme_path = Path("README.md") 30 | readme_text = readme_path.read_text() 31 | 32 | # find the position to insert the badges 33 | api_start_pos = readme_text.index("### API 📈") + len("### API 📈\n\n") 34 | api_end_pos = readme_text.index("#", api_start_pos) 35 | cli_start_pos = readme_text.index("### CLI 📈") + len("### CLI 📈\n\n") 36 | cli_end_pos = readme_text.index("#", cli_start_pos) 37 | 38 | # insert the badges into the README text 39 | new_readme_text = ( 40 | readme_text[:api_start_pos] 41 | + "\n".join(api_badges) 42 | + "\n\n" 43 | + readme_text[api_end_pos:cli_start_pos] 44 | + "\n".join(cli_badges) 45 | + "\n\n" 46 | + readme_text[cli_end_pos:] 47 | ) 48 | 49 | # write the new README text to the file 50 | readme_path.write_text(new_readme_text) 51 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_misc.yaml: -------------------------------------------------------------------------------- 1 | name: CLI Misc Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | 
pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_misc_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'misc') || 32 | contains( github.event.pull_request.labels.*.name, 'cli_misc') 33 | }} 34 | 35 | strategy: 36 | fail-fast: false 37 | matrix: 38 | os: [ubuntu-latest, "macos-latest", windows-latest] 39 | python: ["3.10", "3.12"] 40 | 41 | name: CLI Misc Tests - OS ${{ matrix.os }} - Python ${{ matrix.python }} 42 | 43 | runs-on: ${{ matrix.os }} 44 | 45 | steps: 46 | - name: Checkout 47 | uses: actions/checkout@v4 48 | 49 | - name: Install uv 50 | uses: astral-sh/setup-uv@v6 51 | with: 52 | enable-cache: true 53 | python-version: ${{ matrix.python }} 54 | 55 | - name: Install Linux packages 56 | if: matrix.os == 'ubuntu-latest' 57 | run: | 58 | sudo apt-get update 59 | sudo apt-get install -y numactl 60 | 61 | - name: Run tests 62 | run: | 63 | make test-cli-misc 64 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cpu_py_txi.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CPU Py-TXI Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: cpu 24 | 25 | jobs: 26 | run_cli_cpu_py_txi_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cpu') || 32 | contains( github.event.pull_request.labels.*.name, 'py_txi') || 33 | contains( github.event.pull_request.labels.*.name, 'cli_cpu_py_txi') 34 | }} 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - name: Free Disk Space (Ubuntu) 40 | uses: jlumbroso/free-disk-space@main 41 | 42 | - name: Checkout 43 | uses: actions/checkout@v4 44 | 45 | - name: Install uv 46 | uses: astral-sh/setup-uv@v6 47 | with: 48 | enable-cache: true 49 | 50 | - name: Pull images 51 | run: | 52 | docker pull ghcr.io/huggingface/text-generation-inference:3.3-intel-cpu 53 | docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.8 54 | 55 | - name: Run tests 56 | run: | 57 | make test-cli-cpu-py-txi 58 | 59 | - name: Run examples 60 | run: | 61 | make test-cli-cpu-py-txi-examples 62 | -------------------------------------------------------------------------------- /optimum_benchmark/profilers/fx_profiler.py: -------------------------------------------------------------------------------- 1 | import time 2 | from logging import getLogger 3 | from typing import Any, List, Tuple 4 | 5 | import torch 6 | from torch.fx import Interpreter 7 | from torch.fx.graph_module import GraphModule 8 | from torch.fx.node import Node 9 | 10 | LOGGER = getLogger("fx_profiler") 11 | 12 | 13 | 
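# Illustrative usage sketch (not part of the original module): `model` and
# `example_inputs` below are hypothetical placeholders for any torch.fx-traceable
# nn.Module and its keyword inputs.
#
#     traced = torch.fx.symbolic_trace(model)        # produces a GraphModule
#     profiler = FXProfilingWrapper(traced)
#     outputs = profiler(**example_inputs)           # kwargs are forwarded positionally to Interpreter.run
#     records = profiler.get_profiling_records()     # [(node_name, node_op, runtime_in_seconds), ...]
#
# Per-node runtimes are measured with CUDA events when the wrapped module lives on a
# CUDA device and with time.perf_counter_ns otherwise (see run_node below).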
class FXProfilingWrapper(Interpreter): 14 | def __init__(self, module: GraphModule): 15 | super().__init__(module) 16 | self.profiling_records: List[Tuple[str, str, float]] = [] 17 | 18 | def run(self, *args) -> Any: 19 | return super().run(*args) 20 | 21 | def run_node(self, node: Node) -> Any: 22 | if self.module.device.type == "cuda": 23 | start = torch.cuda.Event(enable_timing=True) 24 | end = torch.cuda.Event(enable_timing=True) 25 | start.record(stream=torch.cuda.current_stream()) 26 | return_val = super().run_node(node) 27 | end.record(stream=torch.cuda.current_stream()) 28 | torch.cuda.synchronize() 29 | node_runtime = start.elapsed_time(end) / 1e3 30 | else: 31 | start = time.perf_counter_ns() 32 | return_val = super().run_node(node) 33 | end = time.perf_counter_ns() 34 | node_runtime = (end - start) / 1e9 35 | 36 | LOGGER.debug(f"Node {node.name} took {node_runtime:.2e} seconds") 37 | self.profiling_records.append((node.name, node.op, node_runtime)) 38 | 39 | return return_val 40 | 41 | def __call__(self, **kwargs) -> Any: 42 | args = kwargs.values() 43 | return super().run(*args) 44 | 45 | def get_profiling_records(self) -> List[Tuple[str, str, float]]: 46 | return self.profiling_records 47 | -------------------------------------------------------------------------------- /optimum_benchmark/generators/dataset_generator.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | from datasets import Dataset 4 | 5 | from .base import BaseGenerator 6 | from .model_generator import MODEL_TYPE_TO_GENERATORS 7 | from .task_generator import TASKS_TO_GENERATORS 8 | 9 | 10 | class DatasetGenerator: 11 | generator: BaseGenerator 12 | 13 | def __init__( 14 | self, 15 | task: str, 16 | dataset_shapes: Dict[str, int], 17 | model_shapes: Dict[str, int], 18 | model_type: Optional[str] = None, 19 | ) -> None: 20 | # dataset_shapes take precedence over model_shapes 21 | all_shapes = {**model_shapes, **dataset_shapes} 22 | all_shapes["batch_size"] = all_shapes.pop("dataset_size", None) 23 | 24 | if model_type in MODEL_TYPE_TO_GENERATORS: 25 | self.generator = MODEL_TYPE_TO_GENERATORS[model_type](shapes=all_shapes, with_labels=True) 26 | elif task in TASKS_TO_GENERATORS: 27 | self.generator = TASKS_TO_GENERATORS[task](shapes=all_shapes, with_labels=True) 28 | else: 29 | raise NotImplementedError( 30 | f"Task {task} is not supported for dataset generation. " 31 | f"Available tasks: {list(TASKS_TO_GENERATORS.keys())}. " 32 | f"Available model types: {list(MODEL_TYPE_TO_GENERATORS.keys())}. " 33 | "If you want to add support for this task or model type, " 34 | "please submit a PR or a feature request to optimum-benchmark." 
35 | ) 36 | 37 | def __call__(self) -> Dataset: 38 | task_dataset = self.generator() 39 | task_dataset = Dataset.from_dict(task_dataset) 40 | task_dataset.set_format(type="torch", columns=list(task_dataset.features.keys())) 41 | return task_dataset 42 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/config.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from dataclasses import dataclass, field 3 | from logging import getLogger 4 | from typing import Any, Dict, Optional, TypeVar 5 | 6 | from ..system_utils import is_nvidia_system, is_rocm_system 7 | 8 | LOGGER = getLogger("launcher") 9 | 10 | 11 | @dataclass 12 | class LauncherConfig(ABC): 13 | name: str 14 | _target_: str 15 | 16 | device_isolation: bool = False 17 | device_isolation_action: Optional[str] = None 18 | 19 | numactl: bool = False 20 | numactl_kwargs: Dict[str, Any] = field(default_factory=dict) 21 | 22 | def __post_init__(self): 23 | if self.device_isolation and not is_nvidia_system() and not is_rocm_system(): 24 | raise ValueError( 25 | "Device isolation is only supported on NVIDIA and ROCm systems. " 26 | "Please set `device_isolation` to False or make sure your drivers " 27 | "are correctly installed by running `nvidia-smi` or `rocm-smi`." 28 | ) 29 | 30 | if self.device_isolation and self.device_isolation_action is None: 31 | LOGGER.warning( 32 | "Device isolation is enabled but no action is specified. " 33 | "Please set `device_isolation_action` to either `error`, `warn`, or `kill`. " 34 | "Defaulting to `warn`." 35 | ) 36 | self.device_isolation_action = "warn" 37 | 38 | elif self.device_isolation and self.device_isolation_action not in {"error", "warn", "kill"}: 39 | raise ValueError( 40 | f"Unsupported device isolation action {self.device_isolation_action}. " 41 | "Please set `device_isolation_action` to either `error`, `warn`, or `kill`." 
42 | ) 43 | 44 | 45 | LauncherConfigT = TypeVar("LauncherConfigT", bound=LauncherConfig) 46 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/onnxruntime/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from onnxruntime.quantization import CalibrationMethod, QuantFormat, QuantizationMode, QuantType 4 | from optimum.pipelines import ORT_SUPPORTED_TASKS 5 | 6 | TASKS_TO_ORTMODELS = { 7 | task: f"optimum.onnxruntime.{task_dict['class'][0].__name__}" for task, task_dict in ORT_SUPPORTED_TASKS.items() 8 | } 9 | 10 | TASKS_TO_ONNXRUNTIME_PIPELINES = { 11 | "inpainting": "optimum.onnxruntime.ORTPipelineForInpainting", 12 | "text-to-image": "optimum.onnxruntime.ORTPipelineForText2Image", 13 | "image-to-image": "optimum.onnxruntime.ORTPipelineForImage2Image", 14 | } 15 | 16 | 17 | def format_calibration_config(calibration_config: Dict[str, Any]) -> None: 18 | if calibration_config.get("method", None) is not None: 19 | calibration_config["method"] = CalibrationMethod[calibration_config["method"]] 20 | 21 | return calibration_config 22 | 23 | 24 | def format_quantization_config(quantization_config: Dict[str, Any]) -> None: 25 | """Format the quantization dictionary for onnxruntime.""" 26 | # the conditionals are here because some quantization strategies don't have all the options 27 | if quantization_config.get("format", None) is not None: 28 | quantization_config["format"] = QuantFormat.from_string(quantization_config["format"]) 29 | if quantization_config.get("mode", None) is not None: 30 | quantization_config["mode"] = QuantizationMode.from_string(quantization_config["mode"]) 31 | if quantization_config.get("activations_dtype", None) is not None: 32 | quantization_config["activations_dtype"] = QuantType.from_string(quantization_config["activations_dtype"]) 33 | if quantization_config.get("weights_dtype", None) is not None: 34 | quantization_config["weights_dtype"] = QuantType.from_string(quantization_config["weights_dtype"]) 35 | 36 | return quantization_config 37 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/tensorrt_llm/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | 4 | from ...import_utils import tesnorrt_llm_version 5 | from ..config import BackendConfig 6 | 7 | SUPPORTED_DTYPES = [None, "float16", "bfloat16", "float32"] 8 | 9 | 10 | @dataclass 11 | class TRTLLMConfig(BackendConfig): 12 | name: str = "tensorrt-llm" 13 | version: Optional[str] = tesnorrt_llm_version() 14 | _target_: str = "optimum_benchmark.backends.tensorrt_llm.backend.TRTLLMBackend" 15 | 16 | no_weights: bool = False 17 | 18 | # trtllm kwargs 19 | tp: Optional[int] = None 20 | pp: Optional[int] = None 21 | dtype: Optional[str] = None 22 | use_fp8: Optional[bool] = None 23 | world_size: Optional[int] = None 24 | gpus_per_node: Optional[int] = None 25 | max_input_len: Optional[int] = None 26 | max_output_len: Optional[int] = None 27 | max_batch_size: Optional[int] = None 28 | max_new_tokens: Optional[int] = None 29 | max_prompt_length: Optional[int] = None 30 | optimization_level: Optional[int] = None 31 | use_cuda_graph: Optional[bool] = None 32 | 33 | def __post_init__(self) -> None: 34 | super().__post_init__() 35 | 36 | if self.device != "cuda": 37 | raise NotImplementedError(f"TRTLLMBackend only 
supports device cuda, got {self.device}") 38 | 39 | if self.dtype not in SUPPORTED_DTYPES: 40 | raise ValueError(f"dtype must be one of float16, bfloat16, float32, got {self.dtype}") 41 | 42 | if self.gpus_per_node is not None and self.world_size is not None and self.gpus_per_node != self.world_size: 43 | raise ValueError(f"gpus_per_node ({self.gpus_per_node}) != world_size ({self.world_size})") 44 | 45 | if ( 46 | self.world_size is not None 47 | and self.pp is not None 48 | and self.tp is not None 49 | and self.world_size != self.pp * self.tp 50 | ): 51 | raise ValueError(f"world_size ({self.gpus_per_node}) != pp ({self.pp}) * tp ({self.tp})") 52 | -------------------------------------------------------------------------------- /optimum_benchmark/generators/base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | import string 4 | from abc import ABC 5 | from typing import Dict, List, Tuple 6 | 7 | import torch 8 | 9 | LOGGER = logging.getLogger("generators") 10 | 11 | 12 | class BaseGenerator(ABC): 13 | def __init__(self, shapes: Dict[str, int], with_labels: bool): 14 | self.shapes = shapes 15 | self.with_labels = with_labels 16 | 17 | def assert_not_missing_shapes(self, required_shapes: List[str]): 18 | for shape in required_shapes: 19 | assert self.shapes.get(shape, None) is not None, ( 20 | f"{shape} either couldn't be inferred automatically from model artifacts or should be provided by the user. " 21 | f"Please provide it under `scenario.input_shapes.{shape}` or open an issue/PR in optimum-benchmark repository. " 22 | ) 23 | 24 | @staticmethod 25 | def generate_constant_integers(value: int, shape: Tuple[int]): 26 | return torch.full(shape, value, dtype=torch.int64) 27 | 28 | @staticmethod 29 | def generate_constant_floats(value: float, shape: Tuple[int]): 30 | return torch.full(shape, value, dtype=torch.float32) 31 | 32 | @staticmethod 33 | def generate_random_integers(min_value: int, max_value: int, shape: Tuple[int]): 34 | return torch.randint(min_value, max_value, shape) 35 | 36 | @staticmethod 37 | def generate_random_floats(min_value: float, max_value: float, shape: Tuple[int]): 38 | return torch.rand(shape) * (max_value - min_value) + min_value 39 | 40 | @staticmethod 41 | def generate_ranges(start: int, stop: int, shape: Tuple[int]): 42 | return torch.arange(start, stop).repeat(shape[0], 1) 43 | 44 | @staticmethod 45 | def generate_random_strings(num_seq: int) -> List[str]: 46 | return [ 47 | "".join(random.choice(string.ascii_letters + string.digits) for _ in range(random.randint(10, 100))) 48 | for _ in range(num_seq) 49 | ] 50 | 51 | def __call__(self): 52 | raise NotImplementedError("Generator must implement __call__ method") 53 | -------------------------------------------------------------------------------- /docker/cpu/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ARG UBUNTU_VERSION=22.04 16 | 17 | FROM ubuntu:${UBUNTU_VERSION} 18 | 19 | # Install necessary packages 20 | ENV DEBIAN_FRONTEND=noninteractive 21 | ENV PATH="/home/user/.local/bin:${PATH}" 22 | RUN apt-get update && apt-get install -y --no-install-recommends \ 23 | sudo build-essential git bash-completion numactl \ 24 | python3.10 python3-pip python3.10-dev google-perftools && \ 25 | apt-get clean && rm -rf /var/lib/apt/lists/* && \ 26 | update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ 27 | pip install --no-cache-dir --upgrade pip setuptools wheel intel-openmp 28 | 29 | ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so" 30 | 31 | # Install PyTorch 32 | ARG TORCH_VERSION="" 33 | ARG TORCH_RELEASE_TYPE=stable 34 | 35 | RUN if [ -n "${TORCH_VERSION}" ]; then \ 36 | pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/cpu ; \ 37 | elif [ "${TORCH_RELEASE_TYPE}" = "stable" ]; then \ 38 | pip install --no-cache-dir torch torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/cpu ; \ 39 | elif [ "${TORCH_RELEASE_TYPE}" = "nightly" ]; then \ 40 | pip install --no-cache-dir --pre torch torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/nightly/cpu ; \ 41 | else \ 42 | echo "Error: Invalid TORCH_RELEASE_TYPE. Must be 'stable', 'nightly', or specify a TORCH_VERSION." && exit 1 ; \ 43 | fi 44 | -------------------------------------------------------------------------------- /optimum_benchmark/profilers/ort_profiler.py: -------------------------------------------------------------------------------- 1 | import json 2 | from logging import getLogger 3 | from typing import List, Tuple 4 | 5 | import pandas as pd 6 | from optimum.onnxruntime import ORTModel 7 | 8 | LOGGER = getLogger("ort_profiler") 9 | 10 | 11 | class ORTProfilingWrapper: 12 | def __init__(self, module: ORTModel): 13 | self.module = module 14 | self.profiling_records: List[Tuple[str, str, float]] = [] 15 | 16 | def __call__(self, *args, **kwargs): 17 | return self.module(*args, **kwargs) 18 | 19 | def get_profiling_records(self) -> List[Tuple[str, str, float]]: 20 | profiling_json = self.module.model.end_profiling() # type: ignore 21 | with open(profiling_json) as file_obj: 22 | profiling_data = json.load(file_obj) 23 | if isinstance(profiling_data, dict): 24 | profiling_data = profiling_data["traceEvents"] 25 | 26 | profiling_records = extract_last_run_records(profiling_data) 27 | return normalize_records(profiling_records) 28 | 29 | 30 | def normalize_records(data) -> List[Tuple[str, str, float]]: 31 | records = [] 32 | for item in data: 33 | cat = item.get("cat") 34 | if cat is None: 35 | continue 36 | dur = item.get("dur") 37 | if dur is None: 38 | continue 39 | arg = item.get("args") 40 | if arg is None: 41 | continue 42 | op_name = arg.get("op_name") 43 | 44 | name = item["name"] 45 | 46 | if cat != "Kernel" and not name.endswith("kernel_time"): 47 | continue 48 | 49 | if cat in ["Kernel", "Node"]: 50 | LOGGER.debug(f"Kernel/Node {name} took {dur / 1e6:.2e} seconds") 51 | records.append((name.replace("_kernel_time", ""), op_name, dur / 1e6)) 52 | 53 | return records 54 | 55 | 56 | def extract_last_run_records(data): 57 | # Here we assume that the traces are properly ordered, so we can simplify the splitting logic. 
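# Concretely: the trace events are grouped by "name" and only the last entry per name
# is kept, so repeated occurrences from earlier (e.g. warmup) runs are dropped and
# only the final run's record survives for each kernel/node.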
58 | return ( 59 | pd.DataFrame(data)[["name", "cat", "dur", "args"]] 60 | .groupby("name") 61 | .last() # not sure if this is the right way to do it 62 | .reset_index() 63 | .to_dict(orient="records") 64 | ) 65 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/vllm/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Any, Dict, Optional 3 | 4 | from ...import_utils import vllm_version 5 | from ..config import BackendConfig 6 | 7 | 8 | @dataclass 9 | class VLLMConfig(BackendConfig): 10 | name: str = "vllm" 11 | version: Optional[str] = vllm_version() 12 | _target_: str = "optimum_benchmark.backends.vllm.backend.VLLMBackend" 13 | 14 | # creates a model from scratch with dummy weights 15 | no_weights: bool = False 16 | 17 | # decides whether to use the offline or online llm engine 18 | serving_mode: str = "offline" 19 | 20 | # passed to EngineArgs 21 | engine_args: Dict[str, Any] = field(default_factory=dict) 22 | 23 | def __post_init__(self): 24 | # duplicates that are handled by the backend config directly 25 | if "model" in self.engine_args: 26 | raise ValueError("model should not be passed in `backend.engine_args`, use `backend.model` instead") 27 | 28 | if "tokenizer" in self.engine_args: 29 | raise ValueError("tokenizer should not be passed in `backend.engine_args`, use `backend.processor` instead") 30 | 31 | if "device" in self.engine_args: 32 | raise ValueError("device should not be passed in `backend.engine_args`, use `backend.device` instead") 33 | 34 | if self.serving_mode not in ["offline", "online"]: 35 | raise ValueError(f"Invalid serving_mode: {self.serving_mode}. Must be 'online' or 'offline'.") 36 | 37 | # needed for task/library/model_type inference 38 | self.model_kwargs = { 39 | "revision": self.engine_args.get("revision", "main"), 40 | "trust_remote_code": self.engine_args.get("trust_remote_code", False), 41 | **self.model_kwargs, 42 | } 43 | self.processor_kwargs = { 44 | "revision": self.engine_args.get("tokenizer_revision", "main"), 45 | "trust_remote_code": self.engine_args.get("trust_remote_code", False), 46 | **self.processor_kwargs, 47 | } 48 | 49 | super().__post_init__() 50 | 51 | if self.engine_args.get("disable_log_stats", None) is None: 52 | self.engine_args["disable_log_stats"] = True 53 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_rocm_pytorch.yaml: -------------------------------------------------------------------------------- 1 | name: CLI ROCm PyTorch Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | jobs: 23 | run_cli_rocm_pytorch_single_gpu_tests: 24 | if: ${{ 25 | (github.event_name == 'push') || 26 | (github.event_name == 'workflow_dispatch') || 27 | contains( github.event.pull_request.labels.*.name, 'cli') || 28 | contains( github.event.pull_request.labels.*.name, 'rocm') || 29 | contains( github.event.pull_request.labels.*.name, 'pytorch') || 30 | contains( github.event.pull_request.labels.*.name, 'single_gpu') || 31 | contains( github.event.pull_request.labels.*.name, 'cli_rocm_pytorch_single_gpu') 32 | }} 
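# Unlike the other CLI workflows in this folder, the ROCm jobs delegate to a reusable
# workflow in huggingface/hf-workflows (referenced below); the pytest_keywords
# expression restricts this job to single-GPU PyTorch CLI tests and excludes
# distributed (dp/tp/ddp/device_map/deepspeed) and quantization (bnb/awq/gptq) cases.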
33 | 34 | uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@testing 35 | with: 36 | test_file: test_cli.py 37 | machine_type: single-gpu 38 | install_extras: testing,diffusers,timm,peft,gptqmodel 39 | pytest_keywords: cli and cuda and pytorch and not (dp or tp or ddp or device_map or deepspeed) and not (bnb or awq or gptq) 40 | 41 | run_cli_rocm_pytorch_multi_gpu_tests: 42 | if: ${{ 43 | (github.event_name == 'push') || 44 | (github.event_name == 'workflow_dispatch') || 45 | contains( github.event.pull_request.labels.*.name, 'cli') || 46 | contains( github.event.pull_request.labels.*.name, 'rocm') || 47 | contains( github.event.pull_request.labels.*.name, 'pytorch') || 48 | contains( github.event.pull_request.labels.*.name, 'multi_gpu') || 49 | contains( github.event.pull_request.labels.*.name, 'cli_rocm_pytorch_multi_gpu') 50 | }} 51 | 52 | uses: huggingface/hf-workflows/.github/workflows/optimum_benchmark_instinct_ci.yaml@testing 53 | with: 54 | test_file: test_cli.py 55 | machine_type: multi-gpu 56 | install_extras: testing,diffusers,timm,peft 57 | pytest_keywords: cli and cuda and pytorch and (dp or tp or ddp or device_map) 58 | -------------------------------------------------------------------------------- /docker/cuda/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ARG CUDA_VERSION=12.8.1 16 | ARG UBUNTU_VERSION=22.04 17 | 18 | FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu${UBUNTU_VERSION} 19 | 20 | # Install necessary packages 21 | ENV DEBIAN_FRONTEND=noninteractive 22 | RUN apt-get update && apt-get install -y --no-install-recommends \ 23 | sudo build-essential git bash-completion \ 24 | python3.10 python3-pip python3.10-dev && \ 25 | apt-get clean && rm -rf /var/lib/apt/lists/* && \ 26 | update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ 27 | pip install --no-cache-dir --upgrade pip setuptools wheel requests 28 | 29 | # Install PyTorch 30 | ARG TORCH_VERSION="" 31 | ARG TORCH_CUDA=cu128 32 | ARG TORCH_RELEASE_TYPE=stable 33 | 34 | RUN if [ -n "${TORCH_VERSION}" ]; then \ 35 | pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \ 36 | elif [ "${TORCH_RELEASE_TYPE}" = "stable" ]; then \ 37 | pip install --no-cache-dir torch torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/${TORCH_CUDA} ; \ 38 | elif [ "${TORCH_RELEASE_TYPE}" = "nightly" ]; then \ 39 | pip install --no-cache-dir --pre torch torchvision torchaudio torchao --index-url https://download.pytorch.org/whl/nightly/${TORCH_CUDA} ; \ 40 | else \ 41 | echo "Error: Invalid TORCH_RELEASE_TYPE. Must be 'stable', 'nightly', or specify a TORCH_VERSION." 
&& exit 1 ; \ 42 | fi 43 | 44 | # Install quantization libraries from source 45 | ENV MAX_JOBS=1 46 | ENV CUDA_VERSION=12.8 47 | ENV GPTQMODEL_FORCE_BUILD=1 48 | ENV TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0" 49 | 50 | RUN pip install -v gptqmodel --no-build-isolation --no-cache-dir 51 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/openvino/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Any, Dict, Optional 3 | 4 | from ...import_utils import openvino_version 5 | from ..config import BackendConfig 6 | 7 | TORCH_DTYPES = ["bfloat16", "float16", "float32", "auto"] 8 | 9 | 10 | @dataclass 11 | class OpenVINOConfig(BackendConfig): 12 | name: str = "openvino" 13 | version: Optional[str] = openvino_version() 14 | _target_: str = "optimum_benchmark.backends.openvino.backend.OpenVINOBackend" 15 | 16 | no_weights: bool = False 17 | 18 | # ovmodel kwargs 19 | export: Optional[bool] = None 20 | use_cache: Optional[bool] = None 21 | use_merged: Optional[bool] = None 22 | torch_dtype: Optional[str] = None 23 | load_in_8bit: Optional[bool] = None 24 | load_in_4bit: Optional[bool] = None 25 | ov_config: Optional[Dict[str, Any]] = None 26 | quantization_config: Optional[Dict[str, Any]] = None 27 | 28 | # compilation options 29 | half: bool = False 30 | compile: bool = False 31 | reshape: bool = False 32 | reshape_kwargs: Dict[str, int] = field(default_factory=dict) 33 | 34 | def __post_init__(self): 35 | super().__post_init__() 36 | 37 | if self.device_ids is not None: 38 | raise NotImplementedError( 39 | "OpenVINOBackend does not support device_ids. " 40 | "Please use the `device` argument with OpenVINO device notation, e.g. 'CPU', 'GPU.0'" 41 | ) 42 | 43 | if self.model_kwargs.get("torch_dtype", None) is not None: 44 | raise ValueError( 45 | "`torch_dtype` is an explicit argument in the OpenVINO backend config. " 46 | "Please remove it from the `model_kwargs` and set it in the backend config directly." 47 | ) 48 | 49 | if self.torch_dtype is not None and self.torch_dtype not in TORCH_DTYPES: 50 | raise ValueError(f"torch_dtype should be one of None or {TORCH_DTYPES}, got {self.torch_dtype}") 51 | 52 | if self.intra_op_num_threads is not None: 53 | raise NotImplementedError("OpenVINOBackend does not support intra_op_num_threads. Please use the ov_config") 54 | 55 | if self.inter_op_num_threads is not None: 56 | raise NotImplementedError("OpenVINOBackend does not support inter_op_num_threads. 
Please use the ov_config") 57 | -------------------------------------------------------------------------------- /optimum_benchmark/logging_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.config 3 | from subprocess import PIPE, STDOUT, Popen 4 | from typing import List, Optional 5 | 6 | 7 | def setup_logging( 8 | level: str = "INFO", 9 | to_file: bool = False, 10 | use_colorlog: bool = True, 11 | prefix: Optional[str] = None, 12 | disable_existing_loggers: bool = False, 13 | ): 14 | # base logging config 15 | logging_config = { 16 | "version": 1, 17 | "handlers": { 18 | "console": {"formatter": "simple", "stream": "ext://sys.stdout", "class": "logging.StreamHandler"}, 19 | }, 20 | "root": {"level": level, "handlers": ["console"]}, 21 | "disable_existing_loggers": disable_existing_loggers, 22 | } 23 | 24 | # formatters 25 | logging_config["formatters"] = { 26 | "simple": {"format": "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s"}, 27 | } 28 | 29 | # add file handler 30 | if to_file: 31 | logging_config["handlers"]["file"] = { 32 | "formatter": "simple", 33 | "filename": "benchmark.log", 34 | "class": "logging.FileHandler", 35 | } 36 | logging_config["root"]["handlers"].append("file") 37 | 38 | # use colorlog 39 | if use_colorlog: 40 | logging_config["formatters"]["colorlog"] = { 41 | "()": "colorlog.ColoredFormatter", 42 | "format": "[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s", 43 | "log_colors": {"DEBUG": "purple", "INFO": "green", "WARNING": "yellow", "CRITICAL": "red", "ERROR": "red"}, 44 | } 45 | for handler in logging_config["handlers"]: 46 | logging_config["handlers"][handler]["formatter"] = "colorlog" 47 | 48 | # format prefix 49 | if prefix is not None: 50 | for formatter in logging_config["formatters"]: 51 | logging_config["formatters"][formatter]["format"] = ( 52 | f"[{prefix}]" + logging_config["formatters"][formatter]["format"] 53 | ) 54 | 55 | logging.config.dictConfig(logging_config) 56 | 57 | 58 | def run_subprocess_and_log_stream_output(logger: logging.Logger, args: List[str]) -> Popen: 59 | popen = Popen(args, stdout=PIPE, stderr=STDOUT) 60 | 61 | for line in iter(popen.stdout.readline, b""): 62 | if line is not None: 63 | logger.info(line.decode("utf-8").rstrip()) 64 | 65 | popen.wait() 66 | return popen 67 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cuda_vllm.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CUDA vLLM Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | VLLM_USE_V1: 0 24 | UV_TORCH_BACKEND: auto 25 | 26 | jobs: 27 | run_cli_cuda_vllm_single_gpu_tests: 28 | if: ${{ 29 | (github.event_name == 'push') || 30 | (github.event_name == 'workflow_dispatch') || 31 | contains( github.event.pull_request.labels.*.name, 'cli') || 32 | contains( github.event.pull_request.labels.*.name, 'cuda') || 33 | contains( github.event.pull_request.labels.*.name, 'vllm') || 34 | contains( github.event.pull_request.labels.*.name, 'single_gpu') || 35 | contains( 
github.event.pull_request.labels.*.name, 'cli_cuda_vllm_single_gpu') 36 | }} 37 | 38 | runs-on: 39 | group: aws-g5-4xlarge-plus 40 | 41 | steps: 42 | - name: Checkout 43 | uses: actions/checkout@v4 44 | 45 | - name: Install uv 46 | uses: astral-sh/setup-uv@v6 47 | with: 48 | enable-cache: true 49 | 50 | - name: Run tests 51 | run: | 52 | make test-cli-cuda-vllm-single 53 | 54 | - name: Run examples 55 | run: | 56 | make test-cli-cuda-vllm-single-examples 57 | 58 | run_cli_cuda_vllm_multi_gpu_tests: 59 | if: ${{ 60 | (github.event_name == 'push') || 61 | (github.event_name == 'workflow_dispatch') || 62 | contains( github.event.pull_request.labels.*.name, 'cli') || 63 | contains( github.event.pull_request.labels.*.name, 'cuda') || 64 | contains( github.event.pull_request.labels.*.name, 'vllm') || 65 | contains( github.event.pull_request.labels.*.name, 'multi_gpu') || 66 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_vllm_multi_gpu') 67 | }} 68 | 69 | runs-on: 70 | group: aws-g5-12xlarge-plus 71 | 72 | steps: 73 | - name: Checkout 74 | uses: actions/checkout@v4 75 | 76 | - name: Install uv 77 | uses: astral-sh/setup-uv@v6 78 | with: 79 | enable-cache: true 80 | 81 | - name: Run tests 82 | run: | 83 | make test-cli-cuda-vllm-multi 84 | 85 | - name: Run examples 86 | run: | 87 | make test-cli-cuda-vllm-multi-examples 88 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/llama_cpp/backend.py: -------------------------------------------------------------------------------- 1 | from tempfile import TemporaryDirectory 2 | from typing import Any, Dict 3 | 4 | from llama_cpp import Llama 5 | 6 | from ..base import Backend 7 | from .config import LlamaCppConfig 8 | 9 | 10 | class LlamaCppBackend(Backend[LlamaCppConfig]): 11 | NAME: str = "llama_cpp" 12 | 13 | pretrained_model: Llama 14 | 15 | def __init__(self, config: LlamaCppConfig) -> None: 16 | super().__init__(config) 17 | 18 | def load(self) -> None: 19 | self.logger.info("\t+ Creating backend temporary directory") 20 | self.tmpdir = TemporaryDirectory() 21 | self.logger.info("\t+ Loading pretrained model") 22 | self.load_model_from_pretrained() 23 | self.tmpdir.cleanup() 24 | 25 | def load_model_from_pretrained(self) -> None: 26 | """ 27 | Load the pretrained model from the given model name (normally GGUF, GGML) 28 | """ 29 | 30 | self.pretrained_model = Llama.from_pretrained(self.config.model, **self.llama_cpp_kwargs) 31 | 32 | @property 33 | def llama_cpp_kwargs(self) -> Dict[str, Any]: 34 | return { 35 | "embedding": self.config.task == "feature-extraction", 36 | "filename": self.config.filename, 37 | "verbose": False, 38 | "echo": False, 39 | } 40 | 41 | def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: 42 | if self.config.task == "text-generation": 43 | if inputs["input_ids"].shape[0] != 1: 44 | raise ValueError("Batch size must be 1 for Text Generation with llama-cpp-python") 45 | return {"tokens": inputs["input_ids"].squeeze(0).tolist()} 46 | elif self.config.task == "feature-extraction": 47 | return {"input": [self.pretrained_model.detokenize(x).decode("utf-8") for x in inputs["input_ids"]]} 48 | else: 49 | raise ValueError(f"Task {self.config.task} not supported by {self.NAME}") 50 | 51 | def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> Any: 52 | self.pretrained_model.embed(**inputs) 53 | 54 | def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> list[int]: 55 | generator = 
self.pretrained_model.generate(**inputs, reset=True) 56 | for _ in range(kwargs["max_new_tokens"]): 57 | next(generator) 58 | 59 | def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> list[int]: 60 | generator = self.pretrained_model.generate(**inputs, reset=True) 61 | for _ in range(kwargs["max_new_tokens"]): 62 | next(generator) 63 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/torchrun/config.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from dataclasses import dataclass, field 3 | from typing import Any, Dict, Optional 4 | 5 | from ..config import LauncherConfig 6 | 7 | 8 | @dataclass 9 | class TorchrunConfig(LauncherConfig): 10 | name: str = "torchrun" 11 | _target_: str = "optimum_benchmark.launchers.torchrun.launcher.TorchrunLauncher" 12 | 13 | # Minimum amount of nodes that the user function will be launched on. 14 | # Elastic agent ensures that the user function start only when the min_nodes amount enters the rendezvous. 15 | min_nodes: int = 1 16 | # Maximum amount of nodes that the user function will be launched on. 17 | max_nodes: int = 1 18 | # On each node the elastic agent will launch this amount of workers that will execute user defined function. 19 | nproc_per_node: int = 2 20 | # User defined role of the worker (defaults to "trainer"). 21 | role: str = "benchmarker" 22 | # The interval in seconds that is used by the elastic_agent as a period of monitoring workers. 23 | monitor_interval: int = 30 24 | # The name of the rdzv store. 25 | rdzv_id: str = str(uuid.uuid4()) 26 | # rdzv_backend to use in the rendezvous (etcd). 27 | rdzv_backend: str = "c10d" 28 | # The endpoint of the rdzv sync. storage. 29 | rdzv_endpoint: str = "localhost:0" 30 | # Key, value pair that specifies rendezvous specific configuration. 31 | rdzv_configs: Dict[str, Any] = field(default_factory=lambda: {"rank": 0, "timeout": -1}) 32 | # The timeout in seconds that is used by the elastic agent to wait for the workers to enter the rendezvous. 33 | rdzv_timeout: int = -1 34 | # The maximum amount of restarts that elastic agent will conduct on workers before failure. 35 | max_restarts: int = 0 36 | # The method is used by the elastic agent to start the workers (spawn, fork, forkserver). 37 | start_method: str = "spawn" 38 | # address of the local node if any. If not set, a lookup on the local machine's FQDN will be performed. 39 | local_addr: Optional[str] = None 40 | 41 | # The socket ifname 42 | socket_ifname: Optional[str] = None 43 | 44 | def __post_init__(self): 45 | super().__post_init__() 46 | 47 | if self.start_method not in ["spawn", "fork"]: 48 | raise ValueError(f"start_method must be one of ['spawn', 'fork'], got {self.start_method}") 49 | 50 | if self.min_nodes != self.max_nodes: 51 | raise ValueError( 52 | f"min_nodes and max_nodes must be equal for a reproducible benchmark, got {self.min_nodes} and {self.max_nodes}" 53 | ) 54 | -------------------------------------------------------------------------------- /docker/rocm/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ARG ROCM_VERSION=6.3.4 16 | ARG UBUNTU_VERSION=22.04 17 | 18 | FROM rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION} 19 | 20 | # Install necessary packages 21 | ENV PATH="/opt/rocm/bin:${PATH}" 22 | ENV DEBIAN_FRONTEND=noninteractive 23 | RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ 24 | sudo build-essential git bash-completion \ 25 | python3.10 python3-pip python3.10-dev && \ 26 | apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove -y && \ 27 | update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \ 28 | pip install --no-cache-dir --upgrade pip setuptools wheel requests && \ 29 | cd /opt/rocm/share/amd_smi && pip install . 30 | 31 | RUN apt-get update && apt-get upgrade -y && apt-get install -y hipsparse hipblas hipsolver rocthrust && \ 32 | apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove -y 33 | 34 | # Install PyTorch 35 | ARG TORCH_VERSION="" 36 | ARG TORCH_ROCM=rocm6.3 37 | ARG TORCH_RELEASE_TYPE=stable 38 | 39 | RUN if [ -n "${TORCH_VERSION}" ]; then \ 40 | pip install --no-cache-dir torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \ 41 | elif [ "${TORCH_RELEASE_TYPE}" = "stable" ]; then \ 42 | pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/${TORCH_ROCM} ; \ 43 | elif [ "${TORCH_RELEASE_TYPE}" = "nightly" ]; then \ 44 | pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/${TORCH_ROCM} ; \ 45 | else \ 46 | echo "Error: Invalid TORCH_RELEASE_TYPE. Must be 'stable', 'nightly', or specify a TORCH_VERSION." 
&& exit 1 ; \ 47 | fi 48 | 49 | # Install quantization libraries from source 50 | ENV MAX_JOBS=1 51 | ENV ROCM_VERSION=6.3 52 | ENV GPTQMODEL_FORCE_BUILD=1 53 | ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" 54 | 55 | RUN pip install -v gptqmodel --no-build-isolation --no-cache-dir 56 | -------------------------------------------------------------------------------- /examples/cuda_pytorch_llama_quants.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig 4 | from optimum_benchmark.logging_utils import setup_logging 5 | 6 | BENCHMARK_NAME = "cuda_pytorch_llama" 7 | MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" 8 | PUSH_REPO_ID = os.environ.get("PUSH_REPO_ID", None) 9 | 10 | WEIGHTS_CONFIGS = { 11 | "float16": { 12 | "torch_dtype": "float16", 13 | "quantization_config": {}, 14 | }, 15 | "4bit-gptq-exllama-v2": { 16 | "torch_dtype": "float16", 17 | "quantization_config": { 18 | "quant_method": "gptq", 19 | "bits": 4, 20 | "use_exllama ": True, 21 | "version": 2, 22 | "model_seqlen": 256, 23 | }, 24 | }, 25 | "torchao-int4wo-128": { 26 | "torch_dtype": "bfloat16", 27 | "quantization_config": { 28 | "quant_method": "torchao", 29 | "quant_type": "int4_weight_only", 30 | "group_size": 128, 31 | }, 32 | }, 33 | } 34 | 35 | 36 | def run_benchmark(weight_config: str): 37 | launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn") 38 | backend_config = PyTorchConfig( 39 | model=MODEL, 40 | device="cuda", 41 | device_ids="0", 42 | no_weights=True, 43 | **WEIGHTS_CONFIGS[weight_config], 44 | ) 45 | scenario_config = InferenceConfig( 46 | memory=True, 47 | latency=True, 48 | duration=10, 49 | iterations=10, 50 | warmup_runs=10, 51 | input_shapes={"batch_size": 1, "sequence_length": 64}, 52 | generate_kwargs={"max_new_tokens": 32, "min_new_tokens": 32}, 53 | ) 54 | benchmark_config = BenchmarkConfig( 55 | name=BENCHMARK_NAME, 56 | launcher=launcher_config, 57 | scenario=scenario_config, 58 | backend=backend_config, 59 | print_report=True, 60 | log_report=True, 61 | ) 62 | benchmark_report = Benchmark.launch(benchmark_config) 63 | 64 | return benchmark_config, benchmark_report 65 | 66 | 67 | if __name__ == "__main__": 68 | level = os.environ.get("LOG_LEVEL", "INFO") 69 | to_file = os.environ.get("LOG_TO_FILE", "0") == "1" 70 | setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS") 71 | 72 | for weight_config in WEIGHTS_CONFIGS: 73 | benchmark_config, benchmark_report = run_benchmark(weight_config) 74 | benchmark = Benchmark(config=benchmark_config, report=benchmark_report) 75 | 76 | if PUSH_REPO_ID is not None: 77 | benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME, filename=f"{weight_config}.json") 78 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/py_txi/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import dataclass 3 | from typing import Any, Dict, List, Optional, Union 4 | 5 | from ...import_utils import py_txi_version 6 | from ...system_utils import is_nvidia_system, is_rocm_system 7 | from ...task_utils import TEXT_EMBEDDING_TASKS, TEXT_GENERATION_TASKS 8 | from ..config import BackendConfig 9 | 10 | 11 | @dataclass 12 | class PyTXIConfig(BackendConfig): 13 | name: str = "py-txi" 14 | version: Optional[str] = py_txi_version() 
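# _target_ (next field) is the dotted import path used to instantiate the backend
# class, following the same Hydra-style convention as the other backend configs.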
15 | _target_: str = "optimum_benchmark.backends.py_txi.backend.PyTXIBackend" 16 | 17 | # optimum-benchmark specific 18 | no_weights: bool = False 19 | 20 | # Image to use for the container 21 | image: Optional[str] = None 22 | # Shared memory size for the container 23 | shm_size: Optional[str] = None 24 | # List of custom devices to forward to the container e.g. ["/dev/kfd", "/dev/dri"] for ROCm 25 | devices: Optional[List[str]] = None 26 | # NVIDIA-docker GPU device options e.g. "all" (all) or "0,1,2,3" (ids) or 4 (count) 27 | gpus: Optional[Union[str, int]] = None 28 | # Things to forward to the container 29 | ports: Optional[Dict[str, Any]] = None 30 | environment: Optional[List[str]] = None 31 | volumes: Optional[Dict[str, Any]] = None 32 | # First connection/request 33 | connection_timeout: Optional[int] = None 34 | first_request_timeout: Optional[int] = None 35 | max_concurrent_requests: Optional[int] = None 36 | 37 | # Common options 38 | dtype: Optional[str] = None 39 | # TEI specific 40 | pooling: Optional[str] = None 41 | # TGI specific 42 | sharded: Optional[str] = None 43 | quantize: Optional[str] = None 44 | num_shard: Optional[int] = None 45 | speculate: Optional[int] = None 46 | cuda_graphs: Optional[int] = None 47 | trust_remote_code: Optional[bool] = None 48 | disable_custom_kernels: Optional[bool] = None 49 | 50 | def __post_init__(self): 51 | super().__post_init__() 52 | 53 | if self.task not in TEXT_GENERATION_TASKS + TEXT_EMBEDDING_TASKS: 54 | raise NotImplementedError(f"TXI does not support task {self.task}") 55 | 56 | # Device options 57 | if self.device_ids is not None and is_nvidia_system() and self.gpus is None: 58 | self.gpus = self.device_ids 59 | 60 | if self.device_ids is not None and is_rocm_system() and self.devices is None: 61 | ids = list(map(int, self.device_ids.split(","))) 62 | renderDs = [file for file in os.listdir("/dev/dri") if file.startswith("renderD")] 63 | self.devices = ["/dev/kfd"] + [f"/dev/dri/{renderDs[i]}" for i in ids] 64 | 65 | self.trust_remote_code = self.model_kwargs.get("trust_remote_code", None) 66 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cuda_pytorch.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CUDA PyTorch Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_TORCH_BACKEND: auto 24 | 25 | jobs: 26 | run_cli_cuda_pytorch_single_gpu_tests: 27 | if: ${{ 28 | (github.event_name == 'push') || 29 | (github.event_name == 'workflow_dispatch') || 30 | contains( github.event.pull_request.labels.*.name, 'cli') || 31 | contains( github.event.pull_request.labels.*.name, 'cuda') || 32 | contains( github.event.pull_request.labels.*.name, 'pytorch') || 33 | contains( github.event.pull_request.labels.*.name, 'single_gpu') || 34 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_pytorch_single_gpu') 35 | }} 36 | 37 | runs-on: 38 | group: aws-g5-4xlarge-plus 39 | 40 | steps: 41 | - name: Checkout 42 | uses: actions/checkout@v4 43 | 44 | - name: Install uv 45 | uses: astral-sh/setup-uv@v6 46 | with: 47 | enable-cache: true 48 | 49 | - name: Run tests 50 | run: | 51 | make 
test-cli-cuda-pytorch-single 52 | 53 | - name: Run examples 54 | run: | 55 | make test-cli-cuda-pytorch-single-examples 56 | 57 | run_cli_cuda_pytorch_multi_gpu_tests: 58 | if: ${{ 59 | (github.event_name == 'push') || 60 | (github.event_name == 'workflow_dispatch') || 61 | contains( github.event.pull_request.labels.*.name, 'cli') || 62 | contains( github.event.pull_request.labels.*.name, 'cuda') || 63 | contains( github.event.pull_request.labels.*.name, 'pytorch') || 64 | contains( github.event.pull_request.labels.*.name, 'multi_gpu') || 65 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_pytorch_multi_gpu') 66 | }} 67 | 68 | runs-on: 69 | group: aws-g5-12xlarge-plus 70 | 71 | # need the devel image for deepspeed compilation 72 | container: 73 | image: nvidia/cuda:12.8.1-devel-ubuntu22.04 74 | options: --ipc host --gpus all 75 | 76 | steps: 77 | - name: Checkout 78 | uses: actions/checkout@v4 79 | 80 | - name: Install uv 81 | uses: astral-sh/setup-uv@v6 82 | with: 83 | enable-cache: true 84 | 85 | - name: Run tests 86 | run: | 87 | make test-cli-cuda-pytorch-multi 88 | 89 | - name: Run examples 90 | run: | 91 | make test-cli-cuda-pytorch-multi-examples 92 | -------------------------------------------------------------------------------- /.github/workflows/test_cli_cuda_tensorrt_llm.yaml: -------------------------------------------------------------------------------- 1 | name: CLI CUDA TensorRT-LLM Tests 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | types: 12 | - opened 13 | - reopened 14 | - synchronize 15 | - labeled 16 | - unlabeled 17 | 18 | concurrency: 19 | cancel-in-progress: true 20 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 21 | 22 | env: 23 | UV_SYSTEM_PYTHON: 1 24 | UV_TORCH_BACKEND: auto 25 | 26 | jobs: 27 | cli_cuda_tensorrt_llm_single_gpu_tests: 28 | if: ${{ 29 | (github.event_name == 'push') || 30 | (github.event_name == 'workflow_dispatch') || 31 | contains( github.event.pull_request.labels.*.name, 'cli') || 32 | contains( github.event.pull_request.labels.*.name, 'cuda') || 33 | contains( github.event.pull_request.labels.*.name, 'tensorrt_llm') || 34 | contains( github.event.pull_request.labels.*.name, 'single_gpu') || 35 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm') 36 | }} 37 | 38 | runs-on: 39 | group: aws-g5-4xlarge-plus 40 | 41 | container: 42 | image: huggingface/optimum-nvidia:latest 43 | options: --ipc host --gpus all 44 | 45 | steps: 46 | - name: Checkout 47 | uses: actions/checkout@v4 48 | 49 | - name: Install uv 50 | uses: astral-sh/setup-uv@v6 51 | 52 | - name: Run tests 53 | run: | 54 | make test-cli-cuda-tensorrt-llm-single 55 | 56 | - name: Run examples 57 | run: | 58 | make test-cli-cuda-tensorrt-llm-single-examples 59 | 60 | cli_cuda_tensorrt_llm_multi_gpu_tests: 61 | if: ${{ 62 | (github.event_name == 'push') || 63 | (github.event_name == 'workflow_dispatch') || 64 | contains( github.event.pull_request.labels.*.name, 'cli') || 65 | contains( github.event.pull_request.labels.*.name, 'cuda') || 66 | contains( github.event.pull_request.labels.*.name, 'tensorrt_llm') || 67 | contains( github.event.pull_request.labels.*.name, 'multi_gpu') || 68 | contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm_multi_gpu') 69 | }} 70 | 71 | runs-on: 72 | group: aws-g5-12xlarge-plus 73 | 74 | container: 75 | image: huggingface/optimum-nvidia:latest 76 | options: --ipc host --gpus all 77 | 78 | 
steps: 79 | - name: Checkout 80 | uses: actions/checkout@v4 81 | 82 | - name: Install uv 83 | uses: astral-sh/setup-uv@v6 84 | 85 | - name: Run tests 86 | run: | 87 | make test-cli-cuda-tensorrt-llm-multi 88 | 89 | - name: Run examples 90 | run: | 91 | make test-cli-cuda-tensorrt-llm-multi-examples 92 | -------------------------------------------------------------------------------- /.github/workflows/images.yaml: -------------------------------------------------------------------------------- 1 | name: Build and Publish Docker Images 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | paths: 9 | - docker/** 10 | - .github/workflows/images.yaml 11 | pull_request: 12 | branches: 13 | - main 14 | types: 15 | - opened 16 | - reopened 17 | - synchronize 18 | - labeled 19 | - unlabeled 20 | schedule: 21 | - cron: "0 0 * * *" 22 | 23 | concurrency: 24 | cancel-in-progress: true 25 | group: ${{ github.workflow }}-${{ github.ref }} 26 | 27 | env: 28 | REGISTRY: ghcr.io 29 | IMAGE_NAME: ${{ github.repository }} 30 | BUILDX_CACHE_TTL: 604800 # Cache expiration in seconds (e.g., 7 days) 31 | 32 | jobs: 33 | publish: 34 | if: ${{ 35 | github.event_name == 'push' || 36 | github.event_name == 'schedule' || 37 | github.event_name == 'workflow_dispatch' || 38 | contains(github.event.pull_request.labels.*.name, 'docker') 39 | }} 40 | 41 | strategy: 42 | fail-fast: true 43 | matrix: 44 | image_flavor: [cpu, cuda, rocm] 45 | 46 | runs-on: ubuntu-latest 47 | 48 | permissions: 49 | contents: write 50 | packages: write 51 | id-token: write 52 | 53 | steps: 54 | - name: Free Disk Space 55 | uses: jlumbroso/free-disk-space@main 56 | with: 57 | tool-cache: true 58 | 59 | - name: Checkout code 60 | uses: actions/checkout@v4 61 | 62 | - name: Set up Docker Buildx 63 | uses: docker/setup-buildx-action@v3 64 | with: 65 | buildkitd-flags: --debug 66 | 67 | - name: Login to GitHub Container Registry 68 | uses: docker/login-action@v3 69 | with: 70 | registry: ghcr.io 71 | username: ${{ github.actor }} 72 | password: ${{ secrets.GITHUB_TOKEN }} 73 | 74 | - name: Extract metadata (tags, labels) for Docker images 75 | id: meta 76 | uses: docker/metadata-action@v5 77 | with: 78 | flavor: | 79 | latest=false 80 | images: | 81 | ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 82 | tags: | 83 | type=raw,value=latest-${{ matrix.image_flavor }} 84 | type=raw,value=sha-${{ github.sha }}-${{ matrix.image_flavor }} 85 | 86 | - name: Build and push Docker images 87 | uses: docker/build-push-action@v5 88 | id: push 89 | with: 90 | context: . 
91 | push: true 92 | tags: ${{ steps.meta.outputs.tags }} 93 | labels: ${{ steps.meta.outputs.labels }} 94 | file: docker/${{ matrix.image_flavor }}/Dockerfile 95 | cache-to: type=gha,mode=min,scope=docker-cache-${{ matrix.image_flavor }} 96 | cache-from: type=gha,mode=max,scope=docker-cache-${{ matrix.image_flavor }} 97 | -------------------------------------------------------------------------------- /optimum_benchmark/scenarios/training/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from logging import getLogger 3 | from typing import Any, Dict 4 | 5 | from ..config import ScenarioConfig 6 | 7 | LOGGER = getLogger("training") 8 | 9 | TRAINING_ARGUMENT = { 10 | "per_device_train_batch_size": 2, 11 | "gradient_accumulation_steps": 1, 12 | "output_dir": "./trainer_output", 13 | "eval_strategy": "no", 14 | "save_strategy": "no", 15 | "do_train": True, 16 | "use_cpu": False, 17 | "max_steps": -1, 18 | # disable evaluation 19 | "do_eval": False, 20 | "do_predict": False, 21 | # disable custom logging 22 | "report_to": "none", 23 | # disable transformers memory metrics 24 | "skip_memory_metrics": True, 25 | # from pytorch warning: "this flag results in an extra traversal of the 26 | # autograd graph every iteration which can adversely affect performance." 27 | "ddp_find_unused_parameters": False, 28 | } 29 | 30 | DATASET_SHAPES = {"dataset_size": 500, "sequence_length": 16, "num_choices": 1} 31 | 32 | 33 | @dataclass 34 | class TrainingConfig(ScenarioConfig): 35 | name: str = "training" 36 | _target_: str = "optimum_benchmark.scenarios.training.scenario.TrainingScenario" 37 | 38 | # training options 39 | max_steps: int = 140 40 | warmup_steps: int = 40 41 | 42 | # dataset options 43 | dataset_shapes: Dict[str, Any] = field(default_factory=dict) 44 | # training options 45 | training_arguments: Dict[str, Any] = field(default_factory=dict) 46 | 47 | # tracking options 48 | latency: bool = field(default=True, metadata={"help": "Measure latencies and throughputs"}) 49 | memory: bool = field(default=False, metadata={"help": "Measure max memory usage"}) 50 | energy: bool = field(default=False, metadata={"help": "Measure energy usage"}) 51 | 52 | def __post_init__(self): 53 | super().__post_init__() 54 | 55 | self.dataset_shapes = {**DATASET_SHAPES, **self.dataset_shapes} 56 | self.training_arguments = {**TRAINING_ARGUMENT, **self.training_arguments} 57 | 58 | if self.training_arguments["max_steps"] == -1: 59 | self.training_arguments["max_steps"] = self.max_steps 60 | 61 | if self.max_steps != self.training_arguments["max_steps"]: 62 | LOGGER.warning( 63 | f"`scenario.max_steps` ({self.max_steps}) and `scenario.training_arguments.max_steps` " 64 | f"({self.training_arguments['max_steps']}) are different. " 65 | "Using `scenario.training_arguments.max_steps`."
66 | ) 67 | self.max_steps = self.training_arguments["max_steps"] 68 | 69 | if self.warmup_steps > self.max_steps: 70 | raise ValueError( 71 | f"`scenario.warmup_steps` ({self.warmup_steps}) must be smaller than `scenario.max_steps` ({self.max_steps})" 72 | ) 73 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/timm_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from transformers import PretrainedConfig 4 | 5 | from ..import_utils import is_timm_available 6 | 7 | if is_timm_available(): 8 | from timm import create_model 9 | from timm.models import get_pretrained_cfg, load_model_config_from_hf, parse_model_name 10 | 11 | 12 | def get_timm_model_creator(): 13 | if not is_timm_available(): 14 | raise ImportError("timm is not available. Please, pip install timm.") 15 | 16 | return create_model 17 | 18 | 19 | def get_timm_pretrained_config(model_name: str) -> "PretrainedConfig": 20 | if not is_timm_available(): 21 | raise ImportError("timm is not available. Please, pip install timm.") 22 | 23 | model_source, model_name = parse_model_name(model_name) 24 | if model_source == "hf-hub": 25 | # For model names specified in the form `hf-hub:path/architecture_name@revision`, 26 | # load model weights + pretrained_cfg from Hugging Face hub. 27 | pretrained_cfg, model_name = load_model_config_from_hf(model_name) 28 | return pretrained_cfg 29 | 30 | return get_pretrained_cfg(model_name) 31 | 32 | 33 | def extract_timm_shapes_from_config(config: "PretrainedConfig") -> Dict[str, Any]: 34 | if not is_timm_available(): 35 | raise ImportError("timm is not available. Please, pip install timm.") 36 | 37 | artifacts_dict = {} 38 | 39 | config_dict = {k: v for k, v in config.to_dict().items() if v is not None} 40 | artifacts_dict.update(config_dict) 41 | 42 | shapes = {} 43 | 44 | # image input 45 | if "num_channels" in artifacts_dict: 46 | shapes["num_channels"] = artifacts_dict.get("num_channels", None) 47 | elif "channels" in artifacts_dict: 48 | shapes["num_channels"] = artifacts_dict.get("channels", None) 49 | 50 | if "image_size" in artifacts_dict: 51 | image_size = artifacts_dict["image_size"] 52 | elif "size" in artifacts_dict: 53 | image_size = artifacts_dict["size"] 54 | else: 55 | image_size = None 56 | 57 | if isinstance(image_size, (int, float)): 58 | shapes["height"] = image_size 59 | shapes["width"] = image_size 60 | elif isinstance(image_size, (list, tuple)): 61 | shapes["height"] = image_size[0] 62 | shapes["width"] = image_size[0] 63 | elif isinstance(image_size, dict) and len(image_size) == 2: 64 | shapes["height"] = list(image_size.values())[0] 65 | shapes["width"] = list(image_size.values())[1] 66 | elif isinstance(image_size, dict) and len(image_size) == 1: 67 | shapes["height"] = list(image_size.values())[0] 68 | shapes["width"] = list(image_size.values())[0] 69 | 70 | if "input_size" in artifacts_dict: 71 | input_size = artifacts_dict.get("input_size", None) 72 | shapes["num_channels"] = input_size[0] 73 | shapes["height"] = input_size[1] 74 | shapes["width"] = input_size[2] 75 | 76 | return shapes 77 | -------------------------------------------------------------------------------- /optimum_benchmark/benchmark/base.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from logging import getLogger 3 | from typing import TYPE_CHECKING, Type 4 | 5 | from hydra.utils import 
get_class 6 | 7 | from ..backends.config import BackendConfig 8 | from ..hub_utils import PushToHubMixin, classproperty 9 | from ..launchers import LauncherConfig 10 | from ..scenarios import ScenarioConfig 11 | from .config import BenchmarkConfig 12 | from .report import BenchmarkReport 13 | 14 | if TYPE_CHECKING: 15 | from ..backends.base import Backend 16 | from ..launchers.base import Launcher 17 | from ..scenarios.base import Scenario 18 | 19 | 20 | LOGGER = getLogger("benchmark") 21 | 22 | 23 | @dataclass 24 | class Benchmark(PushToHubMixin): 25 | config: BenchmarkConfig 26 | report: BenchmarkReport 27 | 28 | def __post_init__(self): 29 | if isinstance(self.config, dict): 30 | self.config = BenchmarkConfig.from_dict(self.config) 31 | elif not isinstance(self.config, BenchmarkConfig): 32 | raise ValueError("config must be either a dict or a BenchmarkConfig instance") 33 | 34 | if isinstance(self.report, dict): 35 | self.report = BenchmarkReport.from_dict(self.report) 36 | elif not isinstance(self.report, BenchmarkReport): 37 | raise ValueError("report must be either a dict or a BenchmarkReport instance") 38 | 39 | @staticmethod 40 | def launch(config: BenchmarkConfig): 41 | """ 42 | Runs a benchmark using specified launcher configuration/logic 43 | """ 44 | 45 | # Allocate requested launcher 46 | launcher_config: LauncherConfig = config.launcher 47 | launcher_factory: Type[Launcher] = get_class(launcher_config._target_) 48 | launcher: Launcher = launcher_factory(launcher_config) 49 | 50 | # Launch the benchmark using the launcher 51 | report = launcher.launch(worker=Benchmark.run, worker_args=[config]) 52 | 53 | if config.log_report: 54 | report.log() 55 | 56 | if config.print_report: 57 | report.print() 58 | 59 | return report 60 | 61 | @staticmethod 62 | def run(config: BenchmarkConfig): 63 | """ 64 | Runs a scenario using specified backend configuration/logic 65 | """ 66 | 67 | # Allocate requested backend 68 | backend_config: BackendConfig = config.backend 69 | backend_factory: Type[Backend] = get_class(backend_config._target_) 70 | backend: Backend = backend_factory(backend_config) 71 | 72 | # Allocate requested scenario 73 | scenario_config: ScenarioConfig = config.scenario 74 | scenario_factory: Type[Scenario] = get_class(scenario_config._target_) 75 | scenario: Scenario = scenario_factory(scenario_config) 76 | 77 | # Run the scenario using the backend 78 | report = scenario.run(backend) 79 | 80 | return report 81 | 82 | @classproperty 83 | def default_filename(cls) -> str: 84 | return "benchmark.json" 85 | -------------------------------------------------------------------------------- /optimum_benchmark/cli.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | from logging import getLogger 4 | 5 | import hydra 6 | from hydra.core.config_store import ConfigStore 7 | from omegaconf import DictConfig, OmegaConf 8 | 9 | from .
import ( 10 | Benchmark, 11 | BenchmarkConfig, 12 | EnergyStarConfig, 13 | InferenceConfig, 14 | InlineConfig, 15 | IPEXConfig, 16 | LlamaCppConfig, 17 | ONNXRuntimeConfig, 18 | OpenVINOConfig, 19 | ProcessConfig, 20 | PyTorchConfig, 21 | PyTXIConfig, 22 | TorchrunConfig, 23 | TrainingConfig, 24 | TRTLLMConfig, 25 | VLLMConfig, 26 | ) 27 | from .logging_utils import setup_logging 28 | 29 | LOGGER = getLogger("hydra-cli") 30 | 31 | 32 | # Register configurations 33 | cs = ConfigStore.instance() 34 | # benchmark configuration 35 | cs.store(name="benchmark", node=BenchmarkConfig) 36 | # backends configurations 37 | cs.store(group="backend", name=IPEXConfig.name, node=IPEXConfig) 38 | cs.store(group="backend", name=OpenVINOConfig.name, node=OpenVINOConfig) 39 | cs.store(group="backend", name=PyTorchConfig.name, node=PyTorchConfig) 40 | cs.store(group="backend", name=ONNXRuntimeConfig.name, node=ONNXRuntimeConfig) 41 | cs.store(group="backend", name=TRTLLMConfig.name, node=TRTLLMConfig) 42 | cs.store(group="backend", name=PyTXIConfig.name, node=PyTXIConfig) 43 | cs.store(group="backend", name=VLLMConfig.name, node=VLLMConfig) 44 | cs.store(group="backend", name=LlamaCppConfig.name, node=LlamaCppConfig) 45 | # scenarios configurations 46 | cs.store(group="scenario", name=TrainingConfig.name, node=TrainingConfig) 47 | cs.store(group="scenario", name=InferenceConfig.name, node=InferenceConfig) 48 | cs.store(group="scenario", name=EnergyStarConfig.name, node=EnergyStarConfig) 49 | # launchers configurations 50 | cs.store(group="launcher", name=InlineConfig.name, node=InlineConfig) 51 | cs.store(group="launcher", name=ProcessConfig.name, node=ProcessConfig) 52 | cs.store(group="launcher", name=TorchrunConfig.name, node=TorchrunConfig) 53 | 54 | 55 | # optimum-benchmark 56 | @hydra.main(version_base=None) 57 | def main(config: DictConfig) -> None: 58 | log_level = os.environ.get("LOG_LEVEL", "INFO") 59 | log_to_file = os.environ.get("LOG_TO_FILE", "1") == "1" 60 | override_benchmarks = os.environ.get("OVERRIDE_BENCHMARKS", "0") == "1" 61 | setup_logging(level=log_level, to_file=log_to_file, prefix="MAIN-PROCESS") 62 | 63 | if glob.glob("benchmark_report.json") and not override_benchmarks: 64 | LOGGER.warning( 65 | "Benchmark was already conducted in the current directory. " 66 | "If you want to override it, set the environment variable OVERRIDE_BENCHMARKS=1 (in hydra.job.env_set)" 67 | ) 68 | return 69 | 70 | # Instantiates the configuration with the right class and triggers its __post_init__ 71 | benchmark_config: BenchmarkConfig = OmegaConf.to_object(config) 72 | benchmark_config.save_json("benchmark_config.json") 73 | 74 | benchmark_report = Benchmark.launch(benchmark_config) 75 | benchmark_report.save_markdown("benchmark_report.md") 76 | benchmark_report.save_json("benchmark_report.json") 77 | benchmark_report.save_text("benchmark_report.txt") 78 | 79 | benchmark = Benchmark(config=benchmark_config, report=benchmark_report) 80 | benchmark.save_json("benchmark.json") 81 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | # How to contribute to Optimum-Benchmark? 3 | 4 | `optimum-benchmark` is an open source project, so all contributions and suggestions are welcome. 5 | 6 | You can contribute in many different ways: giving ideas, answering questions, reporting bugs, proposing enhancements, improving the documentation, fixing bugs,... 
7 | 8 | Many thanks in advance to every contributor. 9 | 10 | ## How to work on an open Issue? 11 | 12 | You have the list of open Issues at: https://github.com/huggingface/optimum-benchmark/issues 13 | 14 | If you would like to work on any of the open Issues: 15 | 16 | 1. Make sure it is not already assigned to someone else. You have the assignee (if any) on the top of the right column of the Issue page. If it is not assigned, you can assign it to yourself by clicking on the "Assign yourself" button, or by leaving a comment on the Issue page. 17 | 18 | 2. Create a Pull Request. 19 | 20 | ## How to create a Pull Request? 21 | 22 | 1. Fork the [repository](https://github.com/huggingface/optimum-benchmark) by clicking on the 'Fork' button on the repository's page. This creates a copy of the code under your GitHub user account. 23 | 24 | 2. Clone your fork to your local disk, and add the base repository as a remote: 25 | 26 | ```bash 27 | git clone https://github.com/<your-github-username>/optimum-benchmark.git 28 | cd optimum-benchmark 29 | git remote add upstream https://github.com/huggingface/optimum-benchmark.git 30 | ``` 31 | 32 | 3. Create a new branch to hold your development changes: 33 | 34 | ```bash 35 | git checkout -b name-of-your-branch 36 | ``` 37 | 38 | **do not** work on the `main` branch. 39 | 40 | 4. Set up a development environment by running the following command in a virtual environment: 41 | 42 | ```bash 43 | pip install -e .[quality,testing] 44 | ``` 45 | 46 | 5. Develop the features or fix the bug you want to work on. 47 | 48 | 6. Depending on the feature you're working on and your development environment, you can run tests locally using the [makefile](Makefile). For example, to test the CLI with the CPU device and the PyTorch backend, you can run the following commands: 49 | 50 | ```bash 51 | make install_cli_cpu_pytorch 52 | make test_cli_cpu_pytorch 53 | ``` 54 | 55 | For a better development experience, we recommend using isolated docker containers to run tests: 56 | 57 | ```bash 58 | make build_cpu_image 59 | make run_cpu_container 60 | make install_cli_cpu_pytorch 61 | make test_cli_cpu_pytorch 62 | ``` 63 | 64 | You can find more information about the available make commands in the [Makefile](Makefile). 65 | 66 | 7. Make sure your code is properly formatted and linted by running: 67 | 68 | ```bash 69 | make style 70 | ``` 71 | 72 | 8. Once you're happy with your changes, add the changed files using `git add` and make a commit with `git commit` to record your changes locally: 73 | 74 | ```bash 75 | git add modified_file.py 76 | git commit 77 | ``` 78 | 79 | It is a good idea to sync your copy of the code with the original repository regularly. This way you can quickly account for changes: 80 | 81 | ```bash 82 | git fetch upstream 83 | git rebase upstream/main 84 | ``` 85 | 86 | Push the changes to your account using: 87 | 88 | ```bash 89 | git push -u origin name-of-your-branch 90 | ``` 91 | 92 | 9. Once you are satisfied, go to the webpage of your fork on GitHub. Click on "Pull request" to send your changes to the project maintainers for review.
93 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/pytorch/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from logging import getLogger 3 | from typing import Any, Dict, Optional 4 | 5 | from ...import_utils import torch_version 6 | from ..config import BackendConfig 7 | 8 | AMP_DTYPES = ["bfloat16", "float16"] 9 | TORCH_DTYPES = ["bfloat16", "float16", "float32", "auto"] 10 | 11 | QUANTIZATION_CONFIGS = {"bnb": {"llm_int8_threshold": 0.0}} 12 | 13 | 14 | LOGGER = getLogger(__name__) 15 | 16 | 17 | @dataclass 18 | class PyTorchConfig(BackendConfig): 19 | name: str = "pytorch" 20 | version: Optional[str] = torch_version() 21 | _target_: str = "optimum_benchmark.backends.pytorch.backend.PyTorchBackend" 22 | 23 | # load options 24 | no_weights: bool = False 25 | tp_plan: Optional[str] = None 26 | device_map: Optional[str] = None 27 | torch_dtype: Optional[str] = None 28 | 29 | # optimization options 30 | eval_mode: bool = True 31 | to_bettertransformer: bool = False 32 | low_cpu_mem_usage: Optional[bool] = None 33 | attn_implementation: Optional[str] = None 34 | cache_implementation: Optional[str] = None 35 | 36 | # tf32 options 37 | allow_tf32: bool = False 38 | 39 | # autocast options 40 | autocast_enabled: bool = False 41 | autocast_dtype: Optional[str] = None 42 | 43 | # torch compile options 44 | torch_compile: bool = False 45 | torch_compile_target: str = "forward" 46 | torch_compile_config: Dict[str, Any] = field(default_factory=dict) 47 | 48 | # quantization options 49 | quantization_scheme: Optional[str] = None 50 | quantization_config: Dict[str, Any] = field(default_factory=dict) 51 | 52 | # distributed inference options 53 | deepspeed_inference: bool = False 54 | deepspeed_inference_config: Dict[str, Any] = field(default_factory=dict) 55 | 56 | # peft options 57 | peft_type: Optional[str] = None 58 | peft_config: Dict[str, Any] = field(default_factory=dict) 59 | 60 | def __post_init__(self): 61 | super().__post_init__() 62 | 63 | if self.model_kwargs.get("torch_dtype", None) is not None: 64 | raise ValueError( 65 | "`torch_dtype` is an explicit argument in the PyTorch backend config. " 66 | "Please remove it from the `model_kwargs` and set it in the backend config directly." 67 | ) 68 | 69 | if self.torch_dtype is not None and self.torch_dtype not in TORCH_DTYPES: 70 | raise ValueError(f"`torch_dtype` should be one of None or {TORCH_DTYPES}, got {self.torch_dtype}") 71 | 72 | if self.autocast_dtype is not None and self.autocast_dtype not in AMP_DTYPES: 73 | raise ValueError(f"`autocast_dtype` must be one of {AMP_DTYPES}. Got {self.autocast_dtype} instead.") 74 | 75 | if self.quantization_scheme is not None: 76 | LOGGER.warning( 77 | "`backend.quantization_scheme` is deprecated and will be removed in a future version. " 78 | "Please use `quantization_config.quant_method` instead." 
79 | ) 80 | if self.quantization_config is None: 81 | self.quantization_config = {"quant_method": self.quantization_scheme} 82 | else: 83 | self.quantization_config["quant_method"] = self.quantization_scheme 84 | 85 | if self.quantization_config is not None: 86 | self.quantization_config = dict( 87 | QUANTIZATION_CONFIGS.get(self.quantization_scheme, {}), # default config 88 | **self.quantization_config, # user config (overwrites default) 89 | ) 90 | -------------------------------------------------------------------------------- /tests/test_examples.py: -------------------------------------------------------------------------------- 1 | import os 2 | from logging import getLogger 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from optimum_benchmark.logging_utils import run_subprocess_and_log_stream_output 8 | 9 | LOGGER = getLogger("test-examples") 10 | 11 | os.environ["TRANSFORMERS_IS_CI"] = "1" 12 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 13 | 14 | TEST_CONFIG_DIR = Path(__file__).parent.parent / "examples" 15 | TEST_CONFIG_NAMES = [ 16 | config.split(".")[0] 17 | for config in os.listdir(TEST_CONFIG_DIR) 18 | if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_")) 19 | ] 20 | TEST_SCRIPT_PATHS = [ 21 | str(TEST_CONFIG_DIR / filename) for filename in os.listdir(TEST_CONFIG_DIR) if filename.endswith(".py") 22 | ] 23 | 24 | ROCR_VISIBLE_DEVICES = os.environ.get("ROCR_VISIBLE_DEVICES", None) 25 | CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", None) 26 | 27 | 28 | @pytest.mark.parametrize("config_name", TEST_CONFIG_NAMES) 29 | def test_cli_configs(config_name): 30 | if config_name == "cpu_ipex_bert": 31 | model = "hf-internal-testing/tiny-random-BertModel" 32 | elif config_name == "cpu_ipex_llama": 33 | model = "hf-internal-testing/tiny-random-LlamaForCausalLM" 34 | elif config_name == "cpu_llama_cpp_text_generation": 35 | model = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF" 36 | elif config_name == "cpu_llama_cpp_embedding": 37 | model = "nomic-ai/nomic-embed-text-v1.5-GGUF" 38 | elif config_name == "cpu_onnxruntime_static_quant_vit": 39 | model = "hf-internal-testing/tiny-random-ViTModel" 40 | elif config_name == "cpu_openvino_8bit_bert": 41 | model = "hf-internal-testing/tiny-random-BertModel" 42 | elif config_name == "cpu_openvino_diffusion": 43 | model = "hf-internal-testing/tiny-stable-diffusion-torch" 44 | elif config_name == "cuda_pytorch_bert": 45 | model = "hf-internal-testing/tiny-random-BertModel" 46 | elif config_name.startswith("cuda_pytorch_llama"): 47 | model = "hf-internal-testing/tiny-random-LlamaForCausalLM" 48 | elif config_name == "cuda_pytorch_vlm": 49 | model = "hf-internal-testing/tiny-random-Qwen2VLForConditionalGeneration" 50 | elif config_name in ["cuda_tgi_llama", "cuda_trt_llama", "cuda_vllm_llama"]: 51 | model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" 52 | else: 53 | raise ValueError(f"Unsupported config name: {config_name}") 54 | 55 | args = [ 56 | "optimum-benchmark", 57 | "--config-dir", 58 | TEST_CONFIG_DIR, 59 | "--config-name", 60 | config_name, 61 | "scenario.warmup_runs=1", 62 | "scenario.input_shapes.batch_size=1", 63 | "++scenario.input_shapes.sequence_length=16", 64 | "++scenario.generate_kwargs.max_new_tokens=16", 65 | "++scenario.generate_kwargs.min_new_tokens=16", 66 | "++scenario.call_kwargs.num_inference_steps=4", 67 | "backend.model=" + model, 68 | "++backend.reshape_kwargs.batch_size=1", 69 | "++backend.reshape_kwargs.sequence_length=16", 70 | ] 71 | 72 | if ROCR_VISIBLE_DEVICES is not 
None: 73 | args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"'] 74 | elif CUDA_VISIBLE_DEVICES is not None: 75 | args += [f'backend.device_ids="{CUDA_VISIBLE_DEVICES}"'] 76 | 77 | popen = run_subprocess_and_log_stream_output(LOGGER, args) 78 | assert popen.returncode == 0, f"Failed to run {config_name}" 79 | 80 | 81 | @pytest.mark.parametrize("script_path", TEST_SCRIPT_PATHS) 82 | def test_api_scripts(script_path): 83 | args = ["python", script_path] 84 | 85 | popen = run_subprocess_and_log_stream_output(LOGGER, args) 86 | assert popen.returncode == 0, f"Failed to run {script_path}" 87 | -------------------------------------------------------------------------------- /optimum_benchmark/launchers/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | import tempfile 5 | from abc import ABC 6 | from contextlib import contextmanager 7 | from logging import getLogger 8 | from multiprocessing import Process, set_executable 9 | from typing import Any, Callable, ClassVar, Generic, List, Optional 10 | 11 | from ..benchmark.report import BenchmarkReport 12 | from ..system_utils import is_nvidia_system, is_rocm_system 13 | from .config import LauncherConfigT 14 | from .device_isolation_utils import assert_device_isolation 15 | 16 | NUMA_EXECUTABLE_CONTENT = """#!/bin/bash 17 | echo "Running with numactl wrapper" 18 | echo "numactl path: {numactl_path}" 19 | echo "numactl args: {numactl_args}" 20 | echo "python path: {python_path}" 21 | echo "python args: $@" 22 | {numactl_path} {numactl_args} {python_path} "$@" 23 | """ 24 | 25 | 26 | class Launcher(Generic[LauncherConfigT], ABC): 27 | NAME: ClassVar[str] 28 | 29 | config: LauncherConfigT 30 | 31 | def __init__(self, config: LauncherConfigT): 32 | self.config = config 33 | self.logger = getLogger(self.NAME) 34 | self.logger.info(f"Allocated {self.NAME} launcher") 35 | 36 | def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) -> BenchmarkReport: 37 | raise NotImplementedError("Launcher must implement launch method") 38 | 39 | @contextmanager 40 | def device_isolation(self, pid: int, device_ids: Optional[str] = None): 41 | if device_ids is None: 42 | if is_rocm_system(): 43 | device_ids = os.environ.get("ROCR_VISIBLE_DEVICES", None) 44 | elif is_nvidia_system(): 45 | device_ids = os.environ.get("CUDA_VISIBLE_DEVICES", None) 46 | 47 | self.device_isolation_process = Process( 48 | target=assert_device_isolation, 49 | kwargs={"action": self.config.device_isolation_action, "device_ids": device_ids, "pid": pid}, 50 | daemon=True, 51 | ) 52 | self.device_isolation_process.start() 53 | self.logger.info(f"\t+ Isolating device(s) [{device_ids}] for process [{pid}] and its children") 54 | self.logger.info(f"\t+ Executing action [{self.config.device_isolation_action}] in case of violation") 55 | 56 | yield 57 | 58 | self.logger.info("\t+ Stopping device isolation process") 59 | self.device_isolation_process.terminate() 60 | self.device_isolation_process.join() 61 | self.device_isolation_process.close() 62 | 63 | @contextmanager 64 | def numactl_executable(self): 65 | self.logger.info("\t+ Warming up multiprocessing context") 66 | dummy_process = Process(target=dummy_target, daemon=False) 67 | dummy_process.start() 68 | dummy_process.join() 69 | dummy_process.close() 70 | 71 | self.logger.info("\t+ Creating numactl wrapper executable for multiprocessing") 72 | python_path = sys.executable 73 | numactl_path = shutil.which("numactl") 74 | if 
numactl_path is None: 75 | raise RuntimeError("Could not find numactl executable. Please install numactl and try again.") 76 | numactl_args = " ".join([f"--{key}={value}" for key, value in self.config.numactl_kwargs.items()]) 77 | numa_executable = tempfile.NamedTemporaryFile(delete=False, prefix="numa_executable_", suffix=".sh") 78 | numa_executable_content = NUMA_EXECUTABLE_CONTENT.format( 79 | numactl_path=numactl_path, numactl_args=numactl_args, python_path=python_path 80 | ) 81 | numa_executable.write(numa_executable_content.encode()) 82 | os.chmod(numa_executable.name, 0o777) 83 | numa_executable.close() 84 | 85 | self.logger.info("\t+ Setting multiprocessing executable to numactl wrapper") 86 | set_executable(numa_executable.name) 87 | 88 | yield 89 | 90 | self.logger.info("\t+ Resetting default multiprocessing executable") 91 | os.unlink(numa_executable.name) 92 | set_executable(sys.executable) 93 | 94 | 95 | def dummy_target() -> None: 96 | exit(0) 97 | -------------------------------------------------------------------------------- /tests/test_energy_star.py: -------------------------------------------------------------------------------- 1 | import os 2 | from logging import getLogger 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from optimum_benchmark.logging_utils import run_subprocess_and_log_stream_output 8 | 9 | LOGGER = getLogger("test-cli") 10 | 11 | os.environ["TRANSFORMERS_IS_CI"] = "1" 12 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 13 | 14 | TEST_CONFIG_DIR = Path(__file__).parent.parent / "energy_star" 15 | TEST_CONFIG_NAMES = [ 16 | config.split(".")[0] 17 | for config in os.listdir(TEST_CONFIG_DIR) 18 | if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_")) 19 | ] 20 | TEST_SCRIPT_PATHS = [ 21 | str(TEST_CONFIG_DIR / filename) for filename in os.listdir(TEST_CONFIG_DIR) if filename.endswith(".py") 22 | ] 23 | 24 | ROCR_VISIBLE_DEVICES = os.environ.get("ROCR_VISIBLE_DEVICES", None) 25 | CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", None) 26 | 27 | 28 | @pytest.mark.parametrize("config_name", TEST_CONFIG_NAMES) 29 | def test_cli_configs(config_name): 30 | if config_name == "automatic_speech_recognition": 31 | model = "optimum-internal-testing/tiny-random-whisper" 32 | elif config_name == "image_classification": 33 | model = "hf-internal-testing/tiny-random-ViTModel" 34 | elif config_name == "image_to_text": 35 | model = "hf-internal-testing/tiny-random-BlipModel" 36 | elif config_name == "object_detection": 37 | model = "hf-internal-testing/tiny-random-DetrModel" 38 | elif config_name == "question_answering": 39 | model = "hf-internal-testing/tiny-random-BertModel" 40 | elif config_name == "sentence_similarity": 41 | model = "hf-internal-testing/tiny-random-BertModel" 42 | elif config_name == "text_classification": 43 | model = "hf-internal-testing/tiny-random-BertModel" 44 | elif config_name == "summarization": 45 | model = "hf-internal-testing/tiny-random-BartModel" 46 | elif config_name == "t5_question_answering": 47 | model = "hf-internal-testing/tiny-random-T5ForConditionalGeneration" 48 | elif config_name == "t5_summarization": 49 | model = "hf-internal-testing/tiny-random-T5ForConditionalGeneration" 50 | elif config_name == "t5_text_classification": 51 | model = "hf-internal-testing/tiny-random-T5ForConditionalGeneration" 52 | elif config_name == "t5_text_generation": 53 | model = "hf-internal-testing/tiny-random-T5ForConditionalGeneration" 54 | elif config_name == "text_to_image": 55 | model =
"hf-internal-testing/tiny-stable-diffusion-torch" 56 | elif config_name == "text_generation": 57 | model = "tiny-random/gpt-oss" 58 | else: 59 | raise ValueError(f"Unknown config name: {config_name}") 60 | 61 | args = [ 62 | "optimum-benchmark", 63 | "--config-dir", 64 | TEST_CONFIG_DIR.as_posix(), 65 | "--config-name", 66 | config_name, 67 | "backend.device=cpu", 68 | "scenario.energy=true", 69 | "scenario.memory=true", 70 | "scenario.latency=true", 71 | "scenario.num_samples=1", 72 | "scenario.warmup_runs=1", 73 | "scenario.input_shapes.batch_size=1", 74 | "++scenario.generate_kwargs.max_new_tokens=16", 75 | "++scenario.generate_kwargs.min_new_tokens=16", 76 | "++scenario.call_kwargs.num_inference_steps=4", 77 | "launcher.device_isolation=false", 78 | "backend.device_map=null", 79 | f"backend.model={model}", 80 | ] 81 | 82 | if ROCR_VISIBLE_DEVICES is not None: 83 | args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"'] 84 | elif CUDA_VISIBLE_DEVICES is not None: 85 | args += [f'backend.device_ids="{CUDA_VISIBLE_DEVICES}"'] 86 | 87 | popen = run_subprocess_and_log_stream_output(LOGGER, args) 88 | assert popen.returncode == 0, f"Failed to run {config_name}" 89 | 90 | 91 | @pytest.mark.parametrize("script_path", TEST_SCRIPT_PATHS) 92 | def test_api_scripts(script_path): 93 | args = ["python", script_path] 94 | 95 | popen = run_subprocess_and_log_stream_output(LOGGER, args) 96 | assert popen.returncode == 0, f"Failed to run {script_path}" 97 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/ipex/backend.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | from tempfile import TemporaryDirectory 4 | from typing import Any, Dict 5 | 6 | import torch 7 | from hydra.utils import get_class 8 | 9 | from ...import_utils import is_accelerate_available, is_torch_distributed_available 10 | from ..base import Backend 11 | from ..transformers_utils import fast_weights_init 12 | from .config import IPEXConfig 13 | from .utils import TASKS_TO_IPEXMODELS 14 | 15 | if is_accelerate_available(): 16 | from accelerate import Accelerator 17 | 18 | if is_torch_distributed_available(): 19 | import torch.distributed 20 | 21 | if not hasattr(os, "exit"): 22 | os.exit = os._exit 23 | 24 | 25 | class IPEXBackend(Backend[IPEXConfig]): 26 | NAME: str = "ipex" 27 | 28 | def __init__(self, config: IPEXConfig) -> None: 29 | super().__init__(config) 30 | 31 | if self.config.task in TASKS_TO_IPEXMODELS: 32 | self.ipexmodel_class = get_class(TASKS_TO_IPEXMODELS[self.config.task]) 33 | self.logger.info(f"\t+ Using IPEXModel class {self.ipexmodel_class.__name__}") 34 | else: 35 | raise NotImplementedError(f"IPEXBackend does not support task {self.config.task}") 36 | 37 | def load(self) -> None: 38 | self.logger.info("\t+ Creating backend temporary directory") 39 | self.tmpdir = TemporaryDirectory() 40 | 41 | if self.config.no_weights: 42 | self.logger.info("\t+ Creating no weights IPEXModel") 43 | self.create_no_weights_model_fast() 44 | self.logger.info("\t+ Loading no weights IPEXModel") 45 | self.load_ipexmodel_with_no_weights() 46 | else: 47 | self.logger.info("\t+ Loading pretrained IPEXModel") 48 | self.load_ipexmodel_from_pretrained() 49 | 50 | self.tmpdir.cleanup() 51 | 52 | def load_ipexmodel_from_pretrained(self) -> None: 53 | with torch.device(self.config.device): 54 | self.pretrained_model = self.ipexmodel_class.from_pretrained( 55 | self.config.model, 56 | 
**self.config.model_kwargs, 57 | **self.ipexmodel_kwargs, 58 | ) 59 | 60 | def load_ipexmodel_with_no_weights(self) -> None: 61 | with fast_weights_init(): 62 | original_model, self.config.model = self.config.model, self.no_weights_model_path.as_posix() 63 | self.load_ipexmodel_from_pretrained() 64 | self.config.model = original_model 65 | 66 | @property 67 | def ipexmodel_kwargs(self) -> Dict[str, Any]: 68 | kwargs = {} 69 | 70 | if self.config.torch_dtype is not None: 71 | kwargs["torch_dtype"] = getattr(torch, self.config.torch_dtype) 72 | 73 | return kwargs 74 | 75 | @property 76 | def split_between_processes(self) -> bool: 77 | return is_torch_distributed_available() and torch.distributed.is_initialized() 78 | 79 | def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: 80 | if self.split_between_processes: 81 | with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs: 82 | inputs = process_inputs 83 | 84 | for key, value in inputs.items(): 85 | if isinstance(value, torch.Tensor): 86 | inputs[key] = value.to(self.config.device) 87 | 88 | return inputs 89 | 90 | def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: 91 | return self.pretrained_model.forward(**inputs, **kwargs) 92 | 93 | def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: 94 | return self.pretrained_model.generate(**inputs, **kwargs) 95 | 96 | def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: 97 | return self.pretrained_model.generate(**inputs, **kwargs) 98 | 99 | def call(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: 100 | return self.pretrained_model(**inputs, **kwargs) 101 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | .ruff_cache/ 163 | .vscode/ 164 | *.ipynb 165 | runs/ 166 | sweeps/ 167 | data/ 168 | version.txt 169 | 170 | .engine/ 171 | work-in-progress/ 172 | experiments/ 173 | amdsmi/ 174 | amd-* 175 | 176 | # Code carbon 177 | generate_codecarbon.json 178 | task_codecarbon.json 179 | prefill_codecarbon.json 180 | 181 | # Mac specific 182 | external_repos/ 183 | .DS_Store 184 | outputs/ 185 | -------------------------------------------------------------------------------- /optimum_benchmark/backends/onnxruntime/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Any, Dict, Optional 3 | 4 | from ...import_utils import onnxruntime_version 5 | from ...task_utils import TEXT_GENERATION_TASKS 6 | from ..config import BackendConfig 7 | 8 | QUANTIZATION_CONFIG = { 9 | "is_static": False, 10 | "format": "QOperator", 11 | # is_static and format are mandatory 12 | } 13 | 14 | CALIBRATION_CONFIG = { 15 | "method": "MinMax" 16 | # method is mandatory 17 | } 18 | 19 | AUTO_QUANTIZATION_CONFIG = { 20 | "is_static": False 21 | # is_static is mandatory 22 | } 23 | 24 | IO_BINDING_LIBRARIES = ["transformers", "timm"] 25 | IO_BINDING_PROVIDERS = ["CPUExecutionProvider", "CUDAExecutionProvider"] 26 | DEVICE_PROVIDER_MAP = {"cpu": "CPUExecutionProvider", "cuda": "CUDAExecutionProvider"} 27 | 28 | 29 | @dataclass 30 | class ONNXRuntimeConfig(BackendConfig): 31 | name: str = "onnxruntime" 32 | version: Optional[str] = onnxruntime_version() 33 | _target_: str = "optimum_benchmark.backends.onnxruntime.backend.ONNXRuntimeBackend" 34 | 35 | # load options 36 | no_weights: bool = False 37 | 38 | # ortmodel kwargs 39 | export: Optional[bool] = None 40 | provider: Optional[str] = None 41 | use_cache: Optional[bool] = None 42 | use_merged: Optional[bool] = None 43 | torch_dtype: Optional[str] = None 44 | use_io_binding: Optional[bool] = None 45 | session_options: Dict[str, Any] = field(default_factory=dict) 46 | provider_options: Dict[str, Any] = field(default_factory=dict) 47 | 48 | # null, O1, O2, O3, O4 49 | auto_optimization: Optional[str] = None 50 | auto_optimization_config: Dict[str, Any] = field(default_factory=dict) 51 | 52 | # null, arm64, avx2, avx512, avx512_vnni, tensorrt 53 | auto_quantization: Optional[str] = None 54 | auto_quantization_config: Dict[str, Any] = field(default_factory=dict) 55 | 56 | # minmax, entropy, l2norm, percentiles 57 | auto_calibration: Optional[str] = None 58 | auto_calibration_config: Dict[str, Any] = field(default_factory=dict) 59 | 60 | # manual optimization options 61 | optimization: bool = False 62 | optimization_config: Dict[str, Any] = field(default_factory=dict) 63 | 64 | # manual quantization options 65 | quantization: bool = False 66 | quantization_config: Dict[str, Any] = field(default_factory=dict) 67 | 68 | # manual calibration options 69 | calibration: bool = False 70 | calibration_config: Dict[str, Any] = field(default_factory=dict) 71 | 72 | def __post_init__(self): 73 | super().__post_init__() 74 | 75 | if self.device not in ["cpu", "cuda"]: 76 | raise ValueError(f"ONNXRuntimeBackend only supports CPU and CUDA devices, got {self.device}") 77 | 78 | if not self.no_weights and not self.export and self.torch_dtype is not None: 79 | raise NotImplementedError("Can't convert an exported model's weights to a different dtype.") 80 | 81 | if self.provider is None: 82 | self.provider = DEVICE_PROVIDER_MAP[self.device] 83 | 84 | if self.use_io_binding 
is None: 85 | self.use_io_binding = self.provider in IO_BINDING_PROVIDERS and self.library in IO_BINDING_LIBRARIES 86 | 87 | if self.provider == "TensorrtExecutionProvider" and self.task in TEXT_GENERATION_TASKS: 88 | raise NotImplementedError("we don't support TensorRT for text generation tasks") 89 | 90 | if self.quantization: 91 | self.quantization_config = {**QUANTIZATION_CONFIG, **self.quantization_config} 92 | # raise ValueError if the quantization is static but calibration is not enabled 93 | if self.quantization_config["is_static"] and self.auto_calibration is None and not self.calibration: 94 | raise ValueError( 95 | "Quantization is static but calibration is not enabled. " 96 | "Please enable calibration or disable static quantization." 97 | ) 98 | 99 | if self.auto_quantization is not None: 100 | self.auto_quantization_config = {**AUTO_QUANTIZATION_CONFIG, **self.auto_quantization_config} 101 | if self.auto_quantization_config["is_static"] and self.auto_calibration is None and not self.calibration: 102 | raise ValueError( 103 | "Quantization is static but calibration is not enabled. " 104 | "Please enable calibration or disable static quantization." 105 | ) 106 | 107 | if self.calibration: 108 | self.calibration_config = {**CALIBRATION_CONFIG, **self.calibration_config} 109 | -------------------------------------------------------------------------------- /optimum_benchmark/scenarios/inference/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from logging import getLogger 3 | from typing import Any, Dict, Optional 4 | 5 | from ...system_utils import is_rocm_system 6 | from ..config import ScenarioConfig 7 | 8 | LOGGER = getLogger("inference") 9 | 10 | INPUT_SHAPES = { 11 | "batch_size": 2, 12 | } 13 | 14 | 15 | @dataclass 16 | class InferenceConfig(ScenarioConfig): 17 | name: str = "inference" 18 | _target_: str = "optimum_benchmark.scenarios.inference.scenario.InferenceScenario" 19 | 20 | # benchmark options 21 | iterations: int = field( 22 | default=10, 23 | metadata={ 24 | "help": "Minimum number of iterations to run the benchmark. " 25 | "The number of tracked inferences will be at least this value." 26 | "Set to 0 to disable this constraint (benchmark will run for `duration` seconds)." 27 | }, 28 | ) 29 | duration: int = field( 30 | default=10, 31 | metadata={ 32 | "help": "Minimum duration of the benchmark in seconds. " 33 | "The sum of tracked inferences will be at least this value." 34 | "Set to 0 to disable this constraint (benchmark will run for `iterations` iterations)." 35 | }, 36 | ) 37 | warmup_runs: int = field( 38 | default=10, 39 | metadata={"help": "Number of warmup runs to perform before benchmarking."}, 40 | ) 41 | 42 | # input/output config 43 | input_shapes: Dict[str, Any] = field( 44 | default_factory=dict, 45 | metadata={"help": "Input shapes for the model. 
Missing keys will be filled with default values."}, 46 | ) 47 | new_tokens: Optional[int] = field( 48 | default=None, 49 | metadata={"help": "If set, `max_new_tokens` and `min_new_tokens` will be set to this value."}, 50 | ) 51 | 52 | # tracking options 53 | memory: bool = field(default=False, metadata={"help": "Measure max memory usage"}) 54 | latency: bool = field(default=True, metadata={"help": "Measure latencies and throughputs"}) 55 | energy: bool = field(default=False, metadata={"help": "Measure energy usage and efficiency"}) 56 | 57 | # methods kwargs 58 | forward_kwargs: Dict[str, Any] = field( 59 | default_factory=dict, metadata={"help": "Keyword arguments to pass to the forward method of the backend."} 60 | ) 61 | generate_kwargs: Dict[str, Any] = field( 62 | default_factory=dict, metadata={"help": "Keyword arguments to pass to the generate method of the backend."} 63 | ) 64 | call_kwargs: Dict[str, Any] = field( 65 | default_factory=dict, metadata={"help": "Keyword arguments to pass to the call method of the backend."} 66 | ) 67 | 68 | def __post_init__(self): 69 | super().__post_init__() 70 | 71 | self.input_shapes = {**INPUT_SHAPES, **self.input_shapes} 72 | 73 | if self.new_tokens is not None: 74 | LOGGER.warning( 75 | "`new_tokens` is deprecated. Use `max_new_tokens` and `min_new_tokens` instead. " 76 | "Setting `max_new_tokens` and `min_new_tokens` to `new_tokens`." 77 | ) 78 | self.generate_kwargs["max_new_tokens"] = self.new_tokens 79 | self.generate_kwargs["min_new_tokens"] = self.new_tokens 80 | 81 | if ( 82 | "max_new_tokens" in self.generate_kwargs 83 | and "min_new_tokens" in self.generate_kwargs 84 | and self.generate_kwargs["max_new_tokens"] != self.generate_kwargs["min_new_tokens"] 85 | ): 86 | raise ValueError( 87 | "Setting `min_new_tokens` and `max_new_tokens` to different values results in non-deterministic behavior." 88 | ) 89 | 90 | elif "max_new_tokens" in self.generate_kwargs and "min_new_tokens" not in self.generate_kwargs: 91 | LOGGER.warning( 92 | "Setting `max_new_tokens` without `min_new_tokens` results in non-deterministic behavior. " 93 | "Setting `min_new_tokens` to `max_new_tokens`." 94 | ) 95 | self.generate_kwargs["min_new_tokens"] = self.generate_kwargs["max_new_tokens"] 96 | 97 | elif "min_new_tokens" in self.generate_kwargs and "max_new_tokens" not in self.generate_kwargs: 98 | LOGGER.warning( 99 | "Setting `min_new_tokens` without `max_new_tokens` results in non-deterministic behavior. " 100 | "Setting `max_new_tokens` to `min_new_tokens`." 101 | ) 102 | self.generate_kwargs["max_new_tokens"] = self.generate_kwargs["min_new_tokens"] 103 | 104 | if self.energy and is_rocm_system(): 105 | raise ValueError("Energy measurement through codecarbon is not yet available on ROCm-powered devices.") 106 | --------------------------------------------------------------------------------
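
As a quick illustration of the `InferenceConfig` post-initialization logic above, the following minimal sketch (not one of the repository files, and assuming `optimum-benchmark` is installed) shows how the deprecated `new_tokens` field is mirrored into `generate_kwargs` and how user-provided `input_shapes` are merged on top of the defaults:

```python
# Minimal sketch (assumes optimum-benchmark is installed); it only exercises
# the InferenceConfig.__post_init__ behavior shown above.
from optimum_benchmark import InferenceConfig

scenario = InferenceConfig(new_tokens=32, input_shapes={"sequence_length": 64})

# The deprecated `new_tokens` value is copied into both generation bounds,
# and the default input shapes ({"batch_size": 2}) are merged with the user's.
print(scenario.generate_kwargs)  # {'max_new_tokens': 32, 'min_new_tokens': 32}
print(scenario.input_shapes)     # {'batch_size': 2, 'sequence_length': 64}
```

Passing different `max_new_tokens` and `min_new_tokens` values would instead raise a `ValueError`, as enforced in `__post_init__`.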