├── .azure_pipelines ├── build-doc.yaml ├── dockerfiles │ ├── linux-cpu.dockerfile │ └── linux-gpu.dockerfile ├── job_templates │ ├── build-docker-image-template.yaml │ ├── huggingface-login-template.yaml │ ├── olive-build-doc-template.yaml │ ├── olive-example-cpu-template.yaml │ ├── olive-example-linux-gpu-template.yaml │ ├── olive-setup-template.yaml │ ├── olive-test-cpu-template.yaml │ └── olive-test-linux-gpu-template.yaml ├── olive-aml-ci.yaml ├── olive-ci.yaml ├── olive-examples.yaml ├── olive-ort-nightly.yaml ├── package_publish.yaml └── scripts │ ├── client_patch.py │ ├── find_failed_commit.py │ ├── requirements.txt │ └── run_test.sh ├── .coveragerc ├── .editorconfig ├── .flake8 ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.yaml ├── pull_request_template.md └── workflows │ ├── codeql.yml │ └── lint.yml ├── .gitignore ├── .lintrunner.toml ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── NEWS.md ├── NOTICE.txt ├── README.md ├── SECURITY.md ├── assets └── cost_models │ ├── Llama-2-13B.csv │ ├── Llama-2-7B.csv │ ├── Llama-3.1-8B.csv │ ├── Phi-3-mini.csv │ └── Phi-3.5-mini.csv ├── docs ├── Makefile ├── README.md ├── architecture.md ├── make.bat ├── requirements.txt └── source │ ├── _static │ ├── css │ │ └── header.css │ └── js │ │ └── custom_version.js │ ├── conf.py │ ├── dump_schema.py │ ├── examples.md │ ├── extending │ ├── custom-model-evaluator.md │ ├── custom-scripts.md │ ├── design.md │ ├── how-to-add-optimization-pass.md │ ├── index.rst │ └── python_interface.md │ ├── exts │ ├── auto_config_doc │ │ └── __init__.py │ └── gallery_directive.py │ ├── features │ ├── auto-opt.md │ ├── azure-ai │ │ ├── azure-ai.md │ │ ├── azure-arc.md │ │ ├── azure-script.md │ │ ├── index.rst │ │ ├── remote-workflow.md │ │ └── shared-model-cache.md │ ├── huggingface-integration.md │ ├── ihv-integration │ │ ├── index.rst │ │ ├── openvino.md │ │ ├── qnn.md │ │ └── snpe.md │ ├── index.rst │ ├── model-compression.md │ ├── model-conversion │ │ ├── convert-onnx.md │ │ ├── convert-pytorch.md │ │ └── index.rst │ ├── model-splitting.md │ ├── onnx-transformations.md │ ├── peft-adapters.md │ └── quantization.md │ ├── getting-started │ └── getting-started.md │ ├── how-to │ ├── cli │ │ ├── cli-auto-opt.md │ │ ├── cli-finetune.md │ │ ├── cli-quantize.md │ │ └── cli-run.md │ ├── configure-workflows │ │ ├── build-workflow.md │ │ ├── engine-configuration.md │ │ ├── how-to-configure-data.md │ │ ├── how-to-configure-model.md │ │ ├── metrics-configuration.md │ │ ├── model-packaging.md │ │ ├── pass-configuration.md │ │ └── systems.md │ ├── index.rst │ └── installation.md │ ├── images │ ├── auto_opt │ │ └── pass_flows.png │ ├── azure_arc │ │ ├── add-infra.png │ │ ├── add-kub-detail.png │ │ ├── add-kub-to-arc.png │ │ ├── add-kub.png │ │ ├── attach-kub.png │ │ ├── attach-suc.png │ │ └── new-compute.png │ ├── datacontainer_example.png │ ├── dataset-flow.png │ ├── model_splitting │ │ ├── cost_model.png │ │ └── num_splits.png │ ├── multi-lora-diagram.png │ ├── olive-black-text.png │ ├── olive-design.png │ ├── olive-flow.png │ └── olive-white-text.png │ ├── index.md │ ├── reference │ ├── cli.rst │ ├── index.rst │ ├── options.md │ └── pass.rst │ └── why-olive.md ├── examples ├── README.md ├── __init__.py ├── adetailer │ ├── README.md │ ├── face_yolo_qnn.json │ ├── requirements.txt │ └── user_script.py ├── ast │ ├── README.md │ ├── ast.json │ └── requirements.txt ├── bert │ ├── .gitignore │ ├── README.md │ ├── bert.py │ ├── bert_cuda_gpu.template.json │ ├── 
bert_inc_dynamic_ptq_cpu.json │ ├── bert_inc_ptq_cpu.json │ ├── bert_inc_smoothquant_ptq_cpu.json │ ├── bert_inc_static_ptq_cpu.json │ ├── bert_ptq_cpu.json │ ├── bert_ptq_cpu_aml.json │ ├── bert_ptq_qdq.json │ ├── bert_ptq_qdq_vitis_ai.json │ ├── bert_qat_customized_train_loop_cpu.json │ ├── bert_trt_gpu.json │ ├── bert_trtrtx_gpu.json │ ├── conda.yaml │ ├── conda_gpu.yaml │ ├── docker │ │ └── Dockerfile │ ├── google_bert_qdq.json │ ├── google_bert_qdq_vitis_ai.json │ ├── google_bert_trtrtx.json │ ├── notebook │ │ ├── bert_auto_opt_gpu.json │ │ └── multi_ep_search.ipynb │ ├── openvino │ │ ├── README.md │ │ ├── bert_base_multilingual_cased │ │ │ ├── README.md │ │ │ ├── bert-base-multilingual-cased_context_ov_static.json │ │ │ └── user_script.py │ │ └── bert_base_uncased_mrpc │ │ │ ├── README.md │ │ │ ├── bert-base-uncased-mrpc_context_ov_static.json │ │ │ └── user_script.py │ ├── qnn │ │ ├── README.md │ │ ├── bert_common.py │ │ ├── google_bert_qnn.py │ │ ├── google_bert_qnn_fp32.json │ │ ├── google_bert_qnn_fp32_ctx.json │ │ ├── google_bert_qnn_qdq.json │ │ ├── google_bert_qnn_qdq_ctx.json │ │ ├── intel_bert_qnn_fp32.json │ │ ├── intel_bert_qnn_fp32_cxt.json │ │ ├── intel_bert_qnn_qdq.json │ │ ├── intel_bert_qnn_qdq_cxt.json │ │ ├── requirements.txt │ │ └── wikitext.py │ ├── requirements.txt │ ├── snpe │ │ ├── README.md │ │ ├── bert_snpe.json │ │ └── user_script.py │ └── user_script.py ├── bge │ ├── bge-small-en-v1.5_ptq_qnn.json │ ├── readme.md │ ├── requirements.txt │ └── user_script.py ├── clip │ ├── README.md │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qdq.json │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qdq_vitis_ai.json │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_trtrtx.json │ ├── openai_clip-vit-base-patch16_ptq_qdq.json │ ├── openai_clip-vit-base-patch16_ptq_qdq_vitis_ai.json │ ├── openai_clip-vit-base-patch16_trtrtx.json │ ├── openai_clip-vit-base-patch32_ptq_qdq.json │ ├── openai_clip-vit-base-patch32_ptq_qdq_vitis_ai.json │ ├── openai_clip-vit-base-patch32_trtrtx.json │ ├── openvino │ │ ├── README.md │ │ ├── clip_vit_b32_laion2b_s34B_b79k_context_ov_static.json │ │ ├── clip_vit_base_patch16_context_ov_static.json │ │ ├── clip_vit_base_patch32_context_ov_static.json │ │ └── user_script.py │ ├── qnn │ │ ├── README.md │ │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn_fp32.json │ │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn_fp32_ctx.json │ │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn_qdq.json │ │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn_qdq_ctx.json │ │ ├── openai_clip-vit-base-patch16_ptq_qnn_fp32.json │ │ ├── openai_clip-vit-base-patch16_ptq_qnn_fp32_ctx.json │ │ ├── openai_clip-vit-base-patch16_ptq_qnn_qdq.json │ │ ├── openai_clip-vit-base-patch16_ptq_qnn_qdq_ctx.json │ │ ├── openai_clip-vit-base-patch32_ptq_qnn_fp32.json │ │ ├── openai_clip-vit-base-patch32_ptq_qnn_fp32_ctx.json │ │ ├── openai_clip-vit-base-patch32_ptq_qnn_qdq.json │ │ ├── openai_clip-vit-base-patch32_ptq_qnn_qdq_ctx.json │ │ ├── requirements.txt │ │ └── user_script.py │ ├── requirements.txt │ └── user_script.py ├── deberta │ ├── README.md │ ├── deberta.json │ └── requirements.txt ├── deepseek │ ├── README.md │ └── openvino │ │ ├── DeepSeek-R1-Distill-Qwen-1.5B_context_ov_dynamic_sym_gs128_bkp_int8_sym_r1.json │ │ └── README.md ├── directml │ ├── README.md │ ├── llm │ │ ├── .gitignore │ │ ├── README.md │ │ ├── chat_app │ │ │ ├── __init__.py │ │ │ ├── app.py │ │ │ ├── app_modules │ │ │ │ ├── overwrites.py │ │ │ │ ├── presets.py │ │ │ │ └── utils.py │ │ │ ├── assets │ │ │ │ ├── 
custom.css │ │ │ │ └── custom.js │ │ │ └── interface │ │ │ │ ├── base_interface.py │ │ │ │ └── hddr_llm_onnx_dml_interface.py │ │ ├── chat_templates.py │ │ ├── config.py │ │ ├── config_llm.json │ │ ├── decoder_model.py │ │ ├── falcon.py │ │ ├── llava_model.py │ │ ├── llm.py │ │ ├── model_type_mapping.py │ │ ├── phi.py │ │ ├── phi3.py │ │ ├── placeholder.png │ │ ├── requirements.txt │ │ ├── run_llm_batched_io_binding.py │ │ ├── run_llm_io_binding.py │ │ ├── run_vision_llm_io_binding.py │ │ └── user_script.py │ ├── squeezenet │ │ ├── README.md │ │ ├── squeezenet_config.json │ │ └── user_script.py │ ├── stable_diffusion │ │ ├── README.md │ │ └── readme │ │ │ └── pipeline.png │ └── stable_diffusion_xl │ │ ├── README.md │ │ └── readme │ │ ├── pipeline.png │ │ └── sdxl_flow.png ├── falcon │ ├── README.md │ ├── config.json │ └── requirements.txt ├── getting_started │ ├── README.md │ ├── olive-awq-ft-llama.ipynb │ ├── olive-deepseek-finetune.ipynb │ ├── olive_quickstart.ipynb │ └── text-gen-optimized-slms.ipynb ├── gptj │ ├── README.md │ ├── gptj_inc_dynamic_ptq_cpu.json │ ├── gptj_inc_static_ptq_cpu.json │ ├── requirements.txt │ └── user_script.py ├── gte │ ├── README.md │ ├── config.json │ └── user_script.py ├── llama2 │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── USE-POLICY-META-LLAMA-2.md │ ├── conda_gpu.yaml │ ├── llama2.py │ ├── llama2_generate.json │ ├── llama2_lmeval.json │ ├── llama2_lmeval_onnx.json │ ├── llama2_model_builder.py │ ├── llama2_model_builder_template.json │ ├── llama2_multilora.ipynb │ ├── llama2_qlora.json │ ├── llama2_split.json │ ├── llama2_template.json │ ├── llama2_tensor_parallel.json │ ├── notebook │ │ ├── llama2 │ │ │ ├── conda.yaml │ │ │ ├── config.json │ │ │ └── notebook.ipynb │ │ └── llama2_multiep │ │ │ ├── config_cpu.template.json │ │ │ ├── config_gpu.template.json │ │ │ ├── config_multi_ep.template.json │ │ │ ├── llama2.py │ │ │ ├── multiple_ep_requirements.txt │ │ │ └── notebook.ipynb │ ├── requirements-gptq.txt │ ├── requirements-pipeline.txt │ ├── requirements-qlora.txt │ ├── requirements.txt │ ├── tensor_parallel_generate.py │ └── tensor_parallel_inference.py ├── llama3 │ ├── README.md │ └── openvino │ │ ├── Llama-3.2-1B-Instruct_context_ov_dynamic_sym_bkp_int8_sym.json │ │ └── README.md ├── mistral │ ├── README.md │ ├── mistral.py │ ├── mistral_fp16.json │ ├── mistral_int4.json │ ├── requirements.txt │ └── user_script.py ├── mobilenet │ ├── .gitignore │ ├── onnx │ │ ├── README.md │ │ ├── config.json │ │ ├── imagenet.py │ │ ├── requirements.txt │ │ └── user_script.py │ └── qnn │ │ ├── README.md │ │ ├── download_files.py │ │ ├── mobilenet_qnn_ep.json │ │ ├── requirements.txt │ │ └── user_script.py ├── open_llama │ ├── README.md │ ├── conda.yaml │ ├── open_llama_arc.json │ ├── open_llama_config.json │ ├── open_llama_inc_woq.json │ ├── open_llama_sparsegpt_gpu.json │ ├── requirements-arc.txt │ ├── requirements-sparsegpt.txt │ ├── requirements-woq.txt │ ├── requirements.txt │ └── user_script.py ├── opt_125m │ ├── README.md │ ├── awq.json │ ├── awq_onnx.json │ ├── gptq.json │ ├── gptq_onnx.json │ ├── requirements-awq.txt │ ├── requirements-gptq.txt │ └── requirements.txt ├── phi2 │ ├── .gitignore │ ├── README.md │ ├── generate.py │ ├── phi2.py │ ├── phi2_genai.json │ ├── phi2_optimize_template.json │ ├── requirements-lora.txt │ ├── requirements-pipeline.txt │ ├── requirements-slicegpt.txt │ └── requirements.txt ├── phi3 │ ├── .gitignore │ ├── README.md │ ├── README_VISION.md │ ├── phi3.py │ ├── phi3_nvmo_ptq.json │ ├── phi3_template.json │ ├── 
phi3_vision.py │ ├── requirements-awq.txt │ ├── requirements-nvmo-awq.txt │ ├── requirements-quarot.txt │ ├── requirements-vision.txt │ ├── requirements.txt │ └── vision │ │ ├── config_templates │ │ ├── text_config.json │ │ ├── text_embedding_config.json │ │ └── vision_config.json │ │ └── scripts │ │ ├── prepare_phi3_vision_for_olive.sh │ │ └── user_script.py ├── phi3_5 │ ├── README.md │ ├── app.py │ ├── openvino │ │ ├── Phi-3.5-mini-instruct_context_ov_dynamic_sym_gs128_bkp_int8_sym.json │ │ └── README.md │ ├── qdq_config.json │ ├── qdq_config_vitis_ai.json │ ├── qnn_config.json │ └── requirements.txt ├── phi4 │ ├── README.md │ └── openvino │ │ ├── README.md │ │ ├── phi_4_mini_reasoning │ │ ├── Phi-4-mini-reasoning_context_ov_dynamic_sym_gs128_bkp_int8_sym.json │ │ └── README.md │ │ ├── phi_4_reasoning │ │ ├── Phi-4-reasoning_context_ov_dynamic_sym_gs128_bkp_int8_sym.json │ │ └── README.md │ │ └── phi_4_reasoning_plus │ │ ├── Phi-4-reasoning-plus_context_ov_dynamic_sym_gs128_bkp_int8_sym.json │ │ └── README.md ├── qwen2_5 │ ├── README.md │ └── openvino │ │ ├── Qwen2.5-1.5B-instruct_context_ov_dynamic_sym_bkp_int8_sym_r1.json │ │ └── README.md ├── red_pajama │ ├── README.md │ ├── config.json │ ├── requirements.txt │ └── user_script.py ├── resnet │ ├── README.md │ ├── conda.yaml │ ├── imagenet.py │ ├── multiple_ep_requirements.txt │ ├── openvino │ │ ├── README.md │ │ ├── imagenet.py │ │ ├── requirements.txt │ │ └── resnet_context_ov_static.json │ ├── prepare_model_data.py │ ├── qnn │ │ ├── README.md │ │ ├── imagenet.py │ │ ├── requirements.txt │ │ ├── resnet_ptq_qnn_fp32.json │ │ ├── resnet_ptq_qnn_fp32_ctx.json │ │ ├── resnet_ptq_qnn_qdq.json │ │ └── resnet_ptq_qnn_qdq_ctx.json │ ├── requirements.txt │ ├── resnet_dynamic_ptq_cpu.json │ ├── resnet_multiple_ep.json │ ├── resnet_ptq_cpu.json │ ├── resnet_ptq_cpu_aml_dataset.json │ ├── resnet_ptq_qdq.json │ ├── resnet_ptq_qdq_vitis_ai.json │ ├── resnet_qat_default_train_loop_cpu.json │ ├── resnet_qat_lightning_module_cpu.json │ ├── resnet_static_ptq_cpu.json │ ├── resnet_trtrtx.json │ ├── resnet_vitis_ai_ptq_cpu.json │ └── user_script.py ├── sentence_transformers │ ├── eval_stsb.py │ ├── readme.md │ └── sentence_transformer_config.json ├── stable_diffusion │ ├── .gitignore │ ├── README.md │ ├── assets │ │ └── dog.png │ ├── config_safety_checker.json │ ├── config_text_encoder.json │ ├── config_unet.json │ ├── config_vae_decoder.json │ ├── config_vae_encoder.json │ ├── evaluation.py │ ├── notebook │ │ ├── .gitignore │ │ ├── image │ │ │ ├── result_pen.png │ │ │ ├── result_pen_merge.png │ │ │ ├── result_wolf.png │ │ │ └── result_wolf_merge.png │ │ ├── sd_multilora.ipynb │ │ ├── text_encoder.py │ │ ├── text_encoder2.py │ │ ├── unet_pen_sketch.py │ │ ├── unet_wolf_plushie.py │ │ ├── vae_decoder.py │ │ └── vae_encoder.py │ ├── requirements-common.txt │ ├── requirements-ov.txt │ ├── requirements.txt │ ├── sd_utils │ │ ├── config.py │ │ ├── ort.py │ │ ├── ov.py │ │ └── qdq.py │ ├── stable_diffusion.py │ └── user_script.py ├── stable_diffusion_xl │ ├── .gitignore │ ├── README.md │ ├── config.py │ ├── config_text_encoder.json │ ├── config_text_encoder_2.json │ ├── config_unet.json │ ├── config_vae_decoder.json │ ├── config_vae_encoder.json │ ├── requirements-common.txt │ ├── requirements.txt │ ├── stable_diffusion_xl.py │ └── user_script.py ├── super_resolution │ ├── README.md │ ├── config.json │ ├── loader.py │ └── requirements.txt ├── table_transformer_detection │ ├── README.md │ ├── prepare_datasets.py │ ├── ttd.py │ └── ttd_config.json ├── test │ ├── 
__init__.py │ ├── azureml │ │ ├── __init__.py │ │ ├── test_bert_ptq_cpu_aml.py │ │ ├── test_llama2.py │ │ ├── test_resnet_ptq_cpu_aml.py │ │ └── test_resnet_vitis_ai_ptq_cpu_aml.py │ ├── local │ │ ├── __init__.py │ │ ├── test_ast.py │ │ ├── test_bert_cuda_gpu.py │ │ ├── test_bert_inc.py │ │ ├── test_bert_ptq_cpu.py │ │ ├── test_bert_ptq_cpu_docker.py │ │ ├── test_deberta.py │ │ ├── test_llama2.py │ │ ├── test_mistral_fp16.py │ │ ├── test_mobilenet.py │ │ ├── test_mobilenet_qnn_ep.py │ │ ├── test_phi2.py │ │ ├── test_resnet_ptq_cpu.py │ │ ├── test_resnet_qat.py │ │ ├── test_resnet_vitis_ai_ptq_cpu.py │ │ ├── test_stable_diffusion_cuda_gpu.py │ │ └── test_super_resolution.py │ └── utils.py ├── utils │ ├── generator.py │ └── kv_cache_utils.py ├── vgg │ ├── .gitignore │ ├── README.md │ ├── download_files.py │ ├── prepare_config.py │ ├── requirements.txt │ └── vgg_config.json └── vit │ ├── README.md │ ├── imagenet.py │ ├── openvino │ ├── README.md │ ├── imagenet.py │ ├── requirements.txt │ └── vit_base_patch16_224_context_ov_static.json │ ├── qnn │ ├── README.md │ ├── imagenet.py │ ├── requirements.txt │ ├── val_tiny_imagenet │ │ ├── val_tiny_imagenet.py │ │ └── vit_id2label.json │ ├── vit_qnn_fp32.json │ ├── vit_qnn_fp32_ctx.json │ ├── vit_qnn_qdq.json │ └── vit_qnn_qdq_ctx.json │ ├── vit_qdq.json │ ├── vit_qdq_vitis_ai.json │ └── vit_trtrtx.json ├── olive ├── __init__.py ├── __main__.py ├── auto_optimizer │ ├── __init__.py │ ├── config_template │ │ ├── opt_level_passes.yaml │ │ └── pass_capability.yaml │ ├── regulate_mixins.py │ └── template_mapping.py ├── azureml │ ├── __init__.py │ └── azureml_client.py ├── cache.py ├── cli │ ├── __init__.py │ ├── auto_opt.py │ ├── base.py │ ├── capture_onnx.py │ ├── configure_qualcomm_sdk.py │ ├── constants.py │ ├── convert_adapters.py │ ├── extract_adapters.py │ ├── finetune.py │ ├── generate_adapter.py │ ├── generate_cost_model.py │ ├── launcher.py │ ├── manage_aml_compute.py │ ├── quantize.py │ ├── run.py │ ├── session_params_tuning.py │ └── shared_cache.py ├── common │ ├── __init__.py │ ├── auto_config.py │ ├── config_utils.py │ ├── constants.py │ ├── container_client_factory.py │ ├── hf │ │ ├── __init__.py │ │ ├── login.py │ │ ├── mappings.py │ │ ├── mlflow.py │ │ ├── model_io.py │ │ ├── peft.py │ │ ├── quant.py │ │ ├── utils.py │ │ └── wrapper.py │ ├── import_lib.py │ ├── ort_inference.py │ ├── pydantic_v1.py │ ├── user_module_loader.py │ └── utils.py ├── constants.py ├── data │ ├── __init__.py │ ├── component │ │ ├── __init__.py │ │ ├── dataloader.py │ │ ├── dataset.py │ │ ├── load_dataset.py │ │ ├── post_process_data.py │ │ ├── pre_process_data.py │ │ └── text_generation.py │ ├── config.py │ ├── constants.py │ ├── container │ │ ├── __init__.py │ │ ├── data_container.py │ │ ├── dummy_data_container.py │ │ ├── huggingface_container.py │ │ └── raw_data_container.py │ ├── registry.py │ └── template.py ├── engine │ ├── __init__.py │ ├── config.py │ ├── engine.py │ ├── footprint.py │ ├── output.py │ └── packaging │ │ ├── Dockerfile.base │ │ ├── __init__.py │ │ ├── packaging_config.py │ │ └── packaging_generator.py ├── evaluator │ ├── __init__.py │ ├── accuracy.py │ ├── lmeval_onnx_model.py │ ├── metric.py │ ├── metric_backend.py │ ├── metric_config.py │ ├── metric_result.py │ ├── olive_evaluator.py │ └── registry.py ├── exception │ └── __init__.py ├── hardware │ ├── __init__.py │ ├── accelerator.py │ └── constants.py ├── logging.py ├── model │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ ├── hf_config.py │ │ ├── io_config.py │ │ ├── 
kv_cache_config.py │ │ ├── model_config.py │ │ └── registry.py │ ├── handler │ │ ├── __init__.py │ │ ├── base.py │ │ ├── composite.py │ │ ├── hf.py │ │ ├── mixin │ │ │ ├── __init__.py │ │ │ ├── dummy_inputs.py │ │ │ ├── hf.py │ │ │ ├── io_config.py │ │ │ ├── json.py │ │ │ ├── kv_cache.py │ │ │ ├── mlflow.py │ │ │ ├── onnx_ep.py │ │ │ └── resource.py │ │ ├── onnx.py │ │ ├── openvino.py │ │ ├── pytorch.py │ │ ├── qnn.py │ │ ├── snpe.py │ │ └── tensorflow.py │ └── utils │ │ ├── __init__.py │ │ ├── onnx_utils.py │ │ └── path_utils.py ├── olive_config.json ├── package_config.py ├── passes │ ├── __init__.py │ ├── olive_pass.py │ ├── onnx │ │ ├── __init__.py │ │ ├── append_pre_post_processing_ops.py │ │ ├── bnb_quantization.py │ │ ├── common.py │ │ ├── compose.py │ │ ├── context_binary.py │ │ ├── conversion.py │ │ ├── dynamic_to_fixed_shape.py │ │ ├── extract_adapters.py │ │ ├── float16_conversion.py │ │ ├── graph_surgeries.py │ │ ├── hqq_quantization.py │ │ ├── inc_quantization.py │ │ ├── io_datatype_converter.py │ │ ├── merge_decoders.py │ │ ├── mixed_precision.py │ │ ├── mixed_precision_overrides.py │ │ ├── mnb_to_qdq.py │ │ ├── model_builder.py │ │ ├── moe_experts_distributor.py │ │ ├── nvmo_quantization.py │ │ ├── onnx_dag.py │ │ ├── onnxscript_fusion.py │ │ ├── optimum_conversion.py │ │ ├── optimum_merging.py │ │ ├── peephole_optimizer.py │ │ ├── pipeline │ │ │ ├── __init__.py │ │ │ └── step_utils.py │ │ ├── qnn │ │ │ ├── __init__.py │ │ │ └── qnn_preprocess.py │ │ ├── quantization.py │ │ ├── session_params_tuning.py │ │ ├── split.py │ │ ├── static_llm.py │ │ ├── tensorrt │ │ │ ├── __init__.py │ │ │ └── trt_dla_transforms.py │ │ ├── transformer_optimization.py │ │ ├── vitis_ai │ │ │ ├── __init__.py │ │ │ ├── calibrate.py │ │ │ ├── meta_data.py │ │ │ ├── preprocess.py │ │ │ ├── quant_utils.py │ │ │ ├── quantize.py │ │ │ ├── quantizer.py │ │ │ └── refine.py │ │ └── vitis_ai_quantization.py │ ├── openvino │ │ ├── __init__.py │ │ ├── conversion.py │ │ ├── encapsulation.py │ │ ├── io_update.py │ │ ├── optimum_intel.py │ │ └── quantization.py │ ├── pass_config.py │ ├── pytorch │ │ ├── __init__.py │ │ ├── autoawq.py │ │ ├── capture_split_info.py │ │ ├── cluster.py │ │ ├── common.py │ │ ├── gptq.py │ │ ├── hadamard_utils.py │ │ ├── lora.py │ │ ├── merge_adapter_weights.py │ │ ├── pytorch_lightning_utils.py │ │ ├── qat_utils.py │ │ ├── quantization_aware_training.py │ │ ├── rotate.py │ │ ├── sgdg.py │ │ ├── slicegpt.py │ │ ├── sparsegpt.py │ │ ├── sparsegpt_utils.py │ │ ├── tensor_parallel.py │ │ ├── tensor_parallel_layers.py │ │ ├── tensor_parallel_llama2.py │ │ ├── torch_trt_conversion.py │ │ ├── train_utils.py │ │ └── trt_utils.py │ ├── qnn │ │ ├── __init__.py │ │ ├── context_binary_generator.py │ │ ├── conversion.py │ │ └── model_lib_generator.py │ ├── snpe │ │ ├── __init__.py │ │ ├── conversion.py │ │ ├── quantization.py │ │ └── snpe_to_onnx.py │ └── utils │ │ └── __init__.py ├── platform_sdk │ ├── __init__.py │ └── qualcomm │ │ ├── __init__.py │ │ ├── configure │ │ ├── __init__.py │ │ ├── __main__.py │ │ └── configure.py │ │ ├── constants.py │ │ ├── copy_libcdsprpc.ps1 │ │ ├── create_python_env.ps1 │ │ ├── create_python_env.sh │ │ ├── env.py │ │ ├── qnn │ │ ├── __init__.py │ │ ├── env.py │ │ ├── qnn.py │ │ └── utils │ │ │ └── __init__.py │ │ ├── runner.py │ │ ├── snpe │ │ ├── __init__.py │ │ ├── env.py │ │ ├── snpe.py │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── dev.py │ │ │ └── inference.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── adb.py │ │ └── utils │ │ ├── __init__.py │ │ ├── 
data_loader.py │ │ └── input_list.py ├── resource_path.py ├── search │ ├── __init__.py │ ├── samplers │ │ ├── __init__.py │ │ ├── optuna_sampler.py │ │ ├── random_sampler.py │ │ ├── search_sampler.py │ │ ├── sequential_sampler.py │ │ └── tpe_sampler.py │ ├── search_parameter.py │ ├── search_point.py │ ├── search_results.py │ ├── search_sample.py │ ├── search_space.py │ ├── search_strategy.py │ └── utils.py ├── systems │ ├── __init__.py │ ├── accelerator_creator.py │ ├── azureml │ │ ├── __init__.py │ │ ├── aml_evaluation_runner.py │ │ ├── aml_pass_runner.py │ │ ├── aml_system.py │ │ └── aml_workflow_runner.py │ ├── common.py │ ├── docker │ │ ├── Dockerfile │ │ ├── Dockerfile.cpu │ │ ├── Dockerfile.gpu │ │ ├── Dockerfile.openvino │ │ ├── __init__.py │ │ ├── docker_system.py │ │ ├── eval.py │ │ ├── runner.py │ │ └── utils.py │ ├── isolated_ort │ │ ├── __init__.py │ │ ├── inference_runner.py │ │ └── isolated_ort_system.py │ ├── local.py │ ├── olive_system.py │ ├── python_environment │ │ ├── __init__.py │ │ ├── common_requirements.txt │ │ ├── evaluation_runner.py │ │ ├── pass_runner.py │ │ └── python_environment_system.py │ ├── system_alias.py │ ├── system_config.py │ └── utils │ │ ├── __init__.py │ │ ├── arg_parser.py │ │ ├── available_providers_runner.py │ │ └── misc.py └── workflows │ ├── __init__.py │ └── run │ ├── __init__.py │ ├── __main__.py │ ├── config.py │ └── run.py ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── scripts ├── format_json.py ├── generate_cost_model_artifacts.py └── overwrite_version.py ├── setup.py └── test ├── __init__.py ├── integ_test ├── __init__.py ├── aml_model_test │ ├── __init__.py │ ├── conda.yaml │ └── test_aml_model.py ├── aml_resource_path │ ├── __init__.py │ └── test_aml_resource_path.py ├── evaluator │ ├── __init__.py │ ├── azureml_eval │ │ ├── __init__.py │ │ ├── conda.yaml │ │ ├── test_aml_evaluation.py │ │ ├── user_script.py │ │ └── utils.py │ ├── docker_eval │ │ ├── __init__.py │ │ ├── dockerfile │ │ │ └── Dockerfile │ │ ├── test_docker_evaluation.py │ │ ├── user_script.py │ │ └── utils.py │ └── local_eval │ │ ├── __init__.py │ │ ├── test_local_evaluation.py │ │ ├── user_script.py │ │ └── utils.py ├── pass_runner │ ├── __init__.py │ └── test_docker_system.py └── utils.py ├── multiple_ep ├── __init__.py ├── requirements.txt ├── test_aml_system.py ├── test_docker_system.py ├── test_python_env_system.py ├── user_script.py └── utils.py ├── requirements-test-cpu.txt ├── requirements-test-gpu.txt ├── requirements-test.txt └── unit_test ├── .gitignore ├── __init__.py ├── assets ├── __init__.py └── user_script.py ├── auto_optimizer ├── __init__.py ├── mock_data │ └── available_pass_flows.yaml └── test_auto_optimizer.py ├── cli ├── __init__.py ├── output_model │ └── model_config.json ├── test_base.py └── test_cli.py ├── common ├── __init__.py ├── test_container_client_factory.py ├── test_copy_dir.py ├── test_get_attr.py ├── test_hardlink_copy.py ├── test_hf.py ├── test_hf_wrapper.py ├── test_import_lib.py ├── test_retry.py └── test_save_load_weights.py ├── conftest.py ├── data_container ├── __init__.py ├── test_data_config.py ├── test_data_container.py ├── test_dataloader.py ├── test_dataset.py └── test_template.py ├── engine ├── __init__.py ├── mock_data │ └── footprints.json ├── packaging │ ├── __init__.py │ ├── code │ │ └── score.py │ └── test_packaging_generator.py ├── test_engine.py ├── test_footprint.py └── test_output.py ├── evaluator ├── __init__.py ├── test_accuracy.py ├── test_metric.py ├── test_metric_backend.py └── 
test_olive_evaluator.py ├── hardware ├── __init__.py └── test_accelerator.py ├── model ├── __init__.py ├── test_composite_model.py ├── test_hf_config.py ├── test_hf_model.py ├── test_kv_cache_config.py ├── test_mlflow_model.py ├── test_onnx_model.py ├── test_pytorch_model.py └── user_script.py ├── passes ├── __init__.py ├── common │ ├── __init__.py │ └── test_user_script.py ├── inc │ ├── __init__.py │ └── test_inc_quantization.py ├── onnx │ ├── __init__.py │ ├── pipeline │ │ ├── __init__.py │ │ ├── step_config.json │ │ └── test_step_utils.py │ ├── test_bnb_quantization.py │ ├── test_common.py │ ├── test_compose.py │ ├── test_context_binary.py │ ├── test_conversion.py │ ├── test_dynamic_to_fixed_shape.py │ ├── test_extract_adapters.py │ ├── test_float16_conversion.py │ ├── test_graph_surgeries.py │ ├── test_hqq_quantization.py │ ├── test_io_datatype_converter.py │ ├── test_mixed_precision.py │ ├── test_mnb_to_qdq.py │ ├── test_model_builder.py │ ├── test_nvmo_quantization.py │ ├── test_onnxscript_fusion.py │ ├── test_optimum_conversion.py │ ├── test_peephole_optimizer.py │ ├── test_pre_post_processing_op.py │ ├── test_qnn_mixed_precision_overrides.py │ ├── test_qnn_preprocess.py │ ├── test_quantization.py │ ├── test_session_params_tuning.py │ ├── test_split_model.py │ ├── test_static_llm.py │ ├── test_transformer_optimization.py │ └── test_trt_dla_transforms.py ├── openvino │ ├── __init__.py │ ├── test_openvino_conversion.py │ ├── test_openvino_encapsulation.py │ ├── test_openvino_io_update.py │ ├── test_openvino_optimum_conversion.py │ ├── test_openvino_quantization.py │ └── user_script.py ├── pytorch │ ├── __init__.py │ ├── test_autoawq.py │ ├── test_capture_split_info.py │ ├── test_gptq.py │ ├── test_lora.py │ ├── test_quantization_aware_training.py │ ├── test_rotate.py │ ├── test_slicegpt.py │ ├── test_sparsegpt.py │ └── test_torch_trt_conversion.py ├── qnn │ ├── __init__.py │ ├── test_qnn_context_bin_generator.py │ └── test_qnn_conversion.py ├── test_pass.py ├── test_pass_serialization.py └── vitis_ai │ ├── __init__.py │ ├── test_vitis_ai_add_metadata.py │ └── test_vitis_ai_quantization.py ├── resource_path ├── __init__.py └── test_resource_path.py ├── search ├── samplers │ ├── test_random_sampler.py │ ├── test_sequential_sampler.py │ └── test_tpe_sampler.py ├── test_search_results.py ├── test_search_space.py └── test_search_strategy.py ├── snpe ├── __init__.py └── test_adb_run.py ├── systems ├── __init__.py ├── azureml │ ├── __init__.py │ ├── data_dir │ │ └── datafile.json │ ├── output_metrics │ │ └── pipeline_output │ │ │ └── named-outputs │ │ │ ├── accuracy │ │ │ └── metric_result.json │ │ │ └── latency │ │ │ └── metric_result.json │ ├── script_dir │ │ └── user_script.py │ ├── test_alias_system.py │ └── test_aml_system.py ├── docker │ ├── __init__.py │ ├── output_local_path │ │ └── eval_res.json │ └── test_docker_system.py ├── isolated_ort │ ├── __init__.py │ └── test_isolated_ort_system.py ├── python_environment │ ├── __init__.py │ └── test_python_environment_system.py ├── test_local.py └── test_utils.py ├── test_cache.py ├── test_package_config.py ├── utils.py └── workflows ├── __init__.py ├── mock_data ├── default_engine.json ├── dependency_setup.json ├── readymade_system.json └── user_script.json ├── test_run_config.py ├── test_setup.py └── test_workflow_run.py /.azure_pipelines/dockerfiles/linux-cpu.dockerfile: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # 
Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | ARG BASE_IMAGE 6 | FROM ${BASE_IMAGE} 7 | 8 | ARG PYTHON_VERSION 9 | 10 | RUN apt-get update && \ 11 | apt-get install -y --no-install-recommends \ 12 | python${PYTHON_VERSION} \ 13 | python${PYTHON_VERSION}-dev \ 14 | python${PYTHON_VERSION}-venv \ 15 | python3-pip 16 | RUN ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python 17 | 18 | COPY . /olive 19 | WORKDIR /olive 20 | RUN python -m venv olive-venv 21 | RUN . olive-venv/bin/activate && \ 22 | pip install --upgrade setuptools && \ 23 | pip install -e . 24 | -------------------------------------------------------------------------------- /.azure_pipelines/job_templates/build-docker-image-template.yaml: -------------------------------------------------------------------------------- 1 | # Docker image build template 2 | 3 | parameters: 4 | dockerfile: '' 5 | python_version: '' 6 | docker_image: '' 7 | base_image: '' 8 | trt_version: '' 9 | 10 | steps: 11 | - script: | 12 | docker login -u $(docker-username) -p $(docker-password) 13 | docker build \ 14 | --build-arg BASE_IMAGE=${{ parameters.base_image }} \ 15 | --build-arg TENSORRT_VERSION=${{ parameters.trt_version }} \ 16 | --build-arg PYTHON_VERSION=${{ parameters.python_version }} \ 17 | -t ${{ parameters.docker_image }} \ 18 | -f $(Build.SourcesDirectory)/${{ parameters.dockerfile }} . 19 | displayName: Build Docker Image 20 | 21 | - script: | 22 | docker version 23 | docker image ls 24 | docker system df 25 | df -h 26 | displayName: Check Docker Images 27 | -------------------------------------------------------------------------------- /.azure_pipelines/job_templates/huggingface-login-template.yaml: -------------------------------------------------------------------------------- 1 | parameters: 2 | hf_token: 'huggingface_token' 3 | 4 | steps: 5 | - script: huggingface-cli login --token ${{ parameters.hf_token }} 6 | displayName: 'Hugging Face Login' 7 | -------------------------------------------------------------------------------- /.azure_pipelines/job_templates/olive-setup-template.yaml: -------------------------------------------------------------------------------- 1 | parameters: 2 | python_version: '3.10' 3 | onnxruntime: 'onnxruntime' 4 | onnxruntime_nightly: false 5 | torch: torch 6 | 7 | steps: 8 | - task: UsePythonVersion@0 9 | inputs: 10 | versionSpec: ${{ parameters.python_version }} 11 | displayName: Use Python ${{ parameters.python_version }} 12 | 13 | - script: python -m pip install ${{ parameters.torch }} 14 | displayName: Install torch 15 | 16 | - script: python -m pip install . 
17 | displayName: Install Olive 18 | 19 | - ${{ if eq(parameters.onnxruntime_nightly, true) }}: 20 | - script: | 21 | pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt 22 | pip install ${{ parameters.onnxruntime }} --pre --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ --disable-pip-version-check 23 | displayName: Install ${{ parameters.onnxruntime }} 24 | - ${{ else }}: 25 | - script: | 26 | pip install ${{ parameters.onnxruntime }} 27 | displayName: Install ${{ parameters.onnxruntime }} 28 | -------------------------------------------------------------------------------- /.azure_pipelines/olive-aml-ci.yaml: -------------------------------------------------------------------------------- 1 | trigger: 2 | batch: true 3 | branches: 4 | include: 5 | - main 6 | paths: 7 | exclude: 8 | - docs/* 9 | - examples/README.md 10 | - examples/**/README.md 11 | - README.md 12 | - CONTRIBUTING.md 13 | - LICENSE 14 | pr: none 15 | 16 | jobs: 17 | # Linux examples test 18 | - template: job_templates/olive-example-cpu-template.yaml 19 | parameters: 20 | name: Linux_CI 21 | pool: $(OLIVE_POOL_UBUNTU2004) 22 | subfolder: azureml 23 | examples: 24 | bert_ptq_cpu_aml: 25 | exampleFolder: bert 26 | exampleName: bert_ptq_cpu_aml 27 | resnet_ptq_cpu: 28 | exampleFolder: resnet 29 | exampleName: resnet_ptq_cpu_aml 30 | resnet_vitis_ai_ptq_cpu: 31 | exampleFolder: resnet 32 | exampleName: resnet_vitis_ai_ptq_cpu_aml 33 | llama2: 34 | exampleFolder: llama2 35 | exampleName: llama2 36 | exampleRequirements: requirements-pipeline.txt 37 | 38 | # Windows examples test 39 | - template: job_templates/olive-example-cpu-template.yaml 40 | parameters: 41 | name: Windows_CI 42 | pool: $(OLIVE_POOL_WIN2019) 43 | subfolder: azureml 44 | examples: 45 | bert_ptq_cpu_aml: 46 | exampleFolder: bert 47 | exampleName: bert_ptq_cpu_aml 48 | resnet_ptq_cpu: 49 | exampleFolder: resnet 50 | exampleName: resnet_ptq_cpu_aml 51 | resnet_vitis_ai_ptq_cpu: 52 | exampleFolder: resnet 53 | exampleName: resnet_vitis_ai_ptq_cpu_aml 54 | -------------------------------------------------------------------------------- /.azure_pipelines/package_publish.yaml: -------------------------------------------------------------------------------- 1 | trigger: none 2 | 3 | pool: 4 | name: $(OLIVE_POOL_UBUNTU2004) 5 | 6 | steps: 7 | - task: UsePythonVersion@0 8 | inputs: 9 | versionSpec: '3.9' 10 | 11 | - script: python -m pip install --upgrade pip setuptools wheel twine 12 | displayName: 'Install tools' 13 | 14 | - script: | 15 | python setup.py bdist_wheel 16 | displayName: 'Build package' 17 | 18 | - task: CredScan@3 19 | displayName: 'Run CredScan' 20 | inputs: 21 | debugMode: false 22 | continueOnError: true 23 | 24 | - task: ComponentGovernanceComponentDetection@0 25 | displayName: Component Detection 26 | inputs: 27 | # ignore docs and examples directories. They are not part of the package. 
28 | ignoreDirectories: 29 | $(Build.SourcesDirectory)/docs 30 | $(Build.SourcesDirectory)/examples 31 | 32 | - task: CopyFiles@2 33 | displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' 34 | inputs: 35 | SourceFolder: '$(Build.SourcesDirectory)/dist' 36 | Contents: '*.whl' 37 | TargetFolder: '$(Build.ArtifactStagingDirectory)' 38 | 39 | - task: PublishBuildArtifacts@1 40 | displayName: 'Publish artifacts' 41 | inputs: 42 | ArtifactName: olive 43 | -------------------------------------------------------------------------------- /.azure_pipelines/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-devops 2 | azureml-pipeline 3 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | olive/logging.py 4 | olive/passes/pytorch/pytorch_lightning_utils.py 5 | olive/passes/pytorch/qat_utils.py 6 | olive/systems/docker/eval.py 7 | 8 | [report] 9 | exclude_lines = 10 | pragma: no cover 11 | def __repr__ 12 | raise AssertionError 13 | raise NotImplementedError 14 | if __name__ == .__main__.: 15 | class .*\bConfig\): 16 | @(abc\.)?abstractmethod 17 | @(abc\.)?staticmethod 18 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | 5 | trim_trailing_whitespace = true 6 | insert_final_newline = true 7 | indent_style = space 8 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | per-file-ignores = 4 | __init__.py:F401 5 | pydantic_v1.py:F401 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Windows specific files should retain windows line-endings 5 | *.ps1 text eol=crlf 6 | 7 | # make sure .sh retains Unix line endings, even when checked out on windows. 8 | *.sh text eol=lf 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Olive config** 20 | Add Olive configurations here. 21 | 22 | **Olive logs** 23 | Add logs here. 24 | 25 | **Other information** 26 | - OS: [e.g. Windows, Linux] 27 | - Olive version: [e.g. 0.4.0 or main] 28 | - ONNXRuntime package and version: [e.g. onnxruntime-gpu: 1.16.1] 29 | - Transformers package version: [e.g. transformers 4.44.1] 30 | 31 | 32 | **Additional context** 33 | Add any other context about the problem here. 
34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yaml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: File a feature or enhancement proposal 3 | title: "[FR]: " 4 | labels: ["enhancement"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thank you for submitting a feature request. 10 | - type: textarea 11 | id: proposal 12 | attributes: 13 | label: Proposal Summary 14 | description: In a few sentences, provide a clear, high-level description of the feature request 15 | validations: 16 | required: true 17 | - type: checkboxes 18 | attributes: 19 | label: What component(s) does this request affect? 20 | description: Please choose one or more components below. 21 | options: 22 | - label: OliveModels 23 | - label: OliveSystems 24 | - label: OliveEvaluator 25 | - label: Metrics 26 | - label: Engine 27 | - label: Passes 28 | - label: Other 29 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Describe your changes 2 | 3 | ## Checklist before requesting a review 4 | - [ ] Add unit tests for this change. 5 | - [ ] Make sure all tests can pass. 6 | - [ ] Update documents if necessary. 7 | - [ ] Lint and apply fixes to your code by running `lintrunner -a` 8 | - [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes. 9 | - [ ] Is this PR including examples changes? If yes, please remember to update [example documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md) in a follow-up PR. 10 | 11 | ## (Optional) Issue link 12 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | - id: trailing-whitespace 7 | args: [--markdown-linebreak-ext=md] 8 | - id: check-yaml 9 | - id: requirements-txt-fixer 10 | - repo: https://github.com/MarcoGorelli/absolufy-imports 11 | rev: v0.3.1 12 | hooks: 13 | - id: absolufy-imports 14 | exclude: examples/ 15 | - repo: local 16 | hooks: 17 | - id: format-json 18 | name: Format JSON 19 | language: python 20 | entry: python scripts/format_json.py 21 | files: \.(json)$ 22 | args: [ 23 | '--indent=4', 24 | '--max-line-length=120' 25 | ] 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-exclude *.py[cod] 2 | recursive-include examples * 3 | recursive-include docs * 4 | prune */**/__pycache__ 5 | prune docs/build/**/* 6 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= -a -W 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | SCHEMABUILD = python $(SOURCEDIR)/dump_schema.py 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | schema: 17 | $(SCHEMABUILD) --output $(BUILDDIR)/html/schema.json 18 | 19 | .PHONY: help Makefile schema 20 | 21 | # Catch-all target: route all unknown targets to Sphinx using the new 22 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 23 | %: Makefile 24 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 25 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Generating the documentation 2 | 3 | To generate the documentation, you first have to build it. 4 | 5 | ## Pre-requisites 6 | 7 | Install Olive. At the root of the code repository: 8 | 9 | ```bash 10 | pip install -e . 11 | ``` 12 | 13 | Install pip requirements. At `docs`: 14 | 15 | ```bash 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | ## Building the documentation 20 | 21 | At `docs`: 22 | 23 | ```bash 24 | make html 25 | make linkcheck 26 | ``` 27 | 28 | ## Previewing the documentation 29 | 30 | At `docs/build/html`: 31 | 32 | ```bash 33 | python -m http.server {port-number} 34 | ``` 35 | 36 | The documentation site will be running at `http://localhost:{port-number}` 37 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SCHEMABUILD="python %SOURCEDIR%/dump_schema.py" 13 | 14 | %SPHINXBUILD% >NUL 2>NUL 15 | if errorlevel 9009 ( 16 | echo. 17 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 18 | echo.installed, then set the SPHINXBUILD environment variable to point 19 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 20 | echo.may add the Sphinx directory to PATH. 21 | echo.
22 | echo.If you don't have Sphinx installed, grab it from 23 | echo.https://www.sphinx-doc.org/ 24 | exit /b 1 25 | ) 26 | 27 | if "%1" == "" goto help 28 | 29 | if "%1" == "schema" goto schema 30 | 31 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -W %O% 32 | goto end 33 | 34 | :help 35 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 36 | goto end 37 | 38 | :schema 39 | %SCHEMABUILD% --output %SOURCEDIR%/html/schema.json 40 | 41 | :end 42 | popd 43 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # we use v1 API so need autodoc_pydantic<2.0.0 2 | # will also install pydantic<2.0.0 3 | autodoc_pydantic<2.0.0 4 | azure-ai-ml>=1.11.1 5 | azure-identity 6 | azureml-fsspec 7 | docker 8 | # latest 3.24.0 will break the pipeline 9 | # TODO(team): 55399 Switch back to the latest version once it's compatible with the pipeline 10 | marshmallow<3.24.0 11 | myst_parser 12 | onnxconverter_common 13 | psutil 14 | pydata_sphinx_theme 15 | pytorch_lightning 16 | sphinx>=6.1.3 17 | sphinx-argparse 18 | sphinx-copybutton 19 | sphinx-tabs 20 | sphinx_design 21 | sphinxcontrib-jquery 22 | sphinxcontrib-mermaid 23 | -------------------------------------------------------------------------------- /docs/source/_static/css/header.css: -------------------------------------------------------------------------------- 1 | h1 { 2 | font-size: 175%; 3 | } 4 | 5 | h2 { 6 | font-size: 150%; 7 | } 8 | 9 | h3 { 10 | font-size: 130%; 11 | } 12 | 13 | h4 { 14 | font-size: 100%; 15 | font-weight: 600; 16 | } 17 | -------------------------------------------------------------------------------- /docs/source/dump_schema.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | from olive.workflows.run.config import RunConfig 5 | 6 | if __name__ == "__main__": 7 | parser = argparse.ArgumentParser(description="Dump workflow schema") 8 | 9 | parser.add_argument("--output", type=str, default="schema.json", help="Output file") 10 | 11 | args = parser.parse_args() 12 | 13 | output_path = Path(args.output) 14 | output_path.parent.mkdir(parents=True, exist_ok=True) 15 | 16 | with open(output_path, "w") as f: 17 | f.write(RunConfig.schema_json(indent=2)) 18 | -------------------------------------------------------------------------------- /docs/source/features/azure-ai/index.rst: -------------------------------------------------------------------------------- 1 | Azure AI 2 | ======================== 3 | 4 | .. grid:: 2 2 2 3 5 | :class-container: cards 6 | 7 | .. grid-item-card:: 8 | **Azure AI integration** 9 | 10 | :octicon:`arrow-right;1em;sd-text-info` `Azure AI integration <azure-ai.html>`_ 11 | 12 | .. grid-item-card:: 13 | **Connect your own machines to Azure by Azure Arc** 14 | 15 | :octicon:`arrow-right;1em;sd-text-info` `Azure Arc <azure-arc.html>`_ 16 | 17 | .. grid-item-card:: 18 | **Scripts to manage your Azure assets** 19 | 20 | :octicon:`arrow-right;1em;sd-text-info` `Azure scripts <azure-script.html>`_ 21 | 22 | .. grid-item-card:: 23 | **Run Olive workflow on Azure** 24 | 25 | :octicon:`arrow-right;1em;sd-text-info` `Remote workflow <remote-workflow.html>`_ 26 | 27 | .. grid-item-card:: 28 | **Share models cache on Azure** 29 | 30 | :octicon:`arrow-right;1em;sd-text-info` `Shared cache <shared-model-cache.html>`_ 31 | 32 | 33 | ..
toctree:: 34 | :maxdepth: 1 35 | :hidden: 36 | 37 | azure-ai 38 | azure-arc 39 | azure-script 40 | remote-workflow 41 | shared-model-cache 42 | -------------------------------------------------------------------------------- /docs/source/features/ihv-integration/index.rst: -------------------------------------------------------------------------------- 1 | IHV Toolkit Integration 2 | ======================== 3 | 4 | .. grid:: 2 2 2 3 5 | :class-container: cards 6 | 7 | .. grid-item-card:: 8 | **OpenVINO** 9 | 10 | :octicon:`arrow-right;1em;sd-text-info` `OpenVINO <openvino.html>`_ 11 | 12 | .. grid-item-card:: 13 | **QNN** 14 | 15 | :octicon:`arrow-right;1em;sd-text-info` `QNN <qnn.html>`_ 16 | 17 | .. grid-item-card:: 18 | **SNPE** 19 | 20 | :octicon:`arrow-right;1em;sd-text-info` `SNPE <snpe.html>`_ 21 | 22 | .. toctree:: 23 | :maxdepth: 1 24 | :hidden: 25 | 26 | openvino 27 | qnn 28 | snpe 29 | -------------------------------------------------------------------------------- /docs/source/features/model-conversion/convert-pytorch.md: -------------------------------------------------------------------------------- 1 | # PyTorch 2 | 3 | PyTorch is an optimized tensor library for deep learning using GPUs and CPUs. 4 | 5 | ## TorchTRTConversion 6 | `TorchTRTConversion` converts the `torch.nn.Linear` modules in the transformer layers of a Hugging Face PyTorch model to `TRTModules` from `torch_tensorrt` with fp16 precision and sparse weights, if 7 | applicable. `torch_tensorrt` is an extension to `torch` where TensorRT-compiled engines can be used like regular `torch.nn.Module`s. This pass can be used to accelerate inference on transformer models 8 | with sparse weights by taking advantage of the 2:4 structured sparsity pattern supported by TensorRT. 9 | 10 | This pass only supports HfModels. Please refer to [TorchTRTConversion](torch_trt_conversion) for more details on the types of transformers models supported. 11 | 12 | ### Example Configuration 13 | ```json 14 | { 15 | "type": "TorchTRTConversion" 16 | } 17 | ``` 18 | -------------------------------------------------------------------------------- /docs/source/features/model-conversion/index.rst: -------------------------------------------------------------------------------- 1 | Model Conversion 2 | ======================== 3 | 4 | .. grid:: 2 2 2 3 5 | :class-container: cards 6 | 7 | .. grid-item-card:: 8 | **ONNX** 9 | 10 | :octicon:`arrow-right;1em;sd-text-info` `ONNX Conversion <convert-onnx.html>`_ 11 | 12 | .. grid-item-card:: 13 | **PyTorch** 14 | 15 | :octicon:`arrow-right;1em;sd-text-info` `TorchTRT Conversion <convert-pytorch.html>`_ 16 | 17 | .. grid-item-card:: 18 | **SNPE** 19 | 20 | :octicon:`arrow-right;1em;sd-text-info` `SNPE `_ 21 | 22 | ..
toctree:: 23 | :maxdepth: 1 24 | :hidden: 25 | 26 | convert-onnx 27 | convert-pytorch 28 | -------------------------------------------------------------------------------- /docs/source/images/auto_opt/pass_flows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/auto_opt/pass_flows.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/add-infra.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/add-infra.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/add-kub-detail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/add-kub-detail.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/add-kub-to-arc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/add-kub-to-arc.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/add-kub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/add-kub.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/attach-kub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/attach-kub.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/attach-suc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/attach-suc.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/new-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/new-compute.png -------------------------------------------------------------------------------- /docs/source/images/datacontainer_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/datacontainer_example.png -------------------------------------------------------------------------------- /docs/source/images/dataset-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/dataset-flow.png 
-------------------------------------------------------------------------------- /docs/source/images/model_splitting/cost_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/model_splitting/cost_model.png -------------------------------------------------------------------------------- /docs/source/images/model_splitting/num_splits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/model_splitting/num_splits.png -------------------------------------------------------------------------------- /docs/source/images/multi-lora-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/multi-lora-diagram.png -------------------------------------------------------------------------------- /docs/source/images/olive-black-text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/olive-black-text.png -------------------------------------------------------------------------------- /docs/source/images/olive-design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/olive-design.png -------------------------------------------------------------------------------- /docs/source/images/olive-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/olive-flow.png -------------------------------------------------------------------------------- /docs/source/images/olive-white-text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/olive-white-text.png -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | # Olive: The AI Model Optimization Toolkit for the ONNX Runtime 2 |
3 | 4 | ```{gallery-grid} 5 | :grid-columns: 1 2 2 3 6 | 7 | - header: "{octicon}`codescan-checkmark` Overview" 8 | content: "Learn the benefits of using Olive to optimize your models.
{octicon}`arrow-right` [Overview](why-olive.md)" 9 | - header: "{octicon}`zap` Get Started" 10 | content: "Install `olive-ai` with `pip` and get up and running with OLIVE in minutes.
{octicon}`arrow-right` [Get Started](getting-started/getting-started.md)" 11 | - header: "{octicon}`rocket` How To" 12 | content: "Find more details on specific Olive capabilities, such as quantization, running workflows on remote compute, model packaging, conversions, and more!
{octicon}`arrow-right` [How-To](how-to/index)" 13 | - header: "{fas}`code` Reference" 14 | content: "Find reference documentation for the Olive CLI, configuration options, and passes.
{octicon}`arrow-right` [Reference](reference/index)" 15 | - header: "{octicon}`diff-added` Extending Olive" 16 | content: "Learn about the design of Olive and how to extend Olive with your own optimization methods.
{octicon}`arrow-right` [Extend Olive](extending/index)" 17 | ``` 18 | 19 | 20 | ```{toctree} 21 | :maxdepth: 2 22 | :hidden: 23 | 24 | why-olive.md 25 | getting-started/getting-started.md 26 | how-to/index 27 | examples.md 28 | features/index 29 | reference/index 30 | extending/index 31 | ``` 32 | -------------------------------------------------------------------------------- /docs/source/reference/index.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ============== 3 | 4 | .. grid:: 2 2 2 3 5 | :class-container: cards 6 | 7 | .. grid-item-card:: 8 | **CLI** 9 | 10 | Learn about CLI features and options. 11 | 12 | :octicon:`arrow-right;1em;sd-text-info` `CLI <cli.html>`_ 13 | 14 | .. grid-item-card:: 15 | **Olive options** 16 | 17 | Explore Olive configuration options. 18 | 19 | :octicon:`arrow-right;1em;sd-text-info` `Olive options <options.html>`_ 20 | 21 | .. grid-item-card:: 22 | **Pass** 23 | 24 | Explore Olive passes. 25 | 26 | :octicon:`arrow-right;1em;sd-text-info` `Pass <pass.html>`_ 27 | 28 | .. toctree:: 29 | :maxdepth: 2 30 | :hidden: 31 | 32 | cli 33 | options 34 | pass 35 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /examples/adetailer/README.md: -------------------------------------------------------------------------------- 1 | ## How to run 2 | ### Pip requirements 3 | Install the necessary python packages: 4 | ``` 5 | python -m pip install -r requirements.txt 6 | ``` 7 | 8 | ### Run sample using config 9 | ``` 10 | olive run --config ./face_yolo_qnn.json 11 | ``` 12 | 13 | **Note**: The op_types_to_quantize setting in the face_yolo_qnn.json file is configured to exclude the Mul operation. This is because quantizing the Mul operation significantly increases the latency of this model on the QNN. 14 | 15 | -------------------------------------------------------------------------------- /examples/adetailer/requirements.txt: -------------------------------------------------------------------------------- 1 | pycocotools 2 | ultralytics 3 | -------------------------------------------------------------------------------- /examples/ast/README.md: -------------------------------------------------------------------------------- 1 | # AST Optimization 2 | This folder contains examples of AST (Audio Spectrogram Transformer) optimization using Olive workflows. 3 | 4 | - CPU: *PyTorch Model -> Onnx Model -> Transformers Optimized Onnx Model -> Quantized Onnx Model -> ONNX Runtime performance tuning* 5 | 6 | - Model: https://huggingface.co/MIT/ast-finetuned-speech-commands-v2 7 | - Dataset: https://huggingface.co/datasets/speech_commands 8 | 9 | ### Run example using config 10 | 11 | The `ast.json` config is used for CPU optimization; it quantizes the model and tunes the inference config for better performance. 12 | 13 | First, install required packages according to passes.
14 | ```sh 15 | olive run --config ast.json --setup 16 | ``` 17 | 18 | Then, optimize the model 19 | ```sh 20 | olive run --config ast.json 21 | ``` 22 | 23 | or run simply with python code: 24 | ```python 25 | from olive.workflows import run as olive_run 26 | olive_run("ast.json") 27 | ``` 28 | 29 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 30 | You can then select the best model and config from the candidates and run the model with the selected config. 31 | -------------------------------------------------------------------------------- /examples/ast/requirements.txt: -------------------------------------------------------------------------------- 1 | evaluate 2 | librosa 3 | optimum 4 | psutil 5 | # https://github.com/huggingface/evaluate/issues/655 6 | scikit-learn==1.5.2 7 | soundfile 8 | -------------------------------------------------------------------------------- /examples/bert/.gitignore: -------------------------------------------------------------------------------- 1 | mlruns/ 2 | bert_qat/ 3 | -------------------------------------------------------------------------------- /examples/bert/bert.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import argparse 6 | import json 7 | from pathlib import Path 8 | 9 | if __name__ == "__main__": 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "--optimize", 13 | action="store_true", 14 | help="If set, run transformers optimization pass", 15 | ) 16 | args = parser.parse_args() 17 | 18 | input_filename = "bert_cuda_gpu.template.json" 19 | with Path(input_filename).open("r") as f: 20 | config = json.load(f) 21 | 22 | if not args.optimize: 23 | del config["passes"]["transformers_optimization"] 24 | 25 | output_filename = input_filename.replace(".template", "") 26 | with Path(output_filename).open("w") as strm: 27 | json.dump(config, fp=strm, indent=4) 28 | -------------------------------------------------------------------------------- /examples/bert/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.10.16 6 | - pip=22.3.1 7 | - pip: 8 | - datasets 9 | - evaluate 10 | - optimum 11 | - psutil 12 | - scipy 13 | - scikit-learn==1.5.2 # https://github.com/huggingface/evaluate/issues/655 14 | - torch 15 | - --extra-index-url https://download.pytorch.org/whl/cpu 16 | - transformers>=4.41.1 17 | - git+https://github.com/microsoft/Olive#egg=olive-ai[cpu] 18 | -------------------------------------------------------------------------------- /examples/bert/conda_gpu.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.10.16 6 | - pip=22.3.1 7 | - pip: 8 | - datasets 9 | - evaluate 10 | - optimum 11 | - psutil 12 | - scipy 13 | - scikit-learn==1.5.2 # https://github.com/huggingface/evaluate/issues/655 14 | - torch 15 | - --extra-index-url https://download.pytorch.org/whl/cu118 16 | - transformers>=4.41.1 17 | - git+https://github.com/microsoft/Olive#egg=olive-ai[gpu] 18 | 
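For the BERT example above, a minimal sketch of how `bert.py` and the generated workflow config might be driven from Python (this assumes it is run from the `examples/bert` directory with `olive-ai` and the packages in `requirements.txt` installed; the `--optimize` flag and the `bert_cuda_gpu.json` output name come from `bert.py`):

```python
# Sketch: render the workflow config from the template, then run it with Olive.
# Omitting --optimize removes the transformers_optimization pass (see bert.py).
import subprocess

from olive.workflows import run as olive_run

subprocess.run(["python", "bert.py", "--optimize"], check=True)

# Candidate models and configs are written to the output directory configured
# in bert_cuda_gpu.json.
olive_run("bert_cuda_gpu.json")
```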
-------------------------------------------------------------------------------- /examples/bert/openvino/README.md: -------------------------------------------------------------------------------- 1 | # BERT Optimization 2 | 3 | This folder contains examples of BERT optimization using different workflows for [google-bert/bert-base-multilingual-cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) and [Intel/bert-base-uncased-mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) models. 4 | 5 | - Intel® NPU: [Optimization for BERT base multilingual cased](./bert_base_multilingual_cased/) 6 | - Intel® NPU: [Optimization for BERT base uncased mrpc](./bert_base_uncased_mrpc/) 7 | -------------------------------------------------------------------------------- /examples/bert/openvino/bert_base_multilingual_cased/README.md: -------------------------------------------------------------------------------- 1 | # BERT Base Multilingual Cased Quantization 2 | 3 | This folder contains a sample use case of Olive to optimize a [google-bert/bert-base-multilingual-cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) model using OpenVINO tools. 4 | 5 | - Intel® NPU: [BERT Base Multilingual Cased static shape model](#static-shape-model) 6 | 7 | ## Quantization Workflows 8 | 9 | This workflow performs quantization with OpenVINO NNCF. It performs the optimization pipeline: 10 | 11 | - *HuggingFace Model -> OpenVINO Model -> Quantized OpenVINO model -> Quantized encapsulated ONNX OpenVINO IR model* 12 | 13 | ### Static shape model 14 | 15 | The config file: [bert-base-multilingual-cased_context_ov_static.json](bert-base-multilingual-cased_context_ov_static.json) executes the above workflow producing static shape model. 16 | 17 | ## How to run 18 | 19 | Install the necessary python packages: 20 | 21 | ```bash 22 | python -m pip install olive-ai[openvino] 23 | ``` 24 | 25 | ### Run sample using config 26 | 27 | The optimization techniques to run are specified in the relevant config json file. 28 | 29 | ```bash 30 | olive run --config bert-base-multilingual-cased_context_ov_static.json 31 | ``` 32 | 33 | or run simply with python code: 34 | 35 | ```python 36 | from olive.workflows import run as olive_run 37 | olive_run("bert-base-multilingual-cased_context_ov_static.json") 38 | ``` 39 | 40 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 41 | -------------------------------------------------------------------------------- /examples/bert/openvino/bert_base_uncased_mrpc/README.md: -------------------------------------------------------------------------------- 1 | # BERT Base Uncased MRPC Quantization 2 | 3 | This folder contains a sample use case of Olive to optimize a [Intel/bert-base-uncased-mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) model using OpenVINO tools. 4 | 5 | - Intel® NPU: [BERT Base Uncased MRPC static shape model](#static-shape-model) 6 | 7 | ## Quantization Workflows 8 | 9 | This workflow performs quantization with OpenVINO NNCF. It performs the optimization pipeline: 10 | 11 | - *HuggingFace Model -> OpenVINO Model -> Quantized OpenVINO model -> Quantized encapsulated ONNX OpenVINO IR model* 12 | 13 | ### Static shape model 14 | 15 | The config file: [bert-base-uncased-mrpc_context_ov_static.json](bert-base-uncased-mrpc_context_ov_static.json) executes the above workflow producing static shape model. 
16 | 17 | ## How to run 18 | 19 | Install the necessary python packages: 20 | 21 | ```bash 22 | python -m pip install olive-ai[openvino] 23 | ``` 24 | 25 | ### Run sample using config 26 | 27 | The optimization techniques to run are specified in the relevant config json file. 28 | 29 | ```bash 30 | olive run --config bert-base-uncased-mrpc_context_ov_static.json 31 | ``` 32 | 33 | or run simply with python code: 34 | 35 | ```python 36 | from olive.workflows import run as olive_run 37 | olive_run("bert-base-uncased-mrpc_context_ov_static.json") 38 | ``` 39 | 40 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 41 | -------------------------------------------------------------------------------- /examples/bert/qnn/README.md: -------------------------------------------------------------------------------- 1 | ### BERT Optimization with PTQ on Qualcomm NPU using QNN EP 2 | This workflow performs BERT optimization on Qualcomm NPU with ONNX Runtime PTQ. It performs the optimization pipeline: 3 | - *PyTorch Model -> Onnx Model -> Static shaped Onnx Model -> Quantized Onnx Model* 4 | 5 | It requires an x86 Python environment on a Windows ARM machine with `onnxruntime-qnn` installed. 6 | 7 | **NOTE:** The model optimization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the configuration file to skip the evaluation step. 8 | -------------------------------------------------------------------------------- /examples/bert/qnn/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | evaluate 3 | nltk 4 | optimum 5 | pandas 6 | tabulate 7 | -------------------------------------------------------------------------------- /examples/bert/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml 2 | azure-identity 3 | # TODO(anyone): load_metrics was removed since 3.0.0. Using evaluate instead 4 | datasets<3.0.0 5 | docker>=7.1.0 6 | evaluate 7 | neural-compressor 8 | optimum 9 | pytorch_lightning 10 | # https://github.com/huggingface/evaluate/issues/655 11 | scikit-learn==1.5.2 12 | scipy 13 | tabulate 14 | transformers 15 | -------------------------------------------------------------------------------- /examples/bert/snpe/README.md: -------------------------------------------------------------------------------- 1 | # BERT model optimization on Qualcomm NPU with SNPE SDK 2 | This folder contains a sample use case of Olive to convert a BERT model to an ONNX model, then to a SNPE DLC model, and to evaluate the accuracy of the DLC model. 3 | 4 | Performs the optimization pipeline: 5 | - *Pytorch Model -> Onnx Model with Dynamic Shape -> Onnx Model with Fixed Shape -> SNPE Model* 6 | 7 | ## Prerequisites 8 | ### Download and unzip SNPE SDK 9 | Download the SNPE SDK zip following [instructions from Qualcomm](https://developer.qualcomm.com/software/qualcomm-neural-processing-sdk). 10 | 11 | We have tested it with SNPE v2.18.0.240101. 12 | 13 | Unzip the file and set the unzipped directory path as environment variable `SNPE_ROOT`. 14 | 15 | ### Configure SNPE 16 | ```sh 17 | olive configure-qualcomm-sdk --py_version 3.8 --sdk snpe 18 | ``` 19 | 20 | ## Run sample 21 | Run the conversion and quantization locally. 22 | ``` 23 | olive run --config bert_snpe.json 24 | ``` 25 | 26 | ## Issues 27 | 28 | 1.
"Module 'qti.aisw.converters' has no attribute 'onnx': 29 | Refer to this: https://developer.qualcomm.com/comment/21810#comment-21810, 30 | change the import statement in `{SNPE_ROOT}/lib/python/qti/aisw/converters/onnx/onnx_to_ir.py:L30` to: 31 | ```python 32 | from qti.aisw.converters.onnx import composable_custom_op_utils as ComposableCustomOp 33 | ``` 34 | -------------------------------------------------------------------------------- /examples/bert/snpe/user_script.py: -------------------------------------------------------------------------------- 1 | from olive.data.registry import Registry 2 | 3 | 4 | @Registry.register_post_process() 5 | def snpe_post_process(output_data, **kwargs): 6 | import torch 7 | 8 | logits = torch.tensor(output_data["logits"]) 9 | _, preds = torch.max(logits, dim=-1) 10 | 11 | return preds 12 | -------------------------------------------------------------------------------- /examples/bge/requirements.txt: -------------------------------------------------------------------------------- 1 | mteb 2 | -------------------------------------------------------------------------------- /examples/clip/qnn/README.md: -------------------------------------------------------------------------------- 1 | # CLIP VIT Optimization with PTQ on Qualcomm NPU using QNN EP 2 | This workflow performs CLIP VIT quantization on Qualcomm NPU with ONNX Runtime PTQ. It performs the pipeline: 3 | - *PyTorch Model -> Onnx Model -> Quantized Onnx Model* 4 | 5 | It requires x86 python environment on a Windows ARM machine with `onnxruntime-qnn` installed. 6 | 7 | **NOTE:** The model quantization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the configuration file to skip the evaluation step. 8 | -------------------------------------------------------------------------------- /examples/clip/qnn/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | evaluate 3 | tabulate 4 | -------------------------------------------------------------------------------- /examples/clip/requirements.txt: -------------------------------------------------------------------------------- 1 | # TODO(anyone): load_metrics was removed since 3.0.0. Using evaluate instead 2 | datasets<3.0.0 3 | evaluate 4 | scikit-learn==1.5.2 5 | transformers 6 | -------------------------------------------------------------------------------- /examples/deberta/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml 2 | azure-identity 3 | datasets 4 | optimum 5 | -------------------------------------------------------------------------------- /examples/deepseek/README.md: -------------------------------------------------------------------------------- 1 | # Deepseek R1 Distill optimization 2 | 3 | Sample use cases of Olive to optimize a [DeepSeek R1 Distill](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) using Olive. 4 | 5 | - [Finetune and Optimize for CPU/CUDA](../getting_started/olive-deepseek-finetune.ipynb) 6 | - [QDQ Model with 4-bit Weights & 16-bit Activations](../phi3_5/README.md): 7 | - Run the workflow with `olive run --config qdq_config.json -m deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B -o models/deepseek-r1-qdq`. 
8 | - [AMD NPU: Optimization and Quantization for VitisAI](../phi3_5/README.md): 9 | - Run the workflow with `olive run --config qdq_config_vitis_ai.json -m deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B -o models/deepseek-r1-vai`. 10 | - [PTQ + AOT Compilation for Qualcomm NPUs using QNN EP](../phi3_5/README.md): 11 | - Run the workflow with `olive run --config qnn_config.json -m deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B -o models/deepseek-r1-qnn`. 12 | - Run the inference with `python app.py -m models/deepseek-r1-qnn -c "<|User|>{input}<|Assistant|>"`. 13 | - [PTQ + AWQ ONNX OVIR Encapsulated 4-bit weight compression using Optimum OpenVINO](./openvino/) 14 | -------------------------------------------------------------------------------- /examples/directml/README.md: -------------------------------------------------------------------------------- 1 | # DirectML 2 | 3 | Keep `llm`, `squeezenet`, `stable_diffusion` and `stable_diffusion_xl` here for the DirectML team and the DirectML EP. 4 | -------------------------------------------------------------------------------- /examples/directml/llm/.gitignore: -------------------------------------------------------------------------------- 1 | /raw_model_data/ 2 | /footprints/ 3 | -------------------------------------------------------------------------------- /examples/directml/llm/chat_app/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) 5 | -------------------------------------------------------------------------------- /examples/directml/llm/chat_app/app_modules/overwrites.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | # pylint: disable=relative-beyond-top-level 4 | from .presets import gr 5 | from .utils import convert_asis, convert_mdtext, detect_converted_mark 6 | 7 | 8 | def postprocess(self, y: list[tuple[str | None, str | None]]) -> list[tuple[str | None, str | None]]: 9 | """Each message and response should be a string, which may be in Markdown format. 10 | 11 | Returns: 12 | List of tuples representing the message and response. 13 | Each message and response will be a string of HTML.
14 | 15 | """ 16 | if y is None or y == []: 17 | return [] 18 | temp = [] 19 | for x in y: 20 | user, bot = x 21 | if not detect_converted_mark(user): 22 | user = convert_asis(user) 23 | if not detect_converted_mark(bot): 24 | bot = convert_mdtext(bot) 25 | temp.append((user, bot)) 26 | return temp 27 | 28 | 29 | GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse 30 | -------------------------------------------------------------------------------- /examples/directml/llm/chat_app/assets/custom.js: -------------------------------------------------------------------------------- 1 | // custom javascript here 2 | -------------------------------------------------------------------------------- /examples/directml/llm/chat_app/interface/base_interface.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=disallowed-name 2 | 3 | 4 | class BaseLLMInterface: 5 | def __init__(self): 6 | pass 7 | 8 | def foo(self): 9 | pass 10 | -------------------------------------------------------------------------------- /examples/directml/llm/config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | decoder_model = None 7 | normalization_type = "rms" 8 | state_dict = {} 9 | strict_weights_loading = True 10 | hidden_size = 4096 11 | head_dim = 128 12 | intermediate_size = 11008 13 | num_heads = 32 14 | num_key_value_heads = 32 15 | num_layers = 32 16 | vocab_size = 32000 17 | epsilon = 1e-5 18 | model_type = "llama" 19 | apply_residual_connection_post_layernorm = True 20 | model_id = "meta-llama/Llama-2-7b-chat-hf" 21 | partial_rotary_factor = 1.0 22 | max_position_embeddings = 4096 23 | use_bias = False 24 | hidden_act = "silu" 25 | has_up_proj = True 26 | has_input_layernorm_bias = True 27 | has_norm_bias = True 28 | has_lm_head_bias = False 29 | use_split_sigmoid = False 30 | -------------------------------------------------------------------------------- /examples/directml/llm/placeholder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/directml/llm/placeholder.png -------------------------------------------------------------------------------- /examples/directml/llm/requirements.txt: -------------------------------------------------------------------------------- 1 | huggingface-hub 2 | markdown 3 | mdtex2html 4 | optimum 5 | Pygments 6 | sentencepiece 7 | tabulate 8 | torch 9 | -------------------------------------------------------------------------------- /examples/directml/squeezenet/README.md: -------------------------------------------------------------------------------- 1 | # SqueezeNet Latency Optimization with DirectML 2 | This folder contains a sample use case of Olive to optimize the [SqueezeNet](https://pytorch.org/hub/pytorch_vision_squeezenet/) model using ONNX conversion, conversion to FLOAT16, and general ONNX performance tuning. 3 | 4 | Performs optimization pipeline: 5 | 6 | PyTorch Model -> [Convert to ONNX] -> [FP16 Conversion] -> [Tune performance] -> Optimized FP16 ONNX Model 7 | 8 | Outputs the best metrics, model, and corresponding Olive config. 
9 | 10 | ## Optimize SqueezeNet 11 | First, install required packages according to passes. 12 | ``` 13 | olive run --config squeezenet_config.json --setup 14 | ``` 15 | Then, optimize the model 16 | ``` 17 | olive run --config squeezenet_config.json 18 | ``` 19 | 20 | or run simply with python code: 21 | 22 | ```python 23 | from olive.workflows import run as olive_run 24 | olive_run("squeezenet_config.json") 25 | ``` 26 | -------------------------------------------------------------------------------- /examples/directml/squeezenet/squeezenet_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "PyTorchModel", 4 | "model_loader": "load_pytorch_origin_model", 5 | "model_script": "user_script.py", 6 | "io_config": { 7 | "input_names": [ "input_image" ], 8 | "input_shapes": [ [ 1, 3, 224, 224 ] ], 9 | "output_names": [ "output" ] 10 | } 11 | }, 12 | "systems": { "local_system": { "type": "LocalSystem", "accelerators": [ { "device": "gpu" } ] } }, 13 | "evaluators": { 14 | "common_evaluator": { 15 | "metrics": [ 16 | { 17 | "name": "latency", 18 | "type": "latency", 19 | "sub_types": [ { "name": "avg", "priority": 1 }, { "name": "max" }, { "name": "min" } ] 20 | } 21 | ] 22 | } 23 | }, 24 | "passes": { 25 | "torch_to_onnx": { "type": "OnnxConversion", "target_opset": 13 }, 26 | "float16_conversion": { "type": "OnnxFloatToFloat16" }, 27 | "session_params_tuning": { 28 | "type": "OrtSessionParamsTuning", 29 | "device": "gpu", 30 | "execution_mode_list": [ "ORT_SEQUENTIAL" ], 31 | "providers_list": [ "DmlExecutionProvider" ] 32 | } 33 | }, 34 | "log_severity_level": 0, 35 | "evaluator": "common_evaluator", 36 | "evaluate_input_model": false, 37 | "host": "local_system", 38 | "target": "local_system", 39 | "clean_cache": true, 40 | "cache_dir": "cache" 41 | } 42 | -------------------------------------------------------------------------------- /examples/directml/squeezenet/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import torch 6 | 7 | 8 | def load_pytorch_origin_model(torch_hub_model_path): 9 | return torch.hub.load("pytorch/vision:v0.10.0", "squeezenet1_1", pretrained=True) 10 | 11 | 12 | class DataLoader: 13 | def __init__(self, batch_size): 14 | self.batch_size = batch_size 15 | 16 | def __getitem__(self, idx): 17 | input_data = torch.rand((self.batch_size, 3, 224, 224), dtype=torch.float16) 18 | label = None 19 | return input_data, label 20 | 21 | 22 | def create_dataloader(data_dir, batch_size, *args, **kwargs): 23 | return DataLoader(batch_size) 24 | -------------------------------------------------------------------------------- /examples/directml/stable_diffusion/readme/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/directml/stable_diffusion/readme/pipeline.png -------------------------------------------------------------------------------- /examples/directml/stable_diffusion_xl/readme/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/directml/stable_diffusion_xl/readme/pipeline.png -------------------------------------------------------------------------------- /examples/directml/stable_diffusion_xl/readme/sdxl_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/directml/stable_diffusion_xl/readme/sdxl_flow.png -------------------------------------------------------------------------------- /examples/falcon/README.md: -------------------------------------------------------------------------------- 1 | # Falcon Optimization 2 | This folder contains a sample use case of Olive to optimize a [falcon-7b](https://huggingface.co/tiiuae/falcon-7b) model using ONNXRuntime tools. 3 | 4 | ## Optimization Workflows 5 | This workflow performs Falcon optimization on CPU with ONNX Runtime. It performs the optimization pipeline: 6 | - *PyTorch Model -> Onnx Model -> Transformers Optimized Onnx Model fp16* 7 | 8 | Config file: [config.json](config.json) 9 | 10 | ## How to run 11 | ### Pip requirements 12 | Install the necessary python packages: 13 | ``` 14 | python -m pip install -r requirements.txt 15 | ``` 16 | 17 | ### Run sample using config 18 | 19 | The optimization techniques to run are specified in the relevant config json file. 20 | 21 | First, install required packages according to passes. 22 | ``` 23 | olive run --config config.json --setup 24 | ``` 25 | 26 | Then, optimize the model 27 | ``` 28 | olive run --config config.json 29 | ``` 30 | 31 | or run simply with python code: 32 | ```python 33 | from olive.workflows import run as olive_run 34 | olive_run("config.json") 35 | ``` 36 | 37 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 38 | You can then select the best model and config from the candidates and run the model with the selected config. 
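As a rough sketch of that last step for the Falcon example, one way to inspect a selected ONNX candidate before wiring it into an inference pipeline is to load it with ONNX Runtime. The model path below is hypothetical; use the candidate path reported in the Olive output directory, and note that which execution providers are available depends on the installed onnxruntime package.

```python
# Sketch: load one optimized candidate with ONNX Runtime and list its inputs.
# "models/falcon/model.onnx" is a placeholder for the chosen candidate path.
import onnxruntime as ort

session = ort.InferenceSession(
    "models/falcon/model.onnx",  # hypothetical candidate path
    providers=ort.get_available_providers(),
)
for inp in session.get_inputs():
    print(inp.name, inp.shape, inp.type)
```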
39 | -------------------------------------------------------------------------------- /examples/falcon/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | -------------------------------------------------------------------------------- /examples/gptj/gptj_inc_dynamic_ptq_cpu.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "EleutherAI/gpt-j-6B" }, 3 | "data_configs": [ 4 | { 5 | "name": "latency_data_config", 6 | "user_script": "user_script.py", 7 | "load_dataset_config": { "type": "simple_dataset" }, 8 | "dataloader_config": { "type": "gptj_dataloader", "batch_size": 1 } 9 | } 10 | ], 11 | "evaluators": { 12 | "common_evaluator": { 13 | "metrics": [ 14 | { 15 | "name": "latency", 16 | "type": "latency", 17 | "sub_types": [ { "name": "avg", "priority": 1 } ], 18 | "data_config": "latency_data_config" 19 | } 20 | ] 21 | } 22 | }, 23 | "passes": { 24 | "conversion": { 25 | "type": "OnnxConversion", 26 | "target_opset": 13, 27 | "save_as_external_data": true, 28 | "all_tensors_to_one_file": true 29 | }, 30 | "quantization": { 31 | "type": "IncDynamicQuantization", 32 | "save_as_external_data": true, 33 | "all_tensors_to_one_file": true 34 | } 35 | }, 36 | "log_severity_level": 0, 37 | "evaluator": "common_evaluator", 38 | "cache_dir": "cache", 39 | "output_dir": "models/gptj_inc_dynamic_ptq_cpu" 40 | } 41 | -------------------------------------------------------------------------------- /examples/gptj/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | neural-compressor 3 | onnxruntime 4 | -------------------------------------------------------------------------------- /examples/gte/README.md: -------------------------------------------------------------------------------- 1 | # GTE-Large-v1.5 Optimization 2 | This folder contains a sample use case of Olive to optimize a [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) model. 3 | 4 | ## Optimization Workflows 5 | This workflow performs optimization on CPU with ONNX Runtime. It performs the optimization pipeline: 6 | - *PyTorch Model -> Onnx Model -> Quantized Onnx Model* 7 | 8 | Config file: [config.json](config.json) 9 | 10 | ## How to run 11 | ### Run sample using config 12 | 13 | The optimization techniques to run are specified in the relevant config json file. 14 | 15 | First, install required packages according to passes. 16 | ``` 17 | olive run --config config.json --setup 18 | ``` 19 | 20 | Then, optimize the model 21 | ``` 22 | olive run --config config.json 23 | ``` 24 | 25 | or run simply with python code: 26 | ```python 27 | from olive.workflows import run as olive_run 28 | olive_run("config.json") 29 | ``` 30 | 31 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 32 | You can then select the best model and config from the candidates and run the model with the selected config. 
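As a rough sketch of running a selected candidate for the GTE example, the quantized ONNX model can be fed tokenized text through ONNX Runtime. The model path is a placeholder for the candidate in the output directory, and the input names match the ones produced by the user script below (input_ids, token_type_ids, attention_mask).

```python
# Sketch: embed a sentence with the quantized GTE ONNX candidate on CPU.
# "model.onnx" is a placeholder for the candidate path in the output directory.
import onnxruntime as ort
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-large-en-v1.5")
session = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])

encoded = tokenizer("Olive optimizes ONNX models.", return_tensors="np")
feed = {
    name: encoded[name].astype("int64")
    for name in ("input_ids", "token_type_ids", "attention_mask")
}
embeddings = session.run(None, feed)[0]
print(embeddings.shape)
```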
33 | -------------------------------------------------------------------------------- /examples/gte/user_script.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | 4 | from olive.data.registry import Registry 5 | 6 | 7 | class TextDataset(Dataset): 8 | def __init__(self, text): 9 | self.text = text 10 | 11 | def __len__(self): 12 | return len(self.text) 13 | 14 | def __getitem__(self, idx): 15 | sample = self.text[idx] 16 | input_ids = torch.tensor(sample["input_ids"], dtype=torch.int64) 17 | token_type_ids = torch.tensor(sample["token_type_ids"], dtype=torch.int64) 18 | attention_mask = torch.tensor(sample["attention_mask"], dtype=torch.int64) 19 | 20 | return {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": attention_mask}, idx 21 | 22 | 23 | @Registry.register_pre_process() 24 | def dataset_pre_process(dataset, **kwargs): 25 | from transformers import AutoTokenizer 26 | 27 | max_samples = kwargs.get("max_samples", 128) 28 | model_name = kwargs.get("model_name") 29 | texts = [] 30 | for i, sample in enumerate(dataset): 31 | if i >= max_samples: 32 | break 33 | tokenizer = AutoTokenizer.from_pretrained(model_name) 34 | batch_dict = tokenizer(sample["text"], max_length=8192, padding=True, truncation=True) 35 | texts.append( 36 | { 37 | "input_ids": batch_dict["input_ids"], 38 | "token_type_ids": batch_dict["token_type_ids"], 39 | "attention_mask": batch_dict["attention_mask"], 40 | } 41 | ) 42 | return TextDataset(texts) 43 | -------------------------------------------------------------------------------- /examples/llama2/.gitignore: -------------------------------------------------------------------------------- 1 | llama2_cpu* 2 | llama2_gpu* 3 | llama2_model_builder.json 4 | -------------------------------------------------------------------------------- /examples/llama2/conda_gpu.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.9.21 6 | - pip=22.3.1 7 | - pip: 8 | - accelerate 9 | - bitsandbytes 10 | - peft 11 | - sentencepiece 12 | - datasets 13 | - evaluate 14 | - psutil 15 | - optimum 16 | - scipy 17 | - scikit-learn 18 | - onnxruntime-genai 19 | - torch 20 | - --extra-index-url https://download.pytorch.org/whl/cu118 21 | - git+https://github.com/microsoft/Olive#egg=olive-ai[gpu,azureml] 22 | -------------------------------------------------------------------------------- /examples/llama2/llama2_generate.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "meta-llama/Llama-2-7b-hf" }, 3 | "data_configs": [ 4 | { 5 | "name": "generation_latency_dummy_data", 6 | "type": "TransformersPromptDummyDataContainer", 7 | "load_dataset_config": { "generative": true } 8 | } 9 | ], 10 | "systems": { 11 | "local_system": { 12 | "type": "LocalSystem", 13 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 14 | } 15 | }, 16 | "evaluators": { 17 | "merged_evaluator": { 18 | "metrics": [ 19 | { 20 | "name": "latency_prompt_processing", 21 | "type": "latency", 22 | "sub_types": [ { "name": "avg", "priority": 1 } ], 23 | "data_config": "generation_latency_dummy_data", 24 | "user_config": { "io_bind": true, "run_kwargs": { "max_new_tokens": 64 } } 25 | } 26 | ] 27 | } 28 | }, 29 | "passes": { }, 30 | 
"auto_optimizer_config": { "disable_auto_optimizer": true }, 31 | "evaluator": "merged_evaluator", 32 | "host": "local_system", 33 | "target": "local_system", 34 | "cache_dir": "cache", 35 | "output_dir": "models/llama2_generate" 36 | } 37 | -------------------------------------------------------------------------------- /examples/llama2/llama2_lmeval.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "HfModel", 4 | "model_path": "meta-llama/Llama-2-7b-hf", 5 | "load_kwargs": { "attn_implementation": "eager" } 6 | }, 7 | "systems": { 8 | "local_system": { 9 | "type": "LocalSystem", 10 | "accelerators": [ { "device": "cpu", "execution_providers": [ "CPUExecutionProvider" ] } ] 11 | } 12 | }, 13 | "evaluators": { 14 | "evaluator": { "type": "LMEvaluator", "tasks": [ "hellaswag" ], "batch_size": 1, "limit": 4, "max_length": 128 } 15 | }, 16 | "evaluator": "evaluator", 17 | "host": "local_system", 18 | "target": "local_system", 19 | "cache_dir": "cache", 20 | "output_dir": "models", 21 | "clean_cache": true 22 | } 23 | -------------------------------------------------------------------------------- /examples/llama2/llama2_lmeval_onnx.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "HfModel", 4 | "model_path": "meta-llama/Llama-2-7b-hf", 5 | "load_kwargs": { "attn_implementation": "eager" } 6 | }, 7 | "systems": { 8 | "local_system": { 9 | "type": "LocalSystem", 10 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 11 | } 12 | }, 13 | "passes": { 14 | "mb": { "type": "ModelBuilder", "precision": "int4", "search": { "max_length": 2048, "min_length": 0 } } 15 | }, 16 | "evaluators": { 17 | "evaluator": { "type": "LMEvaluator", "tasks": [ "hellaswag" ], "batch_size": 1, "limit": 4, "max_length": 128 } 18 | }, 19 | "evaluator": "evaluator", 20 | "host": "local_system", 21 | "target": "local_system", 22 | "cache_dir": "cache", 23 | "output_dir": "models", 24 | "clean_cache": true 25 | } 26 | -------------------------------------------------------------------------------- /examples/llama2/llama2_model_builder_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "data_configs": [ { "name": "transformer_token_dummy_data", "type": "TransformersTokenDummyDataContainer" } ], 10 | "passes": { 11 | "conversion": { 12 | "type": "OnnxConversion", 13 | "target_opset": 16, 14 | "save_as_external_data": true, 15 | "all_tensors_to_one_file": true, 16 | "save_metadata_for_token_generation": true 17 | }, 18 | "builder": { "type": "ModelBuilder", "precision": "int4", "search": { "max_length": 2048, "min_length": 0 } }, 19 | "metadata": { 20 | "type": "ModelBuilder", 21 | "precision": "int4", 22 | "metadata_only": true, 23 | "search": { "max_length": 2048, "min_length": 0 } 24 | }, 25 | "session_params_tuning": { 26 | "type": "OrtSessionParamsTuning", 27 | "data_config": "transformer_token_dummy_data", 28 | "io_bind": true 29 | } 30 | }, 31 | "packaging_config": [ { "type": "Zipfile", "name": "OutputModel" } ], 32 | "log_severity_level": 0, 33 | "host": "local_system", 34 | "target": "local_system", 35 | "cache_dir": "cache", 36 | 
"output_dir": null 37 | } 38 | -------------------------------------------------------------------------------- /examples/llama2/llama2_split.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "HfModel", 4 | "load_kwargs": { "attn_implementation": "eager" }, 5 | "model_path": "meta-llama/Llama-2-7b-hf" 6 | }, 7 | "systems": { 8 | "local_system": { 9 | "type": "LocalSystem", 10 | "accelerators": [ { "device": "cpu", "execution_providers": [ "CPUExecutionProvider" ] } ] 11 | } 12 | }, 13 | "passes": { 14 | "s": { "type": "CaptureSplitInfo", "num_splits": 3 }, 15 | "c": { "type": "OnnxConversion", "target_opset": 17, "torch_dtype": "float32" }, 16 | "sm": { "type": "SplitModel" } 17 | }, 18 | "host": "local_system", 19 | "target": "local_system", 20 | "output_dir": "models/llama2_split" 21 | } 22 | -------------------------------------------------------------------------------- /examples/llama2/llama2_tensor_parallel.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "meta-llama/Llama-2-7b-hf" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "passes": { 10 | "tensor_parallel": { 11 | "type": "PyTorchTensorParallel", 12 | "user_script": "llama2_tensor_parallel.py", 13 | "class_name": "LlamaPyTorchTensorParallel", 14 | "world_size": 4 15 | }, 16 | "conversion": { 17 | "type": "OnnxConversion", 18 | "target_opset": 17, 19 | "save_as_external_data": true, 20 | "all_tensors_to_one_file": true 21 | }, 22 | "transformers_optimization_fp16": { 23 | "type": "OrtTransformersOptimization", 24 | "save_as_external_data": true, 25 | "all_tensors_to_one_file": true, 26 | "model_type": "gpt2", 27 | "opt_level": 0, 28 | "only_onnxruntime": false, 29 | "keep_io_types": false, 30 | "float16": true, 31 | "use_gqa": true 32 | } 33 | }, 34 | "host": "local_system", 35 | "target": "local_system", 36 | "cache_dir": "cache", 37 | "output_dir": "models/tensor_parallel" 38 | } 39 | -------------------------------------------------------------------------------- /examples/llama2/notebook/llama2/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.9.21 6 | - pip=22.3.1 7 | - pip: 8 | - accelerate 9 | - azure-keyvault-secrets 10 | - azure-identity 11 | - bitsandbytes 12 | - datasets 13 | - huggingface_hub 14 | - optimum 15 | - peft 16 | - scipy 17 | - sentencepiece 18 | - torch==2.0.1 19 | - transformers>=4.33.2,<= 4.37.2 20 | - git+https://github.com/microsoft/Olive#egg=olive-ai[gpu] 21 | -------------------------------------------------------------------------------- /examples/llama2/notebook/llama2_multiep/llama2.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import argparse 6 | import json 7 | from pathlib import Path 8 | 9 | if __name__ == "__main__": 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "--device", 13 | choices=["cpu", "gpu", "multi_ep"], 14 | help="Device to use", 15 | ) 16 | parser.add_argument( 17 | "--quantize", 18 | action="store_true", 19 | help="If set, run transformers optimization pass", 20 | ) 21 | args = parser.parse_args() 22 | 23 | input_filename = f"config_{args.device}.template.json" 24 | with Path(input_filename).open("r") as f: 25 | config = json.load(f) 26 | 27 | if not args.quantize: 28 | del config["passes"]["blockwise_quant_int4"] 29 | 30 | output_filename = input_filename.replace(".template", "") 31 | with Path(output_filename).open("w") as strm: 32 | json.dump(config, fp=strm, indent=4) 33 | -------------------------------------------------------------------------------- /examples/llama2/notebook/llama2_multiep/multiple_ep_requirements.txt: -------------------------------------------------------------------------------- 1 | tabulate 2 | -------------------------------------------------------------------------------- /examples/llama2/requirements-gptq.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | auto-gptq 3 | -------------------------------------------------------------------------------- /examples/llama2/requirements-pipeline.txt: -------------------------------------------------------------------------------- 1 | -r requirements-qlora.txt 2 | azure-ai-ml 3 | azure-identity 4 | azure-keyvault-secrets 5 | azureml-fsspec 6 | huggingface_hub 7 | -------------------------------------------------------------------------------- /examples/llama2/requirements-qlora.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | # transformers>=4.33.2,<= 4.37.2 not compatible with latest accelerate 3 | accelerate<1.0.0 4 | bitsandbytes==0.43.3 5 | onnxruntime_genai 6 | peft 7 | scikit-learn 8 | sentencepiece 9 | -------------------------------------------------------------------------------- /examples/llama2/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets>=2.8.0 2 | onnx>=1.14.0 3 | optimum>=1.17.0 4 | torch 5 | # transformers optimizer fusions don't match in newer versions 6 | transformers>=4.33.2,<= 4.37.2 7 | -------------------------------------------------------------------------------- /examples/mistral/mistral_fp16.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "mistralai/Mistral-7B-v0.1" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "data_configs": [ { "name": "transformer_token_dummy_data", "type": "TransformersTokenDummyDataContainer" } ], 10 | "evaluators": { 11 | "common_evaluator": { 12 | "metrics": [ 13 | { 14 | "name": "latency", 15 | "type": "latency", 16 | "sub_types": [ { "name": "avg", "priority": 1 } ], 17 | "data_config": "transformer_token_dummy_data", 18 | "user_config": { "io_bind": true } 19 | } 20 | ] 21 | } 22 | }, 23 | "passes": { 24 | "convert": { "type": "ModelBuilder", "precision": "fp16" }, 25 | "session_params_tuning": { 26 | "type": "OrtSessionParamsTuning", 27 | 
"data_config": "transformer_token_dummy_data", 28 | "io_bind": true, 29 | "enable_profiling": false 30 | } 31 | }, 32 | "evaluate_input_model": false, 33 | "evaluator": "common_evaluator", 34 | "host": "local_system", 35 | "target": "local_system", 36 | "cache_dir": "cache", 37 | "output_dir": "models/mistral_fp16" 38 | } 39 | -------------------------------------------------------------------------------- /examples/mistral/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | neural-compressor>=2.4.1 3 | onnxruntime-genai-cuda 4 | onnxruntime-gpu 5 | onnxruntime_extensions 6 | # optimum 1.17.0 for fp16 inference 7 | optimum>=1.17.0 8 | tabulate 9 | transformers>=4.34.99 10 | -------------------------------------------------------------------------------- /examples/mobilenet/.gitignore: -------------------------------------------------------------------------------- 1 | output/ 2 | tmp/ 3 | 4 | mobilenet_*eval.json 5 | raw_qnn_sdk_config.json 6 | -------------------------------------------------------------------------------- /examples/mobilenet/onnx/README.md: -------------------------------------------------------------------------------- 1 | # TIMM Model Optimization (Quantization & QDQ) 2 | This folder contains examples of **TIMM (PyTorch Image Models) optimization** using **Olive workflows**, focusing on **ONNX conversion, quantization, and QDQ transformation**. 3 | 4 | ## **Optimization Workflow** 5 | This example optimizes `timm/mobilenetv3_small_100.lamb_in1k` for **CPU execution** by: 6 | - *Converting PyTorch model to ONNX* 7 | - *Applying ONNX quantization* 8 | - *Applying QDQ (Quantize-DeQuantize) transformation* 9 | 10 | - **Model**: [timm/mobilenetv3_small_100.lamb_in1k](https://huggingface.co/timm/mobilenetv3_small_100.lamb_in1k) 11 | - **Dataset**: [ImageNet-1K](https://huggingface.co/datasets/imagenet-1k) 12 | 13 | --- 14 | 15 | ## **Running the Optimization** 16 | ### **Running with Config File** 17 | The provided `config.json` configuration performs **ONNX conversion, quantization, and QDQ transformation**. 18 | 19 | **Install Required Dependencies** 20 | ```sh 21 | pip install -r requirements.txt 22 | olive run --config config.json --setup 23 | ``` 24 | **Run Model Optimization** 25 | ```sh 26 | olive run --config config.json 27 | ``` 28 | 29 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 30 | You can then select the best model and config from the candidates and run the model with the selected config. 31 | 32 | -------------------------------------------------------------------------------- /examples/mobilenet/onnx/requirements.txt: -------------------------------------------------------------------------------- 1 | evaluate 2 | scikit-learn 3 | timm 4 | -------------------------------------------------------------------------------- /examples/mobilenet/onnx/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import timm 6 | 7 | 8 | def load_timm(model_name: str): 9 | model = timm.create_model(model_name, pretrained=True) 10 | return model.eval() 11 | -------------------------------------------------------------------------------- /examples/mobilenet/qnn/README.md: -------------------------------------------------------------------------------- 1 | # MobileNet optimization with QDQ Quantization on Qualcomm NPU 2 | This folder contains a sample use case of Olive to optimize a MobileNet model for Qualcomm NPU (QNN Execution Provider) using static QDQ quantization. 3 | 4 | This example requires an x86 python environment on a Windows ARM machine. 5 | 6 | 7 | ## Prerequisites 8 | ### Clone the repository and install Olive 9 | 10 | Refer to the instructions in the [examples README](../README.md) to clone the repository and install Olive. 11 | 12 | ### Install onnxruntime-qnn 13 | ```bash 14 | python -m pip install onnxruntime-qnn 15 | ``` 16 | 17 | ### Pip requirements 18 | Install the necessary python packages: 19 | ``` 20 | python -m pip install -r requirements.txt 21 | ``` 22 | 23 | ### Download data and model 24 | To download the necessary data and model files: 25 | ``` 26 | python download_files.py 27 | ``` 28 | 29 | ## Run the sample 30 | Run the following command to quantize the model and evaluate it on the NPU: 31 | ```bash 32 | olive run --config mobilenet_qnn_ep.json 33 | ``` 34 | 35 | **NOTE:** The model optimization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the `mobilenet_qnn_ep.json` configuration file to skip the evaluation step. 36 | -------------------------------------------------------------------------------- /examples/mobilenet/qnn/requirements.txt: -------------------------------------------------------------------------------- 1 | packaging 2 | pillow 3 | scipy 4 | torchvision 5 | -------------------------------------------------------------------------------- /examples/open_llama/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.9.21 6 | - pip=22.3.1 7 | - pip: 8 | - datasets 9 | - optimum 10 | - sentencepiece 11 | - transformers 12 | - git+https://github.com/microsoft/Olive#egg=olive-ai[gpu] 13 | -------------------------------------------------------------------------------- /examples/open_llama/requirements-arc.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml>=1.11.1 2 | azure-identity 3 | azureml-fsspec 4 | -------------------------------------------------------------------------------- /examples/open_llama/requirements-sparsegpt.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | torch-tensorrt 3 | -------------------------------------------------------------------------------- /examples/open_llama/requirements-woq.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | intel-extension-for-transformers 3 | lm-eval==0.4.2 4 | neural-compressor>=2.3 5 | onnxruntime 6 | optimum 7 | sentencepiece 8 | transformers 9 | -------------------------------------------------------------------------------- /examples/open_llama/requirements.txt: 
-------------------------------------------------------------------------------- 1 | datasets 2 | optimum 3 | sentencepiece 4 | -------------------------------------------------------------------------------- /examples/opt_125m/awq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "facebook/opt-125m" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "passes": { "4bit_awq_quantizer": { "type": "AutoAWQQuantizer" } }, 10 | "host": "local_system", 11 | "target": "local_system", 12 | "cache_dir": "cache", 13 | "output_dir": "models/awq" 14 | } 15 | -------------------------------------------------------------------------------- /examples/opt_125m/awq_onnx.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "facebook/opt-125m" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "passes": { 10 | "4bit_awq_quantizer": { "type": "AutoAWQQuantizer" }, 11 | "conversion_merged": { "type": "OnnxConversion", "device": "cuda", "torch_dtype": "float32" }, 12 | "transformers_optimization_fp16": { 13 | "type": "OrtTransformersOptimization", 14 | "model_type": "gpt2", 15 | "opt_level": 0, 16 | "keep_io_types": false, 17 | "float16": true 18 | } 19 | }, 20 | "host": "local_system", 21 | "target": "local_system", 22 | "cache_dir": "cache", 23 | "output_dir": "models/awq_onnx" 24 | } 25 | -------------------------------------------------------------------------------- /examples/opt_125m/gptq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "facebook/opt-125m" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "data_configs": [ 10 | { 11 | "name": "wikitext2_train", 12 | "type": "HuggingfaceContainer", 13 | "load_dataset_config": { "data_name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "train" }, 14 | "pre_process_data_config": { "add_special_tokens": false, "max_samples": 128 } 15 | } 16 | ], 17 | "passes": { "gptq_quant_int4": { "type": "GptqQuantizer", "data_config": "wikitext2_train" } }, 18 | "host": "local_system", 19 | "target": "local_system", 20 | "cache_dir": "cache", 21 | "output_dir": "models/gptq" 22 | } 23 | -------------------------------------------------------------------------------- /examples/opt_125m/gptq_onnx.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "facebook/opt-125m" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "data_configs": [ 10 | { 11 | "name": "wikitext2_train", 12 | "type": "HuggingfaceContainer", 13 | "load_dataset_config": { "data_name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "train" }, 14 | "pre_process_data_config": { "add_special_tokens": false, "max_samples": 128 } 15 | } 16 | ], 17 | "passes": { 18 | "gptq_quant_int4": { "type": "GptqQuantizer", 
"data_config": "wikitext2_train" }, 19 | "conversion_merged": { "type": "OnnxConversion", "device": "cuda", "torch_dtype": "float32" }, 20 | "transformers_optimization_fp16": { 21 | "type": "OrtTransformersOptimization", 22 | "model_type": "gpt2", 23 | "opt_level": 0, 24 | "keep_io_types": false, 25 | "float16": true 26 | } 27 | }, 28 | "host": "local_system", 29 | "target": "local_system", 30 | "cache_dir": "cache", 31 | "output_dir": "models/gptq_onnx" 32 | } 33 | -------------------------------------------------------------------------------- /examples/opt_125m/requirements-awq.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | autoawq 3 | -------------------------------------------------------------------------------- /examples/opt_125m/requirements-gptq.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | auto-gptq 3 | -------------------------------------------------------------------------------- /examples/opt_125m/requirements.txt: -------------------------------------------------------------------------------- 1 | optimum 2 | transformers 3 | -------------------------------------------------------------------------------- /examples/phi2/.gitignore: -------------------------------------------------------------------------------- 1 | phi2/* 2 | phi2_optimize.json 3 | -------------------------------------------------------------------------------- /examples/phi2/phi2_genai.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "microsoft/phi-2" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ 7 | { "device": "GPU", "execution_providers": [ "CPUExecutionProvider", "CUDAExecutionProvider" ] } 8 | ] 9 | } 10 | }, 11 | "passes": { "builder": { "type": "ModelBuilder", "precision": "int4" } }, 12 | "host": "local_system", 13 | "target": "local_system", 14 | "cache_dir": "cache", 15 | "output_dir": "models/model_builder" 16 | } 17 | -------------------------------------------------------------------------------- /examples/phi2/requirements-lora.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | accelerate 3 | bitsandbytes 4 | peft 5 | scikit-learn 6 | -------------------------------------------------------------------------------- /examples/phi2/requirements-pipeline.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | einops 3 | onnx>=1.15.0 4 | onnxruntime-genai 5 | onnxscript>=0.1.0.dev20240126 6 | scikit-learn 7 | torch>=2.2.0 8 | # onnxruntime-genai 0.5.2 is not compatible with 4.48.0 9 | # need to wait it pick up commit 10 | # https://github.com/microsoft/onnxruntime-genai/commit/c61aaa6b2349b39ca63509914b4c02105b462a4a 11 | transformers>=4.36.2, <4.48.0 12 | -------------------------------------------------------------------------------- /examples/phi2/requirements-slicegpt.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | git+https://github.com/microsoft/TransformerCompression.git 3 | -------------------------------------------------------------------------------- /examples/phi2/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | onnx>=1.15.0 3 | onnxscript>=0.1.0.dev20240126 4 | torch>=2.2.0 
5 | transformers>=4.36.2 6 | -------------------------------------------------------------------------------- /examples/phi3/.gitignore: -------------------------------------------------------------------------------- 1 | phi3_run_*.json 2 | -------------------------------------------------------------------------------- /examples/phi3/phi3_nvmo_ptq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "HfModel", 4 | "model_path": "microsoft/Phi-3-mini-4k-instruct", 5 | "task": "text-classification" 6 | }, 7 | "systems": { 8 | "local_system": { 9 | "type": "LocalSystem", 10 | "accelerators": [ { "device": "gpu", "execution_providers": [ "DmlExecutionProvider" ] } ] 11 | } 12 | }, 13 | "engine": { "target": "local_system" }, 14 | "passes": { 15 | "builder": { "type": "ModelBuilder", "precision": "fp16" }, 16 | "quantization": { 17 | "type": "NVModelOptQuantization", 18 | "algorithm": "awq", 19 | "tokenizer_dir": "microsoft/Phi-3-mini-4k-instruct", 20 | "calibration": "awq_lite" 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /examples/phi3/requirements-awq.txt: -------------------------------------------------------------------------------- 1 | autoawq 2 | onnxruntime-genai 3 | transformers 4 | -------------------------------------------------------------------------------- /examples/phi3/requirements-nvmo-awq.txt: -------------------------------------------------------------------------------- 1 | cppimport==22.8.2 2 | cupy-cuda12x 3 | datasets>=2.14.4 4 | torch 5 | transformers 6 | -------------------------------------------------------------------------------- /examples/phi3/requirements-quarot.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | git+https://github.com/microsoft/TransformerCompression.git@main 3 | -------------------------------------------------------------------------------- /examples/phi3/requirements-vision.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | huggingface_hub[cli] 3 | pillow 4 | requests 5 | -------------------------------------------------------------------------------- /examples/phi3/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | olive-ai>=0.6.0 3 | onnx>=1.15.0 4 | onnxruntime>=1.18.0 5 | onnxruntime-genai>=0.2.0 6 | onnxscript>=0.1.0.dev20240126 7 | torch>=2.2.0 8 | transformers>=4.36.2 9 | -------------------------------------------------------------------------------- /examples/phi3/vision/config_templates/text_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "HfModel", 4 | "model_path": "<>", 5 | "load_kwargs": { "trust_remote_code": true } 6 | }, 7 | "passes": { "builder": { "type": "ModelBuilder", "precision": "int4", "exclude_embeds": true } }, 8 | "systems": { 9 | "local_system": { 10 | "type": "LocalSystem", 11 | "accelerators": [ { "device": "CPU", "execution_providers": [ "CPUExecutionProvider" ] } ] 12 | } 13 | }, 14 | "host": "local_system", 15 | "target": "local_system" 16 | } 17 | -------------------------------------------------------------------------------- /examples/phi3/vision/config_templates/text_embedding_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | 
"type": "PyTorchModel", 4 | "model_path": "<>", 5 | "model_loader": "text_embedding_loader", 6 | "model_script": "vision/scripts/user_script.py", 7 | "io_config": { 8 | "input_names": [ "input_ids" ], 9 | "input_types": [ "int64" ], 10 | "input_shapes": [ [ 1, 1 ] ], 11 | "output_names": [ "inputs_embeds" ], 12 | "dynamic_axes": { 13 | "input_ids": { "0": "batch_size", "1": "sequence_length" }, 14 | "inputs_embeds": { "0": "batch_size", "1": "sequence_length" } 15 | } 16 | } 17 | }, 18 | "systems": { 19 | "local_system": { 20 | "type": "LocalSystem", 21 | "accelerators": [ { "device": "cpu", "execution_providers": [ "CPUExecutionProvider" ] } ] 22 | } 23 | }, 24 | "passes": { 25 | "convert": { 26 | "type": "OnnxConversion", 27 | "save_as_external_data": true, 28 | "all_tensors_to_one_file": true, 29 | "convert_attribute": false, 30 | "size_threshold": 0, 31 | "target_opset": 14, 32 | "torch_dtype": "<>" 33 | } 34 | }, 35 | "host": "local_system", 36 | "target": "local_system" 37 | } 38 | -------------------------------------------------------------------------------- /examples/phi3/vision/scripts/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | import requests 7 | from PIL import Image 8 | from transformers import AutoModelForCausalLM, AutoProcessor 9 | 10 | 11 | def vision_embed_tokens_loader(model_name): 12 | model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True) 13 | return model.model.vision_embed_tokens 14 | 15 | 16 | def get_dummy_inputs(model=None): 17 | processor = AutoProcessor.from_pretrained(model.model_path, trust_remote_code=True) 18 | user_prompt = "<|user|>\n" 19 | assistant_prompt = "<|assistant|>\n" 20 | prompt_suffix = "<|end|>\n" 21 | prompt = f"{user_prompt}<|image_1|>\nWhat is shown in this image?{prompt_suffix}{assistant_prompt}" 22 | url = "https://www.ilankelman.org/stopsigns/australia.jpg" 23 | image = Image.open(requests.get(url, stream=True, timeout=10).raw) 24 | inputs = processor(prompt, image, return_tensors="pt") 25 | return ( 26 | inputs["pixel_values"], 27 | inputs["image_sizes"], 28 | ) 29 | 30 | 31 | def text_embedding_loader(model_name): 32 | model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True) 33 | return model.model.embed_tokens 34 | -------------------------------------------------------------------------------- /examples/phi3_5/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | optimum 3 | -------------------------------------------------------------------------------- /examples/phi4/README.md: -------------------------------------------------------------------------------- 1 | # Phi-4 Model Optimization 2 | 3 | This repository demonstrates the optimization of the [Microsoft Phi-4-reasoning](https://huggingface.co/microsoft/Phi-4-reasoning), [Microsoft Phi-4-reasoning-plus](https://huggingface.co/microsoft/Phi-4-reasoning-plus) and [Microsoft Phi-4-mini-reasoning](https://huggingface.co/microsoft/Phi-4-mini-reasoning) models using **post-training quantization (PTQ)** techniques. 
4 | 5 | ## **PTQ + AOT Compilation for Intel® NPUs using Optimum Intel®** 6 | 7 | - [**Intel® NPU**](./openvino/): Instructions for optimization with Optimum Intel® on Intel® NPU to generate an ONNX OpenVINO IR Encapsulated Model are in the [openvino](./openvino/) folder. 8 | -------------------------------------------------------------------------------- /examples/phi4/openvino/README.md: -------------------------------------------------------------------------------- 1 | # Phi-4 Model Optimization 2 | 3 | This folder contains examples of optimization of the [Microsoft Phi-4-reasoning](https://huggingface.co/microsoft/Phi-4-reasoning), [Microsoft Phi-4-reasoning-plus](https://huggingface.co/microsoft/Phi-4-reasoning-plus) and [Microsoft Phi-4-mini-reasoning](https://huggingface.co/microsoft/Phi-4-mini-reasoning) models. 4 | 5 | Instructions for optimization with Optimum Intel® on Intel® NPU to generate an ONNX OpenVINO IR Encapsulated Model are in the following folders: 6 | 7 | - Intel® NPU: [Optimization for Microsoft Phi-4-reasoning](./phi_4_reasoning/) 8 | - Intel® NPU: [Optimization for Microsoft Phi-4-reasoning-plus](./phi_4_reasoning_plus/) 9 | - Intel® NPU: [Optimization for Microsoft Phi-4-mini-reasoning](./phi_4_mini_reasoning/) 10 | -------------------------------------------------------------------------------- /examples/qwen2_5/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 Optimization 2 | 3 | Sample use cases of using Olive to optimize a [Qwen/Qwen 2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) model. 4 | 5 | - [QDQ Model with 4-bit Weights & 16-bit Activations](../phi3_5/README.md): 6 | - Run the workflow with `olive run --config qdq_config.json -m Qwen/Qwen2.5-1.5B-Instruct -o models/qwen2_5-qdq`. 7 | - [AMD NPU: Optimization and Quantization for VitisAI](../phi3_5/README.md): 8 | - Run the workflow with `olive run --config qdq_config_vitis_ai.json -m Qwen/Qwen2.5-1.5B-Instruct -o models/qwen2_5-vai`. 9 | - [PTQ + AOT Compilation for Qualcomm NPUs using QNN EP](../phi3_5/README.md): 10 | - Run the workflow with `olive run --config qnn_config.json -m Qwen/Qwen2.5-1.5B-Instruct -o models/qwen2_5-qnn`. 11 | - Run inference with `python app.py -m models/qwen2_5-qnn -c "<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n"`. 12 | - [PTQ + AWQ ONNX OVIR Encapsulated 4-bit weight compression using Intel® Optimum OpenVINO](./openvino/) 13 | -------------------------------------------------------------------------------- /examples/red_pajama/requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime-gpu>=1.15.1 2 | optimum>=1.11.0 3 | torch>=2.0.0 4 | transformers>=4.31.0 5 | -------------------------------------------------------------------------------- /examples/red_pajama/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import transformers 6 | 7 | MIN_TRANSFORMERS_VERSION = "4.30.2" 8 | 9 | # check transformers version 10 | assert transformers.__version__ >= MIN_TRANSFORMERS_VERSION, ( 11 | f"Please upgrade transformers to version {MIN_TRANSFORMERS_VERSION} or higher."
12 | ) 13 | -------------------------------------------------------------------------------- /examples/resnet/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.9.21 6 | - pip=20.2 7 | - pip: 8 | - onnxruntime 9 | - pytorch-lightning 10 | - psutil 11 | - scipy 12 | - tabulate 13 | - torchvision 14 | - git+https://github.com/microsoft/Olive#egg=olive-ai[cpu] 15 | -------------------------------------------------------------------------------- /examples/resnet/multiple_ep_requirements.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml 2 | azure-identity 3 | azureml-fsspec 4 | pytorch-lightning 5 | scipy 6 | tabulate 7 | torchvision 8 | -------------------------------------------------------------------------------- /examples/resnet/openvino/requirements.txt: -------------------------------------------------------------------------------- 1 | olive-ai[openvino] 2 | torchvision 3 | -------------------------------------------------------------------------------- /examples/resnet/qnn/README.md: -------------------------------------------------------------------------------- 1 | # ResNet Optimization with PTQ on Qualcomm NPU using QNN EP 2 | This example performs ResNet optimization on Qualcomm NPU with ONNX Runtime PTQ. It performs the optimization pipeline: 3 | - *PyTorch Model -> Onnx Model -> Quantized Onnx Model* 4 | 5 | It requires an x86 Python environment on a Windows ARM machine with `onnxruntime-qnn` installed. 6 | 7 | **NOTE:** The model quantization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the configuration file to skip the evaluation step, as sketched below.
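As a minimal sketch of the note above, the evaluator sections can also be dropped programmatically before running the workflow from Python. The config file name below is a placeholder for the QNN config JSON shipped with this example, not a guaranteed name:

```python
# Hedged sketch: run the quantization workflow without evaluation on a machine
# that does not have onnxruntime-qnn / the Qualcomm NPU available.
import json

from olive.workflows import run as olive_run

# placeholder file name -- substitute the actual QNN config from this example
with open("resnet_ptq_qnn.json") as f:
    config = json.load(f)

# drop the evaluation-related sections so only the optimization passes run
config.pop("evaluators", None)
config.pop("evaluator", None)

olive_run(config)
```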
8 | -------------------------------------------------------------------------------- /examples/resnet/qnn/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | tabulate 3 | torchvision 4 | -------------------------------------------------------------------------------- /examples/resnet/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml 2 | azure-identity 3 | azureml-fsspec 4 | datasets 5 | psutil 6 | pytorch-lightning 7 | scipy 8 | tabulate 9 | torchvision 10 | -------------------------------------------------------------------------------- /examples/stable_diffusion/.gitignore: -------------------------------------------------------------------------------- 1 | /footprints/ 2 | /result_*.png 3 | /quantize_data*/ 4 | -------------------------------------------------------------------------------- /examples/stable_diffusion/assets/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/stable_diffusion/assets/dog.png -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/.gitignore: -------------------------------------------------------------------------------- 1 | adapters 2 | onnx_model 3 | -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/image/result_pen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/stable_diffusion/notebook/image/result_pen.png -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/image/result_pen_merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/stable_diffusion/notebook/image/result_pen_merge.png -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/image/result_wolf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/stable_diffusion/notebook/image/result_wolf.png -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/image/result_wolf_merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/stable_diffusion/notebook/image/result_wolf_merge.png -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/vae_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffusers import AutoencoderKL 3 | 4 | 5 | def vae_decoder_inputs(batch_size, torch_dtype): 6 | return { 7 | "latent_sample": torch.rand((batch_size, 4, 128, 128), dtype=torch_dtype), 8 | "return_dict": False, 9 | } 10 | 11 | 12 | def _dummy_inputs(model=None): 13 | return tuple(vae_decoder_inputs(1, torch.float32).values()) 14 | 15 | 16 | def _model_loader(model_name): 17 | model = 
AutoencoderKL.from_pretrained(model_name, subfolder="vae") 18 | model.forward = model.decode 19 | return model 20 | 21 | 22 | def _io_config(model): 23 | return { 24 | "input_names": ["latent_sample", "return_dict"], 25 | "output_names": ["sample"], 26 | "dynamic_axes": {"latent_sample": {"0": "batch", "1": "channels", "2": "height", "3": "width"}}, 27 | } 28 | -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/vae_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffusers import AutoencoderKL 3 | 4 | 5 | def vae_encoder_inputs(batch_size, torch_dtype): 6 | return { 7 | "sample": torch.rand((batch_size, 3, 1024, 1024), dtype=torch_dtype), 8 | "return_dict": False, 9 | } 10 | 11 | 12 | def _dummy_inputs(model=None): 13 | return tuple(vae_encoder_inputs(1, torch.float32).values()) 14 | 15 | 16 | def _model_loader(model_name): 17 | model = AutoencoderKL.from_pretrained(model_name, subfolder="vae") 18 | model.forward = lambda sample, return_dict: model.encode(sample, return_dict)[0].sample() 19 | return model 20 | 21 | 22 | def _io_config(model): 23 | return { 24 | "input_names": ["latent_sample", "return_dict"], 25 | "output_names": ["sample"], 26 | "dynamic_axes": { 27 | "latent_sample": {"0": "batch_size", "1": "num_channels_latent", "2": "height_latent", "3": "width_latent"}, 28 | "sample": {"0": "batch_size", "1": "num_channels", "2": "height", "3": "width"}, 29 | }, 30 | } 31 | -------------------------------------------------------------------------------- /examples/stable_diffusion/requirements-common.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | # breaking changes in diffusers lora implementation 3 | diffusers<0.30.0 4 | onnx 5 | pillow 6 | tabulate 7 | torch 8 | # StableDiffusionSafetyChecker vision_model ignores attn_implementation 9 | transformers<4.43.0 10 | -------------------------------------------------------------------------------- /examples/stable_diffusion/requirements-ov.txt: -------------------------------------------------------------------------------- 1 | diffusers 2 | opencv-python 3 | pillow 4 | -------------------------------------------------------------------------------- /examples/stable_diffusion/requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements-common.txt 2 | onnxruntime-directml>=1.16.0 3 | -------------------------------------------------------------------------------- /examples/stable_diffusion/sd_utils/config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | 6 | vae_sample_size = 512 7 | unet_sample_size = 64 8 | cross_attention_dim = 768 9 | only_conversion = False 10 | data_dir = "quantize_data" 11 | -------------------------------------------------------------------------------- /examples/stable_diffusion_xl/.gitignore: -------------------------------------------------------------------------------- 1 | /footprints/ 2 | /result_*.png 3 | -------------------------------------------------------------------------------- /examples/stable_diffusion_xl/config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | vae_sample_size = 1024 7 | unet_sample_size = 128 8 | cross_attention_dim = 2048 9 | time_ids_size = 6 10 | -------------------------------------------------------------------------------- /examples/stable_diffusion_xl/requirements-common.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | diffusers 3 | invisible-watermark 4 | onnx 5 | optimum 6 | pillow 7 | torch 8 | # StableDiffusionSafetyChecker vision_model ignores attn_implementation 9 | transformers<4.43.0 10 | -------------------------------------------------------------------------------- /examples/stable_diffusion_xl/requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements-common.txt 2 | onnxruntime-directml>=1.16.2 3 | -------------------------------------------------------------------------------- /examples/super_resolution/README.md: -------------------------------------------------------------------------------- 1 | # Super Resolution Optimization with OnnxRuntime extension 2 | This folder demonstrates an example of using the OnnxRuntime extension to optimize Super Resolution. 3 | Visit [OnnxRuntime Extension](https://github.com/microsoft/onnxruntime-extensions) for installation and 4 | usage instructions. 5 | Visit [Super Resolution with OnnxRuntime](https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html) 6 | for detailed information. 7 | 8 | ## Using OnnxRuntime extension with Olive 9 | Olive includes a specific pass, `AppendPrePostProcessingOps`, to append pre- and post-processing operations to the exported 10 | ONNX model. 11 | 12 | ```json 13 | "passes": { 14 | "prepost": { 15 | "type": "AppendPrePostProcessingOps", 16 | "tool_command": "superresolution", 17 | "tool_command_args": { 18 | "output_format": "png" 19 | } 20 | } 21 | } 22 | ``` 23 | 24 | ## How to run 25 | ### Pip requirements 26 | Install the necessary Python packages: 27 | ```sh 28 | python -m pip install -r requirements.txt 29 | ``` 30 | 31 | ### Run sample using config 32 | ```sh 33 | olive run --config config.json 34 | ``` 35 | 36 | or simply run it with Python code: 37 | ```python 38 | from olive.workflows import run as olive_run 39 | olive_run("config.json") 40 | ``` 41 | 42 | After running the above command, the model and corresponding config will be saved in the output directory.
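Because the appended pre- and post-processing operations are custom operators from onnxruntime-extensions, the extensions library must be registered with the ONNX Runtime session to run the optimized model. The following is a hedged sketch, not part of the example: the model path under the output directory, the raw-image-bytes input, and the single PNG-bytes output are assumptions based on the `superresolution` tool command and `"output_format": "png"` above, not guaranteed names.

```python
# Hedged sketch: run the pre/post-processed super-resolution model with the
# onnxruntime-extensions custom ops registered on the session.
import numpy as np
import onnxruntime as ort
from onnxruntime_extensions import get_library_path

so = ort.SessionOptions()
# registers the custom image (de)coding ops used by the appended steps
so.register_custom_ops_library(get_library_path())

# placeholder path -- use the ONNX file actually written to the output directory
sess = ort.InferenceSession("models/model.onnx", sess_options=so)

# the appended pre-processing step is assumed to consume the encoded image bytes
with open("input.png", "rb") as f:
    image_bytes = np.frombuffer(f.read(), dtype=np.uint8)

input_name = sess.get_inputs()[0].name
result = sess.run(None, {input_name: image_bytes})[0]

# the appended post-processing step is assumed to emit encoded PNG bytes
with open("upscaled.png", "wb") as f:
    f.write(result.tobytes())
```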
43 | -------------------------------------------------------------------------------- /examples/super_resolution/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "PyTorchModel", 4 | "model_loader": "load_pytorch_model", 5 | "model_script": "loader.py", 6 | "io_config": { 7 | "input_names": [ "input" ], 8 | "input_shapes": [ [ 1, 1, 224, 224 ] ], 9 | "input_types": [ "float32" ], 10 | "output_names": [ "output" ] 11 | } 12 | }, 13 | "passes": { 14 | "exporter": { "type": "OnnxConversion", "target_opset": 15 }, 15 | "prepost": { 16 | "type": "AppendPrePostProcessingOps", 17 | "tool_command": "superresolution", 18 | "tool_command_args": { "output_format": "png" } 19 | } 20 | }, 21 | "log_severity_level": 0, 22 | "clean_cache": true, 23 | "cache_dir": "cache", 24 | "output_dir": "models" 25 | } 26 | -------------------------------------------------------------------------------- /examples/super_resolution/requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime_extensions 2 | -------------------------------------------------------------------------------- /examples/test/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /examples/test/azureml/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /examples/test/azureml/test_resnet_vitis_ai_ptq_cpu_aml.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import os 6 | 7 | import pytest 8 | 9 | from ..utils import check_output, get_example_dir, patch_config 10 | 11 | 12 | @pytest.fixture(scope="module", autouse=True) 13 | def setup(): 14 | """Setups any state specific to the execution of the given module.""" 15 | os.chdir(get_example_dir("resnet")) 16 | 17 | 18 | @pytest.mark.parametrize("system", ["aml_system"]) 19 | @pytest.mark.parametrize("olive_json", ["resnet_vitis_ai_ptq_cpu.json"]) 20 | def test_resnet(system, olive_json): 21 | from olive.workflows import run as olive_run 22 | 23 | olive_config = patch_config(olive_json, None, None, system) 24 | 25 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 26 | check_output(workflow_output) 27 | -------------------------------------------------------------------------------- /examples/test/local/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /examples/test/local/test_ast.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from ..utils import check_output, get_example_dir 11 | 12 | 13 | @pytest.fixture(scope="module", autouse=True) 14 | def setup(): 15 | """Setups any state specific to the execution of the given module.""" 16 | os.chdir(get_example_dir("ast")) 17 | 18 | 19 | def test_ast(): 20 | from olive.workflows import run as olive_run 21 | 22 | with open("ast.json") as f: 23 | olive_config = json.load(f) 24 | 25 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 26 | check_output(workflow_output) 27 | -------------------------------------------------------------------------------- /examples/test/local/test_bert_inc.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from ..utils import check_output, get_example_dir 11 | 12 | 13 | @pytest.fixture(scope="module", autouse=True) 14 | def setup(): 15 | """Setups any state specific to the execution of the given module.""" 16 | os.chdir(get_example_dir("bert")) 17 | 18 | 19 | @pytest.mark.parametrize("olive_json", ["bert_inc_dynamic_ptq_cpu.json", "bert_inc_ptq_cpu.json"]) 20 | def test_bert(olive_json): 21 | from olive.workflows import run as olive_run 22 | 23 | with open(olive_json) as f: 24 | olive_config = json.load(f) 25 | 26 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 27 | check_output(workflow_output) 28 | -------------------------------------------------------------------------------- /examples/test/local/test_bert_ptq_cpu.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import os 6 | 7 | import pytest 8 | 9 | from ..utils import check_output, get_example_dir, patch_config 10 | 11 | 12 | @pytest.fixture(scope="module", autouse=True) 13 | def setup(): 14 | """Setups any state specific to the execution of the given module.""" 15 | os.chdir(get_example_dir("bert")) 16 | 17 | 18 | @pytest.mark.parametrize("sampler", ["tpe"]) 19 | @pytest.mark.parametrize("execution_order", ["joint"]) 20 | @pytest.mark.parametrize("system", ["local_system"]) 21 | @pytest.mark.parametrize("olive_json", ["bert_ptq_cpu.json"]) 22 | def test_bert(sampler, execution_order, system, olive_json): 23 | from olive.workflows import run as olive_run 24 | 25 | olive_config = patch_config(olive_json, sampler, execution_order, system) 26 | # remove the latency goal since it is flaky on CI 27 | metrics = olive_config["evaluators"]["common_evaluator"]["metrics"] 28 | del metrics[1]["sub_types"][0]["goal"] 29 | 30 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 31 | check_output(workflow_output) 32 | -------------------------------------------------------------------------------- /examples/test/local/test_bert_ptq_cpu_docker.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import os 6 | import platform 7 | 8 | import pytest 9 | 10 | from olive.common.constants import OS 11 | 12 | from ..utils import check_output, get_example_dir, patch_config 13 | 14 | 15 | @pytest.fixture(scope="module", autouse=True) 16 | def setup(): 17 | """Setups any state specific to the execution of the given module.""" 18 | os.chdir(get_example_dir("bert")) 19 | 20 | 21 | @pytest.mark.parametrize("sampler", ["tpe"]) 22 | @pytest.mark.parametrize("execution_order", ["joint"]) 23 | @pytest.mark.parametrize("system", ["docker_system"]) 24 | @pytest.mark.parametrize("olive_json", ["bert_ptq_cpu.json"]) 25 | def test_bert(sampler, execution_order, system, olive_json): 26 | if system == "docker_system" and platform.system() == OS.WINDOWS: 27 | pytest.skip("Skip Linux containers on Windows host test case.") 28 | 29 | from olive.workflows import run as olive_run 30 | 31 | olive_config = patch_config(olive_json, sampler, execution_order, system) 32 | 33 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 34 | check_output(workflow_output) 35 | -------------------------------------------------------------------------------- /examples/test/local/test_deberta.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from ..utils import check_output, get_example_dir 11 | 12 | 13 | @pytest.fixture(scope="module", autouse=True) 14 | def setup(): 15 | """Setups any state specific to the execution of the given module.""" 16 | os.chdir(get_example_dir("deberta")) 17 | 18 | 19 | def test_deberta(): 20 | from olive.workflows import run as olive_run 21 | 22 | with open("deberta.json") as f: 23 | olive_config = json.load(f) 24 | 25 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 26 | check_output(workflow_output) 27 | -------------------------------------------------------------------------------- /examples/test/local/test_mistral_fp16.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from olive.common.hf.login import huggingface_login 11 | 12 | from ..utils import check_output, get_example_dir 13 | 14 | 15 | @pytest.fixture(scope="module", autouse=True) 16 | def setup(): 17 | """Setups any state specific to the execution of the given module.""" 18 | os.chdir(get_example_dir("mistral")) 19 | 20 | 21 | @pytest.mark.parametrize("olive_json", ["mistral_fp16.json"]) 22 | def test_mistral(olive_json): 23 | from olive.workflows import run as olive_run 24 | 25 | hf_token = os.environ.get("HF_TOKEN") 26 | huggingface_login(hf_token) 27 | 28 | with open(olive_json) as f: 29 | olive_config = json.load(f) 30 | 31 | footprint = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 32 | check_output(footprint) 33 | -------------------------------------------------------------------------------- /examples/test/local/test_mobilenet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import os 6 | 7 | import pytest 8 | 9 | from olive.common.hf.login import huggingface_login 10 | 11 | from ..utils import check_output, get_example_dir, patch_config 12 | 13 | 14 | @pytest.fixture(scope="module", autouse=True) 15 | def setup(): 16 | """Setups any state specific to the execution of the given module.""" 17 | os.chdir(get_example_dir("mobilenet/onnx")) 18 | 19 | 20 | def test_mobilenet(): 21 | from olive.workflows import run as olive_run 22 | 23 | hf_token = os.environ.get("HF_TOKEN") 24 | huggingface_login(hf_token) 25 | 26 | olive_config = patch_config("config.json") 27 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 28 | check_output(workflow_output) 29 | -------------------------------------------------------------------------------- /examples/test/local/test_mobilenet_qnn_ep.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from olive.common.utils import retry_func, run_subprocess 11 | 12 | from ..utils import get_example_dir 13 | 14 | 15 | @pytest.fixture(scope="module", autouse=True) 16 | def setup(): 17 | """Setups any state specific to the execution of the given module.""" 18 | os.chdir(get_example_dir("mobilenet/qnn")) 19 | 20 | retry_func(run_subprocess, kwargs={"cmd": "python download_files.py", "check": True}) 21 | 22 | 23 | def test_mobilenet_qnn_ep(): 24 | from olive.workflows import run as olive_run 25 | 26 | with open("mobilenet_qnn_ep.json") as f: 27 | config = json.load(f) 28 | 29 | # only run optimization here, needs qnn-ep to run evaluation 30 | del config["evaluators"], config["evaluator"] 31 | 32 | # need to pass [] since the parser reads from sys.argv 33 | workflow_output = olive_run(config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 34 | 35 | # make sure it only ran for npu-qnn 36 | assert len(workflow_output.get_available_devices()) == 1 37 | assert workflow_output["npu"] is not None 38 | assert workflow_output["npu"]["QNNExecutionProvider"] is not None 39 | -------------------------------------------------------------------------------- /examples/test/local/test_phi2.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from olive.common.hf.login import huggingface_login 11 | 12 | from ..utils import assert_nodes, get_example_dir 13 | 14 | 15 | @pytest.fixture(scope="module", autouse=True) 16 | def setup(): 17 | """Setups any state specific to the execution of the given module.""" 18 | os.chdir(get_example_dir("phi2")) 19 | 20 | 21 | def test_phi2_genai(): 22 | from olive.workflows import run as olive_run 23 | 24 | hf_token = os.environ.get("HF_TOKEN") 25 | huggingface_login(hf_token) 26 | 27 | with open("phi2_genai.json") as f: 28 | olive_config = json.load(f) 29 | 30 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 31 | assert_nodes(workflow_output) 32 | -------------------------------------------------------------------------------- /examples/test/local/test_resnet_qat.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import os 6 | 7 | import pytest 8 | 9 | from olive.common.utils import retry_func, run_subprocess 10 | 11 | from ..utils import check_output, get_example_dir, patch_config 12 | 13 | 14 | @pytest.fixture(scope="module", autouse=True) 15 | def setup(): 16 | """Setups any state specific to the execution of the given module.""" 17 | os.chdir(get_example_dir("resnet")) 18 | 19 | # prepare model and data 20 | # retry since it fails randomly 21 | retry_func(run_subprocess, kwargs={"cmd": "python prepare_model_data.py", "check": True}) 22 | 23 | 24 | @pytest.mark.parametrize("sampler", ["random"]) 25 | @pytest.mark.parametrize("execution_order", ["pass-by-pass"]) 26 | @pytest.mark.parametrize("system", ["local_system"]) 27 | @pytest.mark.parametrize( 28 | "olive_json", ["resnet_qat_default_train_loop_cpu.json", "resnet_qat_lightning_module_cpu.json"] 29 | ) 30 | def test_resnet(sampler, execution_order, system, olive_json): 31 | from olive.workflows import run as olive_run 32 | 33 | olive_config = patch_config(olive_json, sampler, execution_order, system) 34 | 35 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 36 | check_output(workflow_output) 37 | -------------------------------------------------------------------------------- /examples/test/local/test_resnet_vitis_ai_ptq_cpu.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import os 6 | 7 | import pytest 8 | 9 | from ..utils import check_output, get_example_dir, patch_config 10 | 11 | 12 | @pytest.fixture(scope="module", autouse=True) 13 | def setup(): 14 | """Setups any state specific to the execution of the given module.""" 15 | os.chdir(get_example_dir("resnet")) 16 | 17 | 18 | @pytest.mark.skip(reason="Disable failing tests") 19 | @pytest.mark.parametrize("system", ["local_system"]) 20 | @pytest.mark.parametrize("olive_json", ["resnet_vitis_ai_ptq_cpu.json"]) 21 | def test_resnet(system, olive_json): 22 | from olive.workflows import run as olive_run 23 | 24 | olive_config = patch_config(olive_json, None, None, system) 25 | 26 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 27 | check_output(workflow_output) 28 | -------------------------------------------------------------------------------- /examples/test/local/test_super_resolution.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from ..utils import assert_nodes, get_example_dir 11 | 12 | 13 | @pytest.fixture(scope="module", autouse=True) 14 | def setup(): 15 | """Setups any state specific to the execution of the given module.""" 16 | os.chdir(get_example_dir("super_resolution")) 17 | 18 | 19 | def test_super_resolution(): 20 | from olive.workflows import run as olive_run 21 | 22 | with open("config.json") as f: 23 | olive_config = json.load(f) 24 | 25 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 26 | assert_nodes(workflow_output) 27 | -------------------------------------------------------------------------------- /examples/vgg/.gitignore: -------------------------------------------------------------------------------- 1 | outputs/ 2 | -------------------------------------------------------------------------------- /examples/vgg/prepare_config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import json 6 | import platform 7 | from pathlib import Path 8 | 9 | from olive.common.constants import OS 10 | 11 | 12 | def resolve_windows_config(): 13 | with Path("vgg_config.json").open() as f: 14 | snpe_windows_config = json.load(f) 15 | 16 | del snpe_windows_config["passes"]["snpe_quantization"] 17 | with Path("vgg_config.json").open("w") as f: 18 | json.dump(snpe_windows_config, f, indent=4) 19 | 20 | 21 | if __name__ == "__main__": 22 | if platform.system() == OS.WINDOWS: 23 | resolve_windows_config() 24 | -------------------------------------------------------------------------------- /examples/vgg/requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime 2 | pillow 3 | torchvision 4 | -------------------------------------------------------------------------------- /examples/vgg/vgg_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "ONNXModel", "model_path": "models/vgg.onnx" }, 3 | "data_configs": [ 4 | { 5 | "name": "raw_data", 6 | "type": "RawDataContainer", 7 | "load_dataset_config": { 8 | "data_dir": "data", 9 | "input_names": [ "data" ], 10 | "input_shapes": [ [ 1, 3, 224, 224 ] ], 11 | "input_dirs": [ "." 
], 12 | "input_suffix": ".raw", 13 | "input_order_file": "input_order.txt" 14 | } 15 | } 16 | ], 17 | "passes": { 18 | "snpe_conversion": { 19 | "type": "SNPEConversion", 20 | "input_names": [ "data" ], 21 | "input_shapes": [ [ 1, 3, 224, 224 ] ], 22 | "output_names": [ "vgg0_dense2_fwd" ] 23 | }, 24 | "snpe_quantization": { "type": "SNPEQuantization", "enable_htp": true, "data_config": "raw_data" } 25 | }, 26 | "log_severity_level": 0, 27 | "clean_cache": true, 28 | "cache_dir": "cache", 29 | "output_dir": "outputs" 30 | } 31 | -------------------------------------------------------------------------------- /examples/vit/openvino/requirements.txt: -------------------------------------------------------------------------------- 1 | olive-ai[openvino] 2 | torchvision 3 | -------------------------------------------------------------------------------- /examples/vit/qnn/README.md: -------------------------------------------------------------------------------- 1 | # Vision Transformer (ViT) Optimization with PTQ on Qualcomm NPU using QNN EP 2 | This example performs ViT optimization on Qualcomm NPU with ONNX Runtime PTQ. It performs the optimization pipeline: 3 | - *PyTorch Model -> Onnx Model -> Quantized Onnx Model* 4 | 5 | It requires an x86 Python environment on a Windows ARM machine with `onnxruntime-qnn` installed. 6 | 7 | **NOTE:** The model quantization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the configuration file to skip the evaluation step. 8 | 9 | ## Test with Tiny-ImageNet-200 10 | Tiny-ImageNet-200 is a smaller subset of the ImageNet dataset containing 200 classes, commonly used for benchmarking deep learning models. 11 | 12 | You can test the output model with the provided scripts. They also serve as an example of how to run inference with the model. 13 | - Download the dataset from http://cs231n.stanford.edu/tiny-imagenet-200.zip and extract it. 14 | - Go to the subfolder *val_tiny_imagenet*. In *val_tiny_imagenet.py*, update *path_to_tiny_imagenet* with the Tiny-ImageNet-200 root path and set *path_to_model*. Modify *limit* to control how many samples are used in the test. 15 | - Run 16 | ``` 17 | python .\val_tiny_imagenet.py 18 | ``` 19 | -------------------------------------------------------------------------------- /examples/vit/qnn/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | tabulate 3 | torchvision 4 | -------------------------------------------------------------------------------- /olive/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License.
4 | # -------------------------------------------------------------------------- 5 | import logging 6 | import sys 7 | 8 | _logger = logging.getLogger(__name__) 9 | _logger.setLevel(logging.INFO) 10 | 11 | _sc = logging.StreamHandler(stream=sys.stdout) 12 | _formatter = logging.Formatter("[%(asctime)s] [%(levelname)s] [%(filename)s:%(lineno)d:%(funcName)s] %(message)s") 13 | _sc.setFormatter(_formatter) 14 | _logger.addHandler(_sc) 15 | _logger.propagate = False 16 | 17 | __version__ = "0.10.0.dev0" 18 | 19 | # pylint: disable=C0413 20 | 21 | from olive.engine.output import DeviceOutput, ModelOutput, WorkflowOutput # noqa: E402 22 | 23 | __all__ = ["DeviceOutput", "ModelOutput", "WorkflowOutput"] 24 | -------------------------------------------------------------------------------- /olive/__main__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | # This is to support running Olive CLI as a module in case olive command 6 | # is not available in the PATH. 7 | # Example: python -m olive 8 | if __name__ == "__main__": 9 | from olive.cli.launcher import main 10 | 11 | main(called_as_console_script=False) 12 | -------------------------------------------------------------------------------- /olive/auto_optimizer/config_template/opt_level_passes.yaml: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | "0": 7 | # 1. The first dim of the list is the pass order 8 | # 2. The second dim of the list is the pass list which can be selected to run one by one 9 | # take this opt_level 0 an example, olive will run the passes in the following order: 10 | # [OnnxConversion] -> [OrtTransformersOptimization] -> [OrtMixedPrecision, OnnxQuantization, IncQuantization, VitisAIQuantization, OnnxMatMul4Quantizer] -> [OrtSessionParamsTuning] 11 | # and run bfs to generate available pass flows(path), like: 12 | # OnnxConversion -> OrtTransformersOptimization -> OrtMixedPrecision -> OrtSessionParamsTuning 13 | # OnnxConversion -> OrtTransformersOptimization -> OnnxQuantization -> OrtSessionParamsTuning 14 | # OnnxConversion -> OrtTransformersOptimization -> IncQuantization -> OrtSessionParamsTuning 15 | # and etc. 16 | 17 | - [OnnxConversion, ModelBuilder] 18 | - [OrtTransformersOptimization] 19 | - [OnnxQuantization, IncQuantization, VitisAIQuantization, OnnxMatMul4Quantizer, OrtMixedPrecision] 20 | - [OrtSessionParamsTuning] 21 | -------------------------------------------------------------------------------- /olive/azureml/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/cli/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/cli/configure_qualcomm_sdk.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from argparse import ArgumentParser 6 | 7 | from olive.cli.base import BaseOliveCLICommand 8 | 9 | 10 | class ConfigureQualcommSDKCommand(BaseOliveCLICommand): 11 | @staticmethod 12 | def register_subcommand(parser: ArgumentParser): 13 | sub_parser = parser.add_parser( 14 | "configure-qualcomm-sdk", 15 | help="Configure Qualcomm SDK for Olive", 16 | ) 17 | sub_parser.add_argument( 18 | "--py_version", 19 | type=str, 20 | help="Python version: Use 3.6 for tensorflow 1.15 and 3.8 otherwise", 21 | required=True, 22 | choices=["3.6", "3.8"], 23 | ) 24 | sub_parser.add_argument( 25 | "--sdk", 26 | type=str, 27 | help="Qualcomm SDK: snpe or qnn", 28 | required=True, 29 | choices=["snpe", "qnn"], 30 | ) 31 | 32 | sub_parser.set_defaults(func=ConfigureQualcommSDKCommand) 33 | 34 | def run(self): 35 | from olive.platform_sdk.qualcomm.configure.configure import configure 36 | 37 | configure(self.args.py_version, self.args.sdk) 38 | -------------------------------------------------------------------------------- /olive/cli/constants.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | CONDA_CONFIG = { 6 | "name": "olive_finetune", 7 | "channels": ["defaults"], 8 | "dependencies": [ 9 | "python=3.9.21", 10 | "pip=22.3.1", 11 | { 12 | "pip": [ 13 | "accelerate", 14 | "bitsandbytes", 15 | "peft", 16 | "sentencepiece", 17 | "datasets", 18 | "evaluate", 19 | "psutil", 20 | "optimum", 21 | "scipy", 22 | "scikit-learn", 23 | "torch", 24 | "onnxruntime-genai", 25 | "--extra-index-url https://download.pytorch.org/whl/cu118", 26 | "transformers>=4.41.1", 27 | "git+https://github.com/microsoft/Olive#egg=olive-ai[gpu,azureml]", 28 | ] 29 | }, 30 | ], 31 | } 32 | -------------------------------------------------------------------------------- /olive/common/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/common/constants.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.common.utils import StrEnumBase 6 | 7 | 8 | class OS(StrEnumBase): 9 | WINDOWS = "Windows" 10 | LINUX = "Linux" 11 | 12 | 13 | ##### AzureML system ##### 14 | 15 | WORKFLOW_CONFIG = "workflow_config" 16 | WORKFLOW_ARTIFACTS = "workflow_artifacts" 17 | HF_LOGIN = "HF_LOGIN" 18 | KEYVAULT_NAME = "KEYVAULT_NAME" 19 | 20 | 21 | ############# Engine ############# 22 | 23 | DEFAULT_WORKFLOW_ID = "default_workflow" 24 | DEFAULT_CACHE_DIR = ".olive-cache" 25 | 26 | 27 | ############# Packaging ############# 28 | 29 | BASE_IMAGE = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04" 30 | 31 | ############# HF ############# 32 | 33 | DEFAULT_HF_TASK = "text-generation-with-past" 34 | 35 | 36 | ########### Model ########### 37 | 38 | LOCAL_INPUT_MODEL_ID = "local_input_model" 39 | 40 | 41 | ########### Cache ########### 42 | 43 | ACCOUNT_URL_TEMPLATE = "https://{account_name}.blob.core.windows.net" 44 | -------------------------------------------------------------------------------- /olive/common/hf/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/common/hf/login.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import logging 6 | import os 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def huggingface_login(token: str): 12 | from huggingface_hub import login 13 | 14 | login(token=token) 15 | 16 | 17 | def aml_runner_hf_login(): 18 | hf_login = os.environ.get("HF_LOGIN") 19 | if hf_login: 20 | from azure.identity import DefaultAzureCredential 21 | from azure.keyvault.secrets import SecretClient 22 | 23 | keyvault_name = os.environ.get("KEYVAULT_NAME") 24 | logger.debug("Getting token from keyvault %s", keyvault_name) 25 | 26 | credential = DefaultAzureCredential() 27 | secret_client = SecretClient(vault_url=f"https://{keyvault_name}.vault.azure.net/", credential=credential) 28 | token = secret_client.get_secret("hf-token").value 29 | huggingface_login(token) 30 | -------------------------------------------------------------------------------- /olive/common/hf/mappings.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | 6 | # mapping from task to peft task type 7 | # refer to peft.utils.peft_types.TaskType for all possible values 8 | TASK_TO_PEFT_TASK_TYPE = { 9 | "text-classification": "SEQ_CLS", 10 | "text-generation": "CAUSAL_LM", 11 | # TODO(jambayk): see if we need more task types 12 | } 13 | 14 | MODEL_TYPE_MAPPING = { 15 | "camembert": "bert", 16 | "deberta": "bert", 17 | "deberta-v2": "bert", 18 | "distilbert": "bert", 19 | "gpt_neox": "gpt2", 20 | "gpt-j": "gpt2", 21 | "llama": "gpt2", 22 | "roberta": "bert", 23 | "phi3": "phi", 24 | } 25 | 26 | MODELS_TO_LORA_TARGET_MODULES_MAPPING = {"phi3": ["o_proj", "qkv_proj"]} 27 | -------------------------------------------------------------------------------- /olive/common/pydantic_v1.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | """Pydantic v1 compatibility module. 7 | 8 | Pydantic v2 has breaking changes that are not compatible with the current version of Olive. 9 | Migration Guide: https://docs.pydantic.dev/latest/migration/. 10 | 11 | In order to support both versions of Pydantic, we use this module to access pydantic's v1 API. 12 | """ 13 | 14 | # pylint: disable=redefined-builtin, wildcard-import, unused-wildcard-import 15 | 16 | try: 17 | # pydantic v2 18 | from pydantic.v1 import * # noqa: F403 19 | except ImportError: 20 | # pydantic v1 21 | from pydantic import * # noqa: F403 22 | -------------------------------------------------------------------------------- /olive/data/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.data.component import * # noqa: F403 6 | from olive.data.container import * # noqa: F403 7 | -------------------------------------------------------------------------------- /olive/data/component/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.data.component import dataloader, load_dataset, post_process_data, pre_process_data 6 | 7 | __all__ = [ 8 | "dataloader", 9 | "load_dataset", 10 | "post_process_data", 11 | "pre_process_data", 12 | ] 13 | -------------------------------------------------------------------------------- /olive/data/container/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | from olive.data.container import data_container, dummy_data_container, huggingface_container, raw_data_container 6 | 7 | __all__ = [ 8 | "data_container", 9 | "dummy_data_container", 10 | "huggingface_container", 11 | "raw_data_container", 12 | ] 13 | -------------------------------------------------------------------------------- /olive/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.engine.config import EngineConfig 6 | from olive.engine.engine import Engine 7 | from olive.engine.footprint import Footprint 8 | 9 | __all__ = [ 10 | "Engine", 11 | "EngineConfig", 12 | "Footprint", 13 | ] 14 | -------------------------------------------------------------------------------- /olive/engine/packaging/Dockerfile.base: -------------------------------------------------------------------------------- 1 | # DisableDockerDetector "Prevent warnings on 1ES builds" 2 | FROM 3 | 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN apt-get -y update && ACCEPT_EULA=Y apt-get -y upgrade 7 | RUN apt-get install -y --no-install-recommends wget gnupg 8 | 9 | RUN pip install --no-cache-dir pandas plotly psutil datasets transformers 10 | 11 | WORKDIR /olive 12 | 13 | ADD /olive 14 | 15 | RUN pip install -r requirements.txt 16 | 17 | -------------------------------------------------------------------------------- /olive/engine/packaging/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | from olive.evaluator.metric import Metric, SubMetric 7 | from olive.evaluator.metric_result import MetricResult, SubMetricResult, flatten_metric_result 8 | from olive.evaluator.olive_evaluator import OliveEvaluator 9 | 10 | __all__ = [ 11 | "Metric", 12 | "MetricResult", 13 | "OliveEvaluator", 14 | "SubMetric", 15 | "SubMetricResult", 16 | "flatten_metric_result", 17 | ] 18 | -------------------------------------------------------------------------------- /olive/exception/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
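# A minimal sketch of the exception hierarchy defined below: both OlivePassError and
# OliveEvaluationError derive from OliveError, so a single handler can catch any of them:
#
#   try:
#       raise OlivePassError("pass failed")
#   except OliveError as err:   # also catches OliveEvaluationError
#       print(err)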
4 | # -------------------------------------------------------------------------- 5 | class OliveError(Exception): 6 | """Base class for Olive exceptions.""" 7 | 8 | 9 | class OlivePassError(OliveError): 10 | """Base class for Olive pass exceptions.""" 11 | 12 | 13 | class OliveEvaluationError(OliveError): 14 | """Base class for Olive evaluation exceptions.""" 15 | 16 | 17 | EXCEPTIONS_TO_RAISE = (AssertionError, AttributeError, ImportError, TypeError, ValueError) 18 | -------------------------------------------------------------------------------- /olive/hardware/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.hardware.accelerator import ( 6 | DEFAULT_CPU_ACCELERATOR, 7 | DEFAULT_GPU_CUDA_ACCELERATOR, 8 | DEFAULT_GPU_TRT_ACCELERATOR, 9 | AcceleratorLookup, 10 | AcceleratorSpec, 11 | Device, 12 | ) 13 | 14 | __all__ = [ 15 | "DEFAULT_CPU_ACCELERATOR", 16 | "DEFAULT_GPU_CUDA_ACCELERATOR", 17 | "DEFAULT_GPU_TRT_ACCELERATOR", 18 | "AcceleratorLookup", 19 | "AcceleratorSpec", 20 | "Device", 21 | ] 22 | -------------------------------------------------------------------------------- /olive/model/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.model.config import ModelConfig 6 | from olive.model.handler import * # noqa: F403 7 | 8 | __all__ = ["ModelConfig"] 9 | -------------------------------------------------------------------------------- /olive/model/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.model.config.hf_config import HfLoadKwargs 6 | from olive.model.config.io_config import ( 7 | IoConfig, 8 | complete_kv_cache_with_model_attributes, 9 | extend_io_config_with_kv_cache, 10 | ) 11 | from olive.model.config.kv_cache_config import KVCacheConfig 12 | from olive.model.config.model_config import ModelConfig 13 | 14 | __all__ = [ 15 | "HfLoadKwargs", 16 | "IoConfig", 17 | "KVCacheConfig", 18 | "ModelConfig", 19 | "complete_kv_cache_with_model_attributes", 20 | "extend_io_config_with_kv_cache", 21 | ] 22 | -------------------------------------------------------------------------------- /olive/model/config/registry.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | REGISTRY = {} 6 | 7 | 8 | def model_handler_registry(model_type): 9 | """Decorate and register all OliveModelHandler subclasses. 
10 | 11 | Args: 12 | model_type (str): The model type registration name. Matching is case-insensitive; the name is stored in lowercase. 13 | 14 | Returns: 15 | The decorator that registers the class and returns it unchanged. 16 | 17 | """ 18 | model_type = model_type.lower() 19 | 20 | def decorator_model_class(cls): 21 | if model_type in REGISTRY: 22 | raise ValueError("Cannot have two model handlers with the same name") 23 | 24 | REGISTRY[model_type] = cls 25 | cls.model_type = model_type 26 | return cls 27 | 28 | return decorator_model_class 29 | 30 | 31 | def get_model_handler(model_type): 32 | if not is_valid_model_type(model_type): 33 | raise ValueError(f"Unknown model type {model_type}") 34 | return REGISTRY[model_type.lower()] 35 | 36 | 37 | def is_valid_model_type(model_type): 38 | return model_type.lower() in REGISTRY 39 | -------------------------------------------------------------------------------- /olive/model/handler/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.model.handler.base import OliveModelHandler 6 | from olive.model.handler.composite import CompositeModelHandler 7 | from olive.model.handler.hf import DistributedHfModelHandler, HfModelHandler 8 | from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler 9 | from olive.model.handler.openvino import OpenVINOModelHandler 10 | from olive.model.handler.pytorch import PyTorchModelHandler 11 | from olive.model.handler.qnn import QNNModelHandler 12 | from olive.model.handler.snpe import SNPEModelHandler 13 | from olive.model.handler.tensorflow import TensorFlowModelHandler 14 | 15 | __all__ = [ 16 | "CompositeModelHandler", 17 | "DistributedHfModelHandler", 18 | "DistributedOnnxModelHandler", 19 | "HfModelHandler", 20 | "ONNXModelHandler", 21 | "OliveModelHandler", 22 | "OpenVINOModelHandler", 23 | "PyTorchModelHandler", 24 | "QNNModelHandler", 25 | "SNPEModelHandler", 26 | "TensorFlowModelHandler", 27 | ] 28 | -------------------------------------------------------------------------------- /olive/model/handler/mixin/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License.
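# A minimal sketch of the model handler registry defined in olive/model/config/registry.py
# above; "MyModel" and MyModelHandler are hypothetical names, not part of the codebase:
#
#   @model_handler_registry("MyModel")
#   class MyModelHandler(OliveModelHandler): ...
#
#   get_model_handler("mymodel")        # returns MyModelHandler; keys are stored lowercase
#   is_valid_model_type("MYMODEL")      # True, matching is case-insensitive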
4 | # -------------------------------------------------------------------------- 5 | from olive.model.handler.mixin.dummy_inputs import DummyInputsMixin 6 | from olive.model.handler.mixin.hf import HfMixin 7 | from olive.model.handler.mixin.io_config import IoConfigMixin 8 | from olive.model.handler.mixin.json import JsonMixin 9 | from olive.model.handler.mixin.kv_cache import PytorchKvCacheMixin 10 | from olive.model.handler.mixin.mlflow import MLFlowTransformersMixin 11 | from olive.model.handler.mixin.onnx_ep import OnnxEpValidateMixin 12 | from olive.model.handler.mixin.resource import ResourceMixin 13 | 14 | __all__ = [ 15 | "DummyInputsMixin", 16 | "HfMixin", 17 | "IoConfigMixin", 18 | "JsonMixin", 19 | "MLFlowTransformersMixin", 20 | "OnnxEpValidateMixin", 21 | "PytorchKvCacheMixin", 22 | "ResourceMixin", 23 | ] 24 | -------------------------------------------------------------------------------- /olive/model/handler/mixin/io_config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from typing import Any 6 | 7 | 8 | class IoConfigMixin: 9 | """Provide access to the model's IO configuration. 10 | 11 | Each model handler may override this behavior; for example, 12 | both the PyTorch and ONNX model handlers override the default implementation. 13 | """ 14 | 15 | @property 16 | def io_config(self) -> dict[str, Any]: 17 | return self._io_config 18 | -------------------------------------------------------------------------------- /olive/model/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.model.utils.onnx_utils import resolve_onnx_path 6 | from olive.model.utils.path_utils import normalize_path_suffix 7 | 8 | __all__ = [ 9 | "normalize_path_suffix", 10 | "resolve_onnx_path", 11 | ] 12 | -------------------------------------------------------------------------------- /olive/passes/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.passes.olive_pass import FullPassConfig, Pass 6 | from olive.passes.pass_config import AbstractPassConfig, PassModuleConfig, PassParamDefault 7 | 8 | REGISTRY = Pass.registry 9 | 10 | __all__ = [ 11 | "REGISTRY", 12 | "AbstractPassConfig", 13 | "FullPassConfig", 14 | "Pass", 15 | "PassModuleConfig", 16 | "PassParamDefault", 17 | ] 18 | -------------------------------------------------------------------------------- /olive/passes/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/onnx/qnn/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/onnx/tensorrt/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/onnx/vitis_ai/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved. 3 | # SPDX-License-Identifier: MIT 4 | # 5 | from onnxruntime.quantization.calibrate import CalibrationDataReader 6 | from onnxruntime.quantization.quant_utils import QuantFormat, QuantType 7 | 8 | from olive.passes.onnx.vitis_ai.quant_utils import PowerOfTwoMethod 9 | from olive.passes.onnx.vitis_ai.quantize import quantize_static 10 | 11 | __all__ = [ 12 | "CalibrationDataReader", 13 | "PowerOfTwoMethod", 14 | "QuantFormat", 15 | "QuantType", 16 | "quantize_static", 17 | ] 18 | -------------------------------------------------------------------------------- /olive/passes/openvino/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/pytorch/pytorch_lightning_utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
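# A minimal sketch of calling the helpers defined below, assuming pytorch_lightning is
# installed (exact Trainer keyword support depends on the installed version):
#
#   trainer = create_trainer(logger=None, max_epochs=1)   # extra kwargs go to pl.Trainer
#
# create_ddp_strategy(cluster, accelerator) wraps DDPStrategy with
# find_unused_parameters=True and the given cluster environment and accelerator.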
4 | # -------------------------------------------------------------------------- 5 | import pytorch_lightning as pl 6 | from pytorch_lightning.strategies import DDPStrategy 7 | 8 | 9 | def create_ddp_strategy(cluster, accelerator): 10 | return DDPStrategy(find_unused_parameters=True, cluster_environment=cluster, accelerator=accelerator) 11 | 12 | 13 | def create_trainer( 14 | logger, 15 | callbacks=None, 16 | max_epochs=None, 17 | max_steps=None, 18 | val_check_interval=None, 19 | log_every_n_steps=50, 20 | precision=32, 21 | default_root_dir=None, 22 | **kwargs, 23 | ): 24 | return pl.Trainer( 25 | logger=logger, 26 | callbacks=callbacks, 27 | max_epochs=max_epochs, 28 | max_steps=max_steps, 29 | val_check_interval=val_check_interval, 30 | log_every_n_steps=log_every_n_steps, 31 | precision=precision, 32 | default_root_dir=default_root_dir, 33 | **kwargs, 34 | ) 35 | -------------------------------------------------------------------------------- /olive/passes/qnn/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/snpe/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | 6 | from olive.platform_sdk.qualcomm.snpe.env import SNPESDKEnv 7 | 8 | __all__ = [ 9 | "SNPESDKEnv", 10 | ] 11 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/configure/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | # TODO(anyone): change this sub-module back to a file when `olive.platform_sdk.qualcomm.configure` command 7 | # is removed from Olive. 8 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/configure/__main__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | # A separate __main__.py is implemented since CodeQL complains about circular imports otherwise. 6 | if __name__ == "__main__": 7 | import sys 8 | 9 | from olive.cli.launcher import legacy_call 10 | 11 | legacy_call("olive.platform_sdk.qualcomm.configure", "configure-qualcomm-sdk", *sys.argv[1:]) 12 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/copy_libcdsprpc.ps1: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | # copies the libcdsprpc.dll from driver location to given location 7 | Set-PSDebug -Trace 2 8 | if ( $args.count -eq 0 ) { 9 | echo "Please specify the output location of libcdsprpc.dll" 10 | exit 1 11 | } 12 | $loc = [string](driverquery /v /fo csv | findstr qcadsprpc) 13 | if ( $loc -eq $null ) { 14 | driverquery /v /fo csv 15 | echo "Cannot locate FastRPC driver" 16 | exit 1 17 | } 18 | $lll2 = $loc.Split(",")[15] 19 | if ( $lll2 -eq $null ) { 20 | echo "Cannot locate path from FastRPC driver query" 21 | exit 1 22 | } 23 | $lll = $lll2.Split('"')[1] 24 | if ( $lll -eq $null ) { 25 | echo "Cannot locate path from FastRPC driver query" 26 | exit 1 27 | } 28 | echo Driver location is: $lll 29 | $dir = Split-Path $lll 30 | # $dir = [System.IO.Path]::GetDirectoryName($lll) 31 | $f = Join-Path $dir -ChildPath libcdsprpc.dll 32 | echo Copying $f to $args[0] 33 | Copy-Item -Path $f -Destination $args[0] 34 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/qnn/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/qnn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/snpe/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.platform_sdk.qualcomm.snpe.snpe import SNPEInferenceSession, SNPESessionOptions 6 | 7 | __all__ = ["SNPEInferenceSession", "SNPESessionOptions"] 8 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/snpe/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/snpe/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/search/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/search/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | from olive.search.samplers.random_sampler import RandomSampler 6 | from olive.search.samplers.search_sampler import SearchSampler 7 | from olive.search.samplers.sequential_sampler import SequentialSampler 8 | from olive.search.samplers.tpe_sampler import TPESampler 9 | 10 | REGISTRY = SearchSampler.registry 11 | 12 | __all__ = ["REGISTRY", "RandomSampler", "SearchSampler", "SequentialSampler", "TPESampler"] 13 | -------------------------------------------------------------------------------- /olive/systems/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/systems/azureml/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.systems.azureml.aml_system import AzureMLSystem 6 | from olive.systems.common import AzureMLDockerConfig 7 | 8 | __all__ = ["AzureMLDockerConfig", "AzureMLSystem"] 9 | -------------------------------------------------------------------------------- /olive/systems/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04 6 | 7 | RUN apt-get -y update && ACCEPT_EULA=Y apt-get -y upgrade 8 | RUN pip install azure-ai-ml \ 9 | azure-identity \ 10 | azureml-dataprep \ 11 | onnxruntime \ 12 | openvino \ 13 | openvino-dev[tensorflow,onnx] \ 14 | tensorflow \ 15 | onnxconverter_common \ 16 | olive-ai 17 | 18 | ADD requirements.txt requirements.txt 19 | RUN pip install -r requirements.txt 20 | 21 | WORKDIR /olive 22 | -------------------------------------------------------------------------------- /olive/systems/docker/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | # mcr image https://github.com/microsoft/mcr 2 | # tag list https://mcr.microsoft.com/v2/azureml/openmpi4.1.0-ubuntu20.04/tags/list 3 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 4 | 5 | RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 6 | RUN pip install --no-cache-dir pandas plotly psutil datasets transformers onnxruntime olive-ai 7 | 8 | ADD requirements.txt requirements.txt 9 | RUN pip install -r requirements.txt 10 | -------------------------------------------------------------------------------- /olive/systems/docker/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | # mcr image https://github.com/microsoft/mcr 2 | # tag list https://mcr.microsoft.com/v2/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04/tags/list 3 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04 4 | 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | 7 | RUN apt-get -y update && ACCEPT_EULA=Y apt-get -y upgrade 8 | RUN apt-get install -y --no-install-recommends wget gnupg 9 | 10 | # Install TensorRT 11 | RUN v="8.4.1-1+cuda11.6" &&\ 12 | apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub &&\ 13 | apt-get update &&\ 14 | apt-get install -y libnvinfer8=${v} libnvonnxparsers8=${v} libnvparsers8=${v} libnvinfer-plugin8=${v} \ 15 | libnvinfer-dev=${v} libnvonnxparsers-dev=${v} libnvparsers-dev=${v} libnvinfer-plugin-dev=${v} \ 16 | python3-libnvinfer=${v} libnvinfer-samples=${v} 17 | 18 | RUN pip install --no-cache-dir pandas plotly psutil datasets transformers onnxruntime-gpu olive-ai 19 | 20 | ADD requirements.txt requirements.txt 21 | RUN pip install -r requirements.txt 22 | -------------------------------------------------------------------------------- /olive/systems/docker/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.systems.common import LocalDockerConfig 6 | from olive.systems.docker.docker_system import DockerSystem 7 | 8 | __all__ = ["DockerSystem", "LocalDockerConfig"] 9 | -------------------------------------------------------------------------------- /olive/systems/isolated_ort/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | from olive.systems.isolated_ort.isolated_ort_system import IsolatedORTSystem 6 | 7 | __all__ = ["IsolatedORTSystem"] 8 | -------------------------------------------------------------------------------- /olive/systems/python_environment/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.systems.python_environment.python_environment_system import PythonEnvironmentSystem 6 | 7 | __all__ = ["PythonEnvironmentSystem"] 8 | -------------------------------------------------------------------------------- /olive/systems/python_environment/common_requirements.txt: -------------------------------------------------------------------------------- 1 | numpy<2.0 2 | protobuf 3 | psutil 4 | pydantic 5 | -------------------------------------------------------------------------------- /olive/systems/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.systems.utils.arg_parser import get_common_args, parse_config 6 | from olive.systems.utils.misc import ( 7 | create_managed_system, 8 | create_managed_system_with_cache, 9 | create_new_environ, 10 | get_package_name_from_ep, 11 | run_available_providers_runner, 12 | ) 13 | 14 | __all__ = [ 15 | "create_managed_system", 16 | "create_managed_system_with_cache", 17 | "create_new_environ", 18 | "get_common_args", 19 | "get_package_name_from_ep", 20 | "parse_config", 21 | "run_available_providers_runner", 22 | ] 23 | -------------------------------------------------------------------------------- /olive/systems/utils/available_providers_runner.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | # NOTE: Only onnxruntime and its dependencies can be imported in this file. 
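# A minimal usage sketch, assuming the script is executed directly in the target
# environment (output file name is illustrative):
#
#   python available_providers_runner.py --output_path eps.json
#
# writes the JSON-encoded list returned by onnxruntime.get_available_providers() to
# eps.json; equivalently, main(["--output_path", "eps.json"]) can be called in-process.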
6 | import argparse 7 | import json 8 | from pathlib import Path 9 | 10 | import onnxruntime as ort 11 | 12 | 13 | def get_args(raw_args): 14 | parser = argparse.ArgumentParser(description="Get available execution providers") 15 | parser.add_argument("--output_path", type=str, required=True) 16 | 17 | return parser.parse_args(raw_args) 18 | 19 | 20 | def main(raw_args=None): 21 | args = get_args(raw_args) 22 | 23 | # get available execution providers 24 | available_eps = ort.get_available_providers() 25 | 26 | # save to json 27 | with Path(args.output_path).open("w") as f: 28 | json.dump(available_eps, f) 29 | 30 | 31 | if __name__ == "__main__": 32 | main() 33 | -------------------------------------------------------------------------------- /olive/workflows/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.workflows.run.run import run 6 | 7 | __all__ = ["run"] 8 | -------------------------------------------------------------------------------- /olive/workflows/run/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/workflows/run/__main__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
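# A minimal usage sketch: running this entry point as a module, assuming the olive
# package is installed, forwards all command-line arguments to the `run` command of
# the Olive CLI launcher via legacy_call:
#
#   python -m olive.workflows.run <args>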
4 | # -------------------------------------------------------------------------- 5 | if __name__ == "__main__": 6 | import sys 7 | 8 | from olive.cli.launcher import legacy_call 9 | 10 | legacy_call("olive.workflows.run", "run", *sys.argv[1:]) 11 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | editorconfig-checker 3 | lintrunner 4 | lintrunner-adapters 5 | pylint==3.3.6 6 | ruff==0.11.4 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | onnx 3 | onnxscript>=0.2.5 4 | optuna 5 | pandas 6 | pydantic 7 | pyyaml 8 | torch 9 | torchmetrics>=1.0.0 10 | transformers 11 | -------------------------------------------------------------------------------- /scripts/generate_cost_model_artifacts.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from olive.common.utils import run_subprocess 4 | 5 | models = { 6 | "Llama-2-7B": "meta-llama/Llama-2-7b-hf", 7 | "Llama-2-13B": "meta-llama/Llama-2-13b-hf", 8 | "Llama-3.1-8B": "meta-llama/Llama-3.1-8B", 9 | "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct", 10 | "Phi-3.5-mini": "microsoft/Phi-3.5-mini-instruct", 11 | } 12 | 13 | 14 | def main(): 15 | asset_dir = Path(__file__).parent.parent / "assets" / "cost_models" 16 | asset_dir.mkdir(parents=True, exist_ok=True) 17 | 18 | for model_name, model_id in models.items(): 19 | run_subprocess( 20 | ["olive", "generate-cost-model", "-m", model_id, "-o", str(asset_dir / f"{model_name}.csv")], check=True 21 | ) 22 | 23 | 24 | if __name__ == "__main__": 25 | main() 26 | -------------------------------------------------------------------------------- /scripts/overwrite_version.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import argparse 6 | from pathlib import Path 7 | 8 | 9 | def get_args(): 10 | parser = argparse.ArgumentParser(description="Overwrite package version in __init__.py") 11 | parser.add_argument("--version", type=str, required=True, help="Version to overwrite with") 12 | return parser.parse_args() 13 | 14 | 15 | def main(): 16 | args = get_args() 17 | version = args.version 18 | 19 | init_path = Path(__file__).parents[1].resolve() / "olive" / "__init__.py" 20 | with open(init_path) as f: 21 | lines = f.readlines() 22 | for i, line in enumerate(lines): 23 | if line.startswith("__version__"): 24 | lines[i] = f'__version__ = "{version}"\n' 25 | break 26 | 27 | with open(init_path, "w") as f: 28 | f.writelines(lines) 29 | 30 | 31 | if __name__ == "__main__": 32 | main() 33 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/aml_model_test/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/aml_model_test/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.10.16 6 | - pip=22.3.1 7 | - pip: 8 | - azure-ai-ml 9 | - azure-identity 10 | - azureml-dataprep!=4.12.0 11 | - onnxruntime 12 | - datasets 13 | - scipy 14 | - transformers==4.31.0 # TODO(team): 55036 Fixed error and update to latest version 15 | - onnxconverter_common 16 | - git+https://github.com/microsoft/Olive.git 17 | -------------------------------------------------------------------------------- /test/integ_test/aml_resource_path/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/azureml_eval/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/azureml_eval/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.9.21 6 | - pip=22.3.1 7 | - pip: 8 | - azureml-dataprep!=4.12.0 9 | - onnxruntime 10 | - datasets 11 | - transformers 12 | - torchvision 13 | - onnxconverter_common 14 | - git+https://github.com/microsoft/Olive.git 15 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/azureml_eval/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from torchvision import datasets 6 | from torchvision.transforms import ToTensor 7 | 8 | from olive.data.registry import Registry 9 | 10 | 11 | @Registry.register_post_process() 12 | def mnist_post_process_for_azureml_eval(res): 13 | return res.argmax(1) 14 | 15 | 16 | @Registry.register_dataset() 17 | def mnist_dataset_for_azureml_eval(data_dir): 18 | return datasets.MNIST(data_dir, download=True, transform=ToTensor()) 19 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/docker_eval/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/docker_eval/dockerfile/Dockerfile: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04 6 | 7 | RUN apt-get -y update && ACCEPT_EULA=Y apt-get -y upgrade 8 | RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir 9 | RUN pip install onnxruntime \ 10 | datasets \ 11 | git+https://github.com/microsoft/Olive.git \ 12 | onnxconverter_common \ 13 | openvino \ 14 | openvino-dev \ 15 | pandas \ 16 | plotly \ 17 | psutil \ 18 | transformers \ 19 | --no-cache-dir 20 | 21 | WORKDIR /olive 22 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/local_eval/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/pass_runner/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/multiple_ep/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/multiple_ep/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | olive-ai 3 | onnxconverter_common 4 | torchvision 5 | transformers 6 | -------------------------------------------------------------------------------- /test/multiple_ep/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from torchvision import datasets 6 | from torchvision.transforms import ToTensor 7 | 8 | from olive.data.registry import Registry 9 | 10 | 11 | @Registry.register_post_process() 12 | def mnist_post_process_for_multiple_ep(res): 13 | return res.argmax(1) 14 | 15 | 16 | @Registry.register_dataset() 17 | def mnist_dataset_for_multiple_ep(data_dir, *args, **kwargs): 18 | return datasets.MNIST(data_dir, transform=ToTensor()) 19 | -------------------------------------------------------------------------------- /test/requirements-test-cpu.txt: -------------------------------------------------------------------------------- 1 | -r requirements-test.txt 2 | onnxruntime-genai 3 | 4 | -------------------------------------------------------------------------------- /test/requirements-test-gpu.txt: -------------------------------------------------------------------------------- 1 | -r requirements-test.txt 2 | auto-gptq==0.7.1 3 | autoawq==0.2.8 4 | bitsandbytes 5 | onnxruntime-genai-cuda 6 | triton 7 | -------------------------------------------------------------------------------- /test/requirements-test.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | azure-ai-ml 3 | azure-identity 4 | azure-storage-blob 5 | # azureml.evaluate.mlflow.hftransformers is deprecated in 0.0.66 and above 6 | azureml-evaluate-mlflow>=0.0.60, <0.0.66 7 | azureml-fsspec 8 | # Pin azureml-metrics[all] greater than 0.0.26 to avoid breaking change in azureml-evaluate-mlflow 9 | azureml-metrics[all]>=0.0.26 10 | coverage 11 | cppimport 12 | datasets 13 | docker>=7.1.0 14 | evaluate 15 | marshmallow<3.24.0 16 | mlflow>=2.4.0, <2.20.0 17 | neural-compressor<2.4 18 | nncf>=2.16.0 19 | numpy<2.0.0 20 | nvidia-modelopt 21 | onnx-graphsurgeon 22 | 
onnxconverter_common 23 | onnxmltools 24 | onnxoptimizer 25 | onnxruntime_extensions 26 | onnxscript>=0.2.4 27 | openvino>=2025.1.0 28 | optimum[openvino]>=1.17.0, <=1.24 29 | optuna 30 | pandas 31 | peft 32 | plotly 33 | psutil 34 | pytest 35 | pytorch_lightning 36 | scipy 37 | sentencepiece 38 | soundfile 39 | tabulate 40 | torchvision 41 | -------------------------------------------------------------------------------- /test/unit_test/.gitignore: -------------------------------------------------------------------------------- 1 | dummy_model.onnx 2 | -------------------------------------------------------------------------------- /test/unit_test/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/assets/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/assets/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.model import OliveModelHandler 6 | 7 | 8 | def eval_func(model: OliveModelHandler, device, execution_providers): 9 | return 0.382715310 10 | 11 | 12 | def metric_func(inference_output, actuals): 13 | return 0.382715311 14 | -------------------------------------------------------------------------------- /test/unit_test/auto_optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/cli/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/cli/output_model/model_config.json: -------------------------------------------------------------------------------- 1 | { "type": "PyTorchModel", "model_path": "model_path" } 2 | -------------------------------------------------------------------------------- /test/unit_test/common/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/common/test_get_attr.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import pytest 6 | 7 | from olive.common.utils import get_attr 8 | 9 | 10 | def test_attr_exists(): 11 | class A: 12 | def __init__(self, b): 13 | self.b = b 14 | 15 | class B: 16 | def __init__(self, c): 17 | self.c = c 18 | 19 | class C: 20 | def __init__(self): 21 | self.d = "hi" 22 | 23 | c = C() 24 | b = B(c) 25 | a = A(b) 26 | 27 | attrs = ["", "b", "b.c", "b.c.d"] 28 | expected = [a, b, c, "hi"] 29 | for attr, exp in zip(attrs, expected): 30 | assert get_attr(a, attr) == exp 31 | 32 | 33 | def test_attr_no_exists(): 34 | a = "hi" 35 | 36 | assert get_attr(a, "b") is None 37 | 38 | 39 | def test_attr_no_exists_raise(): 40 | a = "hi" 41 | 42 | with pytest.raises(AttributeError): 43 | get_attr(a, "b", fail_on_not_found=True) 44 | -------------------------------------------------------------------------------- /test/unit_test/common/test_retry.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import pytest 6 | 7 | from olive.common.utils import retry_func 8 | 9 | # pylint: disable=global-statement 10 | 11 | 12 | num_tries = 0 13 | 14 | 15 | def fail_with_key_error(): 16 | global num_tries 17 | if num_tries == 0: 18 | num_tries += 1 19 | raise KeyError("This is a key error") 20 | else: 21 | return True 22 | 23 | 24 | def return_args(*args, **kwargs): 25 | return args, kwargs 26 | 27 | 28 | @pytest.mark.parametrize("exceptions", [KeyError, (KeyError, ValueError), Exception]) 29 | def test_success(exceptions): 30 | global num_tries 31 | num_tries = 0 32 | assert retry_func(fail_with_key_error, max_tries=2, delay=1, exceptions=exceptions) 33 | assert num_tries == 1 34 | 35 | 36 | def test_failure(): 37 | global num_tries 38 | num_tries = 0 39 | with pytest.raises(KeyError): 40 | retry_func(fail_with_key_error, max_tries=1, delay=1) 41 | 42 | 43 | def test_args(): 44 | assert retry_func(return_args, [1, 2, 3], {"a": 4, "b": 5}) == ((1, 2, 3), {"a": 4, "b": 5}) 45 | 46 | 47 | def test_different_exceptions(): 48 | global num_tries 49 | num_tries = 0 50 | with pytest.raises(KeyError): 51 | retry_func(fail_with_key_error, max_tries=2, delay=1, exceptions=ValueError) 52 | -------------------------------------------------------------------------------- /test/unit_test/conftest.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import shutil 6 | 7 | import pytest 8 | 9 | from test.unit_test.utils import create_onnx_model_file, delete_onnx_model_files 10 | 11 | 12 | @pytest.fixture(scope="session", autouse=True) 13 | def setup_onnx_model(request, tmp_path_factory): 14 | cache_path = tmp_path_factory.mktemp("transformers_cache") 15 | import transformers 16 | 17 | # we cannot use os.environ["TRANSFORMERS_CACHE"] = str(cache_path) 18 | # because the TRANSFORMERS_CACHE is loaded when importing transformers 19 | transformers.utils.hub.TRANSFORMERS_CACHE = str(cache_path) 20 | 21 | from datasets import disable_caching 22 | 23 | disable_caching() 24 | create_onnx_model_file() 25 | yield 26 | delete_onnx_model_files() 27 | shutil.rmtree(cache_path, ignore_errors=True) 28 | -------------------------------------------------------------------------------- /test/unit_test/data_container/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/engine/packaging/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/engine/packaging/code/score.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/evaluator/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/hardware/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/model/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/model/user_script.py:
--------------------------------------------------------------------------------
1 | from test.unit_test.utils import get_pytorch_model
2 | 
3 | 
4 | def load_decoder_model(model_path):
5 |     return get_pytorch_model().load_model()
6 | 
7 | 
8 | def load_decoder_with_past_model(model_path):
9 |     return get_pytorch_model().load_model()
10 | 
11 | 
12 | def decoder_with_past_inputs(model):
13 |     pass
14 | 
15 | 
16 | def decoder_inputs(model):
17 |     pass
18 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/common/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/common/test_user_script.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | from olive.hardware import DEFAULT_CPU_ACCELERATOR
6 | from olive.passes.onnx.session_params_tuning import OrtSessionParamsTuning
7 | 
8 | 
9 | class TestUserScriptConfig:
10 |     def test_no_config(self):
11 |         config = OrtSessionParamsTuning.generate_config(DEFAULT_CPU_ACCELERATOR, disable_search=True)
12 |         assert config
13 |         assert OrtSessionParamsTuning.validate_config(config, DEFAULT_CPU_ACCELERATOR)
14 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/inc/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/pipeline/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/test_float16_conversion.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | import onnx
6 | import pytest
7 | 
8 | from olive.passes.olive_pass import create_pass_from_dict
9 | from olive.passes.onnx.float16_conversion import OnnxFloatToFloat16
10 | from test.unit_test.utils import get_onnx_model
11 | 
12 | 
13 | @pytest.mark.parametrize("keep_io_types", [True, False])
14 | def test_onnxfloattofloat16(keep_io_types, tmp_path):
15 |     # setup
16 |     # this is a simple model with a single Gemm node
17 |     input_model = get_onnx_model()
18 |     p = create_pass_from_dict(OnnxFloatToFloat16, {"keep_io_types": keep_io_types}, disable_search=True)
19 |     output_folder = str(tmp_path / "onnx")
20 | 
21 |     # execute
22 |     output_model = p.run(input_model, output_folder)
23 | 
24 |     # assert
25 |     # check that the input and output types are as expected
26 |     io_config = output_model.io_config
27 |     for io_type in [*io_config["input_types"], *io_config["output_types"]]:
28 |         assert io_type == ("float32" if keep_io_types else "float16")
29 | 
30 |     # check that the model initializer types are float16
31 |     for initializer in output_model.load_model().graph.initializer:
32 |         assert initializer.data_type == onnx.TensorProto.FLOAT16
33 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/test_mixed_precision.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
6 | from olive.passes.olive_pass import create_pass_from_dict
7 | from olive.passes.onnx.mixed_precision import OrtMixedPrecision
8 | from test.unit_test.utils import get_onnx_model
9 | 
10 | 
11 | def test_ort_mixed_precision_pass(tmp_path):
12 |     # setup
13 |     input_model = get_onnx_model()
14 |     p = create_pass_from_dict(OrtMixedPrecision, {}, disable_search=True)
15 |     output_folder = str(tmp_path / "onnx")
16 | 
17 |     # execute
18 |     p.run(input_model, output_folder)
19 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/test_model_builder.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | from pathlib import Path
6 | 
7 | import pytest
8 | 
9 | from olive.model import ONNXModelHandler
10 | from olive.passes.olive_pass import create_pass_from_dict
11 | from olive.passes.onnx.model_builder import ModelBuilder
12 | from test.unit_test.utils import make_local_tiny_llama
13 | 
14 | 
15 | @pytest.mark.parametrize("metadata_only", [True, False])
16 | def test_model_builder(tmp_path, metadata_only):
17 |     input_model = make_local_tiny_llama(tmp_path / "input_model", "onnx" if metadata_only else "hf")
18 | 
19 |     p = create_pass_from_dict(ModelBuilder, {"precision": "fp32", "metadata_only": metadata_only}, disable_search=True)
20 |     output_folder = tmp_path / "output_model"
21 | 
22 |     # execute the pass
23 |     output_model = p.run(input_model, output_folder)
24 | 
25 |     # assert
26 |     assert isinstance(output_model, ONNXModelHandler)
27 |     assert Path(output_model.model_path).exists()
28 |     assert Path(output_folder / "genai_config.json").exists()
29 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/test_qnn_mixed_precision_overrides.py:
--------------------------------------------------------------------------------
1 | from olive.passes.olive_pass import create_pass_from_dict
2 | from olive.passes.onnx.mixed_precision_overrides import MixedPrecisionOverrides
3 | from test.unit_test.utils import get_onnx_model
4 | 
5 | 
6 | def test_qnn_mixed_precision_overrides(tmp_path):
7 |     input_model = get_onnx_model()
8 |     p = create_pass_from_dict(
9 |         MixedPrecisionOverrides,
10 |         {
11 |             "overrides_config": {
12 |                 "/fc1/Gemm_output_0": "QUInt16",
13 |             }
14 |         },
15 |         disable_search=True,
16 |     )
17 |     out = p.run(input_model, tmp_path)
18 |     assert out == input_model
19 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/openvino/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/openvino/user_script.py:
--------------------------------------------------------------------------------
1 | from test.unit_test.utils import get_pytorch_model, get_pytorch_model_dummy_input
2 | 
3 | 
4 | def get_dummy_input():
5 |     input_model = get_pytorch_model()
6 |     return get_pytorch_model_dummy_input(input_model)
7 | 
8 | 
9 | def get_input():
10 |     return [[1, 1]]
11 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/pytorch/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/pytorch/test_autoawq.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | from pathlib import Path
6 | 
7 | import pytest
8 | import torch
9 | 
10 | from olive.hardware.accelerator import AcceleratorSpec, Device
11 | from olive.model import HfModelHandler
12 | from olive.passes.olive_pass import create_pass_from_dict
13 | from olive.passes.pytorch.autoawq import AutoAWQQuantizer
14 | 
15 | 
16 | @pytest.mark.skipif(
17 |     not torch.cuda.is_available(),
18 |     reason="awq requires GPU.",
19 | )
20 | def test_awq(tmp_path: Path):
21 |     # setup
22 |     input_model = HfModelHandler(model_path="facebook/opt-125m", load_kwargs={"use_safetensors": False})
23 | 
24 |     p = create_pass_from_dict(
25 |         AutoAWQQuantizer,
26 |         disable_search=True,
27 |         accelerator_spec=AcceleratorSpec(accelerator_type=Device.GPU, execution_provider="CUDAExecutionProvider"),
28 |     )
29 |     awq_out_folder = str(tmp_path / "awq")
30 | 
31 |     # execute
32 |     out = p.run(input_model, awq_out_folder)
33 | 
34 |     # assert
35 |     assert isinstance(out, HfModelHandler)
36 | 
37 |     from transformers import OPTForCausalLM
38 | 
39 |     assert isinstance(out.load_model(), OPTForCausalLM)
40 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/qnn/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/test_pass_serialization.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | import pytest
6 | 
7 | from olive.hardware import DEFAULT_CPU_ACCELERATOR
8 | from olive.passes.olive_pass import FullPassConfig
9 | from olive.passes.onnx.conversion import OnnxConversion
10 | 
11 | 
12 | @pytest.mark.parametrize("host_device", [None, "cpu", "gpu"])
13 | def test_pass_serialization(host_device):
14 |     config = OnnxConversion.generate_config(DEFAULT_CPU_ACCELERATOR)
15 |     onnx_conversion = OnnxConversion(DEFAULT_CPU_ACCELERATOR, config, host_device=host_device)
16 |     json = onnx_conversion.to_json(True)
17 | 
18 |     cfg = FullPassConfig.from_json(json)
19 |     p = cfg.create_pass()
20 |     assert isinstance(p, OnnxConversion)
21 |     assert p.accelerator_spec == DEFAULT_CPU_ACCELERATOR
22 |     assert p.config == config
23 |     assert p.host_device == host_device
24 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/vitis_ai/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/resource_path/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/snpe/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/azureml/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/azureml/data_dir/datafile.json:
--------------------------------------------------------------------------------
1 | { }
2 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/azureml/output_metrics/pipeline_output/named-outputs/accuracy/metric_result.json:
--------------------------------------------------------------------------------
1 | {
2 |     "accuracy-accuracy_score": { "value": 0.99618, "priority": 1, "higher_is_better": true },
3 |     "accuracy-f1_score": { "value": 0.99618, "priority": -1, "higher_is_better": true },
4 |     "accuracy-precision": { "value": 0.99618, "priority": -1, "higher_is_better": true },
5 |     "accuracy-recall": { "value": 0.99618, "priority": -1, "higher_is_better": true },
6 |     "accuracy-auroc": { "value": 0.99618, "priority": -1, "higher_is_better": true }
7 | }
8 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/azureml/output_metrics/pipeline_output/named-outputs/latency/metric_result.json:
--------------------------------------------------------------------------------
1 | {
2 |     "latency-avg": { "value": 0.031415, "priority": 1, "higher_is_better": false },
3 |     "latency-max": { "value": 0.031415, "priority": -1, "higher_is_better": false },
4 |     "latency-min": { "value": 0.031415, "priority": -1, "higher_is_better": false },
5 |     "latency-p50": { "value": 0.031415, "priority": -1, "higher_is_better": false },
6 |     "latency-p75": { "value": 0.031415, "priority": -1, "higher_is_better": false },
7 |     "latency-p90": { "value": 0.031415, "priority": -1, "higher_is_better": false },
8 |     "latency-p95": { "value": 0.031415, "priority": -1, "higher_is_better": false },
9 |     "latency-p99": { "value": 0.031415, "priority": -1, "higher_is_better": false },
10 |     "latency-p999": { "value": 0.031415, "priority": -1, "higher_is_better": false }
11 | }
12 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/azureml/script_dir/user_script.py:
--------------------------------------------------------------------------------
1 | # Test file for test__create_data_script_inputs_and_args
2 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/docker/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/docker/output_local_path/eval_res.json:
--------------------------------------------------------------------------------
1 | {
2 |     "accuracy-accuracy_score": { "value": 0.99618, "priority": 1, "higher_is_better": true },
3 |     "accuracy-f1_score": { "value": 0.99618, "priority": 1, "higher_is_better": true },
4 |     "accuracy-precision": { "value": 0.99618, "priority": 1, "higher_is_better": true },
5 |     "accuracy-recall": { "value": 0.99618, "priority": 1, "higher_is_better": true },
6 |     "accuracy-auroc": { "value": 0.99618, "priority": 1, "higher_is_better": true }
7 | }
8 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/isolated_ort/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/python_environment/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/test_utils.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | import json
6 | from unittest.mock import patch
7 | 
8 | from olive.systems.utils.available_providers_runner import main as available_providers_main
9 | 
10 | 
11 | @patch("onnxruntime.get_available_providers")
12 | def test_available_providers_runner(mock_get_providers, tmp_path):
13 |     mock_get_providers.return_value = ["DummyExecutionProvider"]
14 |     output_path = tmp_path / "available_eps.json"
15 | 
16 |     # command
17 |     args = ["--output_path", str(output_path)]
18 | 
19 |     # execute
20 |     available_providers_main(args)
21 | 
22 |     # assert
23 |     assert output_path.exists()
24 |     mock_get_providers.assert_called_once()
25 |     with output_path.open("r") as f:
26 |         assert json.load(f) == ["DummyExecutionProvider"]
27 | 
--------------------------------------------------------------------------------
/test/unit_test/test_package_config.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
6 | from olive.package_config import OlivePackageConfig
7 | 
8 | 
9 | class TestPackageConfig:
10 |     def test_passes_configuration(self):
11 |         package_config = OlivePackageConfig.load_default_config()
12 |         for pass_module_name, pass_module_config in package_config.passes.items():
13 |             assert pass_module_config.module_path
14 |             assert pass_module_config.module_path[-len(pass_module_name) :].lower() == pass_module_name
15 |             package_config.import_pass_module(pass_module_name)
16 | 
--------------------------------------------------------------------------------
/test/unit_test/workflows/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/workflows/mock_data/default_engine.json:
--------------------------------------------------------------------------------
1 | {
2 |     "input_model": {
3 |         "type": "HfModel",
4 |         "model_path": "hf-internal-testing/tiny-random-BertForSequenceClassification",
5 |         "task": "text-classification"
6 |     },
7 |     "passes": { "conversion": { "type": "OnnxConversion" } }
8 | }
9 | 
--------------------------------------------------------------------------------
/test/unit_test/workflows/mock_data/dependency_setup.json:
--------------------------------------------------------------------------------
1 | {
2 |     "input_model": {
3 |         "type": "PyTorchModel",
4 |         "model_path": "dummy_model.pt",
5 |         "io_config": { "input_names": [ "x" ], "input_shapes": [ [ 1, 2, 3 ] ], "output_names": [ "y" ] }
6 |     },
7 |     "systems": { "local_system": { "type": "LocalSystem", "accelerators": [ { "device": "gpu" } ] } },
8 |     "passes": {
9 |         "onnx_conversion": { "type": "OnnxConversion" },
10 |         "session_params_tuning": { "type": "OrtSessionParamsTuning" }
11 |     },
12 |     "host": "local_system",
13 |     "target": "local_system"
14 | }
15 | 
--------------------------------------------------------------------------------
/test/unit_test/workflows/mock_data/readymade_system.json:
--------------------------------------------------------------------------------
1 | {
2 |     "azureml_client": {
3 |         "subscription_id": "my_subscription_id",
4 |         "resource_group": "my_resource_group",
5 |         "workspace_name": "my_workspace"
6 |     },
7 |     "input_model": { "type": "PyTorchModel", "model_path": "models/resnet_trained_for_cifar10.pt" },
8 |     "systems": {
9 |         "azureml_system": {
10 |             "type": "AzureNDV2System",
11 |             "config": {
12 |                 "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ],
13 |                 "aml_compute": "gpu-cluster",
14 |                 "aml_docker_config": {
15 |                     "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04",
16 |                     "conda_file_path": "conda.yaml"
17 |                 },
18 |                 "is_dev": true
19 |             }
20 |         }
21 |     },
22 |     "passes": { "onnx_conversion": { "type": "OnnxConversion" } },
23 |     "engine": { "host": "azureml_system", "target": "azureml_system" }
24 | }
25 | 
--------------------------------------------------------------------------------